grammar_cop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,46 @@
1
+ /*
2
+ * corpus.h
3
+ *
4
+ * Data for corpus statistics, used to provide a parse ranking
5
+ * to drive the SAT solver, as well as parse ranking with the
6
+ * ordinary solver.
7
+ *
8
+ * Copyright (c) 2008, 2009 Linas Vepstas <linasvepstas@gmail.com>
9
+ */
10
+
11
+ #ifndef _LINKGRAMMAR_CORPUS_H
12
+ #define _LINKGRAMMAR_CORPUS_H
13
+
14
+ #ifdef USE_CORPUS
15
+
16
+ #include "../api-types.h"
17
+ #include "../link-includes.h"
18
+
19
+ Corpus * lg_corpus_new(void);
20
+ void lg_corpus_delete(Corpus *);
21
+
22
+ void lg_corpus_score(Sentence, Linkage_info *);
23
+ double lg_corpus_disjunct_score(Linkage linkage, int w);
24
+ void lg_corpus_linkage_senses(Linkage);
25
+
26
+ Sense * lg_get_word_sense(Linkage_info *, int word);
27
+ Sense * lg_sense_next(Sense *);
28
+ int lg_sense_get_index(Sense *);
29
+ const char * lg_sense_get_subscripted_word(Sense *);
30
+ const char * lg_sense_get_disjunct(Sense *);
31
+ const char * lg_sense_get_sense(Sense *);
32
+ double lg_sense_get_score(Sense *);
33
+ void lg_sense_delete(Linkage_info *);
34
+
35
+ #else /* USE_CORPUS */
36
+
37
+ static inline void lg_corpus_score(Sentence s, Linkage_info *li) {}
38
+ static inline void lg_corpus_linkage_senses(Linkage l) {}
39
+ static inline Sense * lg_get_word_sense(Linkage_info *lif, int word) { return NULL; }
40
+ static inline Sense * lg_sense_next(Sense *s ) {return NULL; }
41
+ static inline const char * lg_sense_get_sense(Sense *s) { return NULL; }
42
+ static inline double lg_sense_get_score(Sense *s) { return 0.0; }
43
+ static inline double lg_corpus_disjunct_score(Linkage linkage, int w) { return 998.0; }
44
+ #endif /* USE_CORPUS */
45
+
46
+ #endif /* _LINKGRAMMAR_CORPUS_H */
@@ -0,0 +1,828 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+ #include "disjunct-utils.h"
16
+
17
+ /* This file contains the exhaustive search algorithm. */
18
+
19
+ typedef struct Table_connector_s Table_connector;
20
+ struct Table_connector_s
21
+ {
22
+ short lw, rw;
23
+ Connector *le, *re;
24
+ short cost;
25
+ s64 count;
26
+ Table_connector *next;
27
+ };
28
+
29
+ struct count_context_s
30
+ {
31
+ char ** deletable;
32
+ char ** effective_dist;
33
+ Word * local_sent;
34
+ int null_block;
35
+ int islands_ok;
36
+ int null_links;
37
+ int table_size;
38
+ int log2_table_size;
39
+ Table_connector ** table;
40
+ Resources current_resources;
41
+ };
42
+
43
+ static void free_table(count_context_t *ctxt)
44
+ {
45
+ int i;
46
+ Table_connector *t, *x;
47
+
48
+ for (i=0; i<ctxt->table_size; i++)
49
+ {
50
+ for(t = ctxt->table[i]; t!= NULL; t=x)
51
+ {
52
+ x = t->next;
53
+ xfree((void *) t, sizeof(Table_connector));
54
+ }
55
+ }
56
+ xfree(ctxt->table, ctxt->table_size * sizeof(Table_connector*));
57
+ ctxt->table = NULL;
58
+ ctxt->table_size = 0;
59
+ }
60
+
61
+ static void init_table(Sentence sent)
62
+ {
63
+ int shift;
64
+ /* A piecewise exponential function determines the size of the
65
+ * hash table. Probably should make use of the actual number of
66
+ * disjuncts, rather than just the number of words.
67
+ */
68
+ count_context_t *ctxt = sent->count_ctxt;
69
+
70
+ if (ctxt->table) free_table(ctxt);
71
+
72
+ if (sent->length >= 10)
73
+ {
74
+ shift = 12 + (sent->length) / 6 ;
75
+ }
76
+ else
77
+ {
78
+ shift = 12;
79
+ }
80
+
81
+ /* Clamp at max 4*(1<<24) == 64 MBytes */
82
+ if (24 < shift) shift = 24;
83
+ ctxt->table_size = (1 << shift);
84
+ ctxt->log2_table_size = shift;
85
+ ctxt->table = (Table_connector**)
86
+ xalloc(ctxt->table_size * sizeof(Table_connector*));
87
+ memset(ctxt->table, 0, ctxt->table_size*sizeof(Table_connector*));
88
+ }
89
+
90
+ int x_match(Sentence sent, Connector *a, Connector *b)
91
+ {
92
+ return do_match(sent, a, b, 0, 0);
93
+ }
94
+
95
+ void count_set_effective_distance(Sentence sent)
96
+ {
97
+ sent->count_ctxt->effective_dist = sent->effective_dist;
98
+ }
99
+
100
+ void count_unset_effective_distance(Sentence sent)
101
+ {
102
+ sent->count_ctxt->effective_dist = NULL;
103
+ }
104
+
105
+ /*
106
+ * Returns TRUE if s and t match according to the connector matching
107
+ * rules. The connector strings must be properly formed, starting with
108
+ * zero or more upper case letters, followed by some other letters, and
109
+ * The algorithm is symmetric with respect to a and b.
110
+ *
111
+ * It works as follows: The labels must match. The priorities must be
112
+ * compatible (both THIN_priority, or one UP_priority and one DOWN_priority).
113
+ * The sequence of upper case letters must match exactly. After these comes
114
+ * a sequence of lower case letters "*"s or "^"s. The matching algorithm
115
+ * is different depending on which of the two priority cases is being
116
+ * considered. See the comments below.
117
+ */
118
+ int do_match(Sentence sent, Connector *a, Connector *b, int aw, int bw)
119
+ {
120
+ const char *s, *t;
121
+ int x, y, dist;
122
+ count_context_t *ctxt;
123
+
124
+ if (a->label != b->label) return FALSE;
125
+
126
+ s = a->string;
127
+ t = b->string;
128
+
129
+ while(isupper((int)*s) || isupper((int)*t))
130
+ {
131
+ if (*s != *t) return FALSE;
132
+ s++;
133
+ t++;
134
+ }
135
+
136
+ ctxt = sent->count_ctxt;
137
+
138
+ x = a->priority;
139
+ y = b->priority;
140
+
141
+ /* Probably not necessary, as long as
142
+ * effective_dist[0][0]=0 and is defined */
143
+ if (aw == 0 && bw == 0) {
144
+ dist = 0;
145
+ } else {
146
+ assert(aw < bw, "match() did not receive params in the natural order.");
147
+ dist = ctxt->effective_dist[aw][bw];
148
+ }
149
+ /* printf("M: a=%4s b=%4s ap=%d bp=%d aw=%d bw=%d a->ll=%d b->ll=%d dist=%d\n",
150
+ s, t, x, y, aw, bw, a->length_limit, b->length_limit, dist); */
151
+ if (dist > a->length_limit || dist > b->length_limit) return FALSE;
152
+
153
+ if ((x == THIN_priority) && (y == THIN_priority))
154
+ {
155
+ /*
156
+ Remember that "*" matches anything, and "^" matches nothing
157
+ (except "*"). Otherwise two characters match if and only if
158
+ they're equal. ("^" can be used in the dictionary just like
159
+ any other connector.)
160
+ */
161
+ while ((*s!='\0') && (*t!='\0')) {
162
+ if ((*s == '*') || (*t == '*') ||
163
+ ((*s == *t) && (*s != '^'))) {
164
+ s++;
165
+ t++;
166
+ } else return FALSE;
167
+ }
168
+ return TRUE;
169
+ } else if ((x==UP_priority) && (y==DOWN_priority)) {
170
+ /*
171
+ As you go up (namely from x to y) the set of strings that
172
+ match (in the normal THIN sense above) should get no larger.
173
+ Read the comment in and.c to understand this.
174
+ In other words, the y string (t) must be weaker (or at least
175
+ no stronger) that the x string (s).
176
+
177
+ This code is only correct if the strings are the same
178
+ length. This is currently true, but perhaps for safty
179
+ this assumption should be removed.
180
+ */
181
+ while ((*s!='\0') && (*t!='\0')) {
182
+ if ((*s == *t) || (*s == '*') || (*t == '^')) {
183
+ s++;
184
+ t++;
185
+ } else return FALSE;
186
+ }
187
+ return TRUE;
188
+ }
189
+ else if ((y == UP_priority) && (x == DOWN_priority))
190
+ {
191
+ while ((*s!='\0') && (*t!='\0'))
192
+ {
193
+ if ((*s == *t) || (*t == '*') || (*s == '^'))
194
+ {
195
+ s++;
196
+ t++;
197
+ }
198
+ else
199
+ return FALSE;
200
+ }
201
+ return TRUE;
202
+ }
203
+ else
204
+ return FALSE;
205
+ }
206
+
207
+ /**
208
+ * Stores the value in the table. Assumes it's not already there.
209
+ */
210
+ static Table_connector * table_store(count_context_t *ctxt,
211
+ int lw, int rw,
212
+ Connector *le, Connector *re,
213
+ int cost, s64 count)
214
+ {
215
+ Table_connector *t, *n;
216
+ int h;
217
+
218
+ n = (Table_connector *) xalloc(sizeof(Table_connector));
219
+ n->count = count;
220
+ n->lw = lw; n->rw = rw; n->le = le; n->re = re; n->cost = cost;
221
+ h = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
222
+ t = ctxt->table[h];
223
+ n->next = t;
224
+ ctxt->table[h] = n;
225
+ return n;
226
+ }
227
+
228
+ /** returns the pointer to this info, NULL if not there */
229
+ static Table_connector *
230
+ find_table_pointer(count_context_t *ctxt,
231
+ int lw, int rw,
232
+ Connector *le, Connector *re,
233
+ int cost)
234
+ {
235
+ Table_connector *t;
236
+ int h = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
237
+ t = ctxt->table[h];
238
+ for (; t != NULL; t = t->next) {
239
+ if ((t->lw == lw) && (t->rw == rw) && (t->le == le) && (t->re == re)
240
+ && (t->cost == cost)) return t;
241
+ }
242
+
243
+ /* Create a new connector only if resources are exhausted.
244
+ * (???) Huh? I guess we're in panic parse mode in that case.
245
+ */
246
+ if ((ctxt->current_resources != NULL) &&
247
+ resources_exhausted(ctxt->current_resources))
248
+ {
249
+ return table_store(ctxt, lw, rw, le, re, cost, 0);
250
+ }
251
+ else return NULL;
252
+ }
253
+
254
+ /** returns the count for this quintuple if there, -1 otherwise */
255
+ s64 table_lookup(Sentence sent,
256
+ int lw, int rw, Connector *le, Connector *re, int cost)
257
+ {
258
+ Table_connector *t = find_table_pointer(sent->count_ctxt, lw, rw, le, re, cost);
259
+
260
+ if (t == NULL) return -1; else return t->count;
261
+ }
262
+
263
+ /**
264
+ * Stores the value in the table. Unlike table_store, it assumes
265
+ * it's already there
266
+ */
267
+ static void table_update(count_context_t *ctxt, int lw, int rw,
268
+ Connector *le, Connector *re,
269
+ int cost, s64 count)
270
+ {
271
+ Table_connector *t = find_table_pointer(ctxt, lw, rw, le, re, cost);
272
+
273
+ assert(t != NULL, "This entry is supposed to be in the table.");
274
+ t->count = count;
275
+ }
276
+
277
+ /**
278
+ * Returns 0 if and only if this entry is in the hash table
279
+ * with a count value of 0.
280
+ */
281
+ static s64 pseudocount(Sentence sent,
282
+ int lw, int rw, Connector *le, Connector *re, int cost)
283
+ {
284
+ s64 count;
285
+ count = table_lookup(sent, lw, rw, le, re, cost);
286
+ if (count == 0) return 0; else return 1;
287
+ }
288
+
289
+ static s64 do_count(Sentence sent, int lw, int rw,
290
+ Connector *le, Connector *re, int null_count)
291
+ {
292
+ Disjunct * d;
293
+ s64 total, pseudototal;
294
+ int start_word, end_word, w;
295
+ s64 leftcount, rightcount;
296
+ int lcost, rcost, Lmatch, Rmatch;
297
+
298
+ Match_node * m, *m1;
299
+ Table_connector *t;
300
+
301
+ count_context_t *ctxt = sent->count_ctxt;
302
+
303
+ if (null_count < 0) return 0; /* can this ever happen?? */
304
+
305
+ t = find_table_pointer(ctxt, lw, rw, le, re, null_count);
306
+
307
+ if (t == NULL) {
308
+ /* Create the table entry with a tentative null count of 0.
309
+ * This count must be updated before we return. */
310
+ t = table_store(ctxt, lw, rw, le, re, null_count, 0);
311
+ } else {
312
+ return t->count;
313
+ }
314
+
315
+ if (rw == 1+lw)
316
+ {
317
+ /* lw and rw are neighboring words */
318
+ /* You can't have a linkage here with null_count > 0 */
319
+ if ((le == NULL) && (re == NULL) && (null_count == 0))
320
+ {
321
+ t->count = 1;
322
+ }
323
+ else
324
+ {
325
+ t->count = 0;
326
+ }
327
+ return t->count;
328
+ }
329
+
330
+ if ((le == NULL) && (re == NULL))
331
+ {
332
+ if (!ctxt->islands_ok && (lw != -1))
333
+ {
334
+ /* If we don't allow islands (a set of words linked together
335
+ * but separate from the rest of the sentence) then the
336
+ * null_count of skipping n words is just n */
337
+ if (null_count == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block)
338
+ {
339
+ /* If null_block=4 then the null_count of
340
+ 1,2,3,4 nulls is 1; and 5,6,7,8 is 2 etc. */
341
+ t->count = 1;
342
+ }
343
+ else
344
+ {
345
+ t->count = 0;
346
+ }
347
+ return t->count;
348
+ }
349
+ if (null_count == 0)
350
+ {
351
+ /* There is no solution without nulls in this case. There is
352
+ * a slight efficiency hack to separate this null_count==0
353
+ * case out, but not necessary for correctness */
354
+ t->count = 0;
355
+ }
356
+ else
357
+ {
358
+ total = 0;
359
+ w = lw+1;
360
+ for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
361
+ {
362
+ if (d->left == NULL)
363
+ {
364
+ total += do_count(sent, w, rw, d->right, NULL, null_count-1);
365
+ }
366
+ }
367
+ total += do_count(sent, w, rw, NULL, NULL, null_count-1);
368
+ t->count = total;
369
+ }
370
+ return t->count;
371
+ }
372
+
373
+ if (le == NULL)
374
+ {
375
+ start_word = lw+1;
376
+ }
377
+ else
378
+ {
379
+ start_word = le->word;
380
+ }
381
+
382
+ if (re == NULL)
383
+ {
384
+ end_word = rw;
385
+ }
386
+ else
387
+ {
388
+ end_word = re->word +1;
389
+ }
390
+
391
+ total = 0;
392
+
393
+ for (w = start_word; w < end_word; w++)
394
+ {
395
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
396
+ for (; m!=NULL; m=m->next)
397
+ {
398
+ d = m->d;
399
+ for (lcost = 0; lcost <= null_count; lcost++)
400
+ {
401
+ rcost = null_count - lcost;
402
+ /* Now lcost and rcost are the costs we're assigning
403
+ * to those parts respectively */
404
+
405
+ /* Now, we determine if (based on table only) we can see that
406
+ the current range is not parsable. */
407
+ Lmatch = (le != NULL) && (d->left != NULL) &&
408
+ do_match(sent, le, d->left, lw, w);
409
+ Rmatch = (d->right != NULL) && (re != NULL) &&
410
+ do_match(sent, d->right, re, w, rw);
411
+
412
+ rightcount = leftcount = 0;
413
+ if (Lmatch)
414
+ {
415
+ leftcount = pseudocount(sent, lw, w, le->next, d->left->next, lcost);
416
+ if (le->multi) leftcount += pseudocount(sent, lw, w, le, d->left->next, lcost);
417
+ if (d->left->multi) leftcount += pseudocount(sent, lw, w, le->next, d->left, lcost);
418
+ if (le->multi && d->left->multi) leftcount += pseudocount(sent, lw, w, le, d->left, lcost);
419
+ }
420
+
421
+ if (Rmatch)
422
+ {
423
+ rightcount = pseudocount(sent, w, rw, d->right->next, re->next, rcost);
424
+ if (d->right->multi) rightcount += pseudocount(sent, w,rw,d->right,re->next, rcost);
425
+ if (re->multi) rightcount += pseudocount(sent, w, rw, d->right->next, re, rcost);
426
+ if (d->right->multi && re->multi) rightcount += pseudocount(sent, w, rw, d->right, re, rcost);
427
+ }
428
+
429
+ /* total number where links are used on both sides */
430
+ pseudototal = leftcount*rightcount;
431
+
432
+ if (leftcount > 0) {
433
+ /* evaluate using the left match, but not the right */
434
+ pseudototal += leftcount * pseudocount(sent, w, rw, d->right, re, rcost);
435
+ }
436
+ if ((le == NULL) && (rightcount > 0)) {
437
+ /* evaluate using the right match, but not the left */
438
+ pseudototal += rightcount * pseudocount(sent, lw, w, le, d->left, lcost);
439
+ }
440
+
441
+ /* now pseudototal is 0 implies that we know that the true total is 0 */
442
+ if (pseudototal != 0) {
443
+ rightcount = leftcount = 0;
444
+ if (Lmatch) {
445
+ leftcount = do_count(sent, lw, w, le->next, d->left->next, lcost);
446
+ if (le->multi) leftcount += do_count(sent, lw, w, le, d->left->next, lcost);
447
+ if (d->left->multi) leftcount += do_count(sent, lw, w, le->next, d->left, lcost);
448
+ if (le->multi && d->left->multi) leftcount += do_count(sent, lw, w, le, d->left, lcost);
449
+ }
450
+
451
+ if (Rmatch) {
452
+ rightcount = do_count(sent, w, rw, d->right->next, re->next, rcost);
453
+ if (d->right->multi) rightcount += do_count(sent, w,rw,d->right,re->next, rcost);
454
+ if (re->multi) rightcount += do_count(sent, w, rw, d->right->next, re, rcost);
455
+ if (d->right->multi && re->multi) rightcount += do_count(sent, w, rw, d->right, re, rcost);
456
+ }
457
+
458
+ total += leftcount*rightcount; /* total number where links are used on both sides */
459
+
460
+ if (leftcount > 0) {
461
+ /* evaluate using the left match, but not the right */
462
+ total += leftcount * do_count(sent, w, rw, d->right, re, rcost);
463
+ }
464
+ if ((le == NULL) && (rightcount > 0)) {
465
+ /* evaluate using the right match, but not the left */
466
+ total += rightcount * do_count(sent, lw, w, le, d->left, lcost);
467
+ }
468
+ }
469
+ }
470
+ }
471
+
472
+ put_match_list(sent, m1);
473
+ }
474
+ t->count = total;
475
+ return total;
476
+ }
477
+
478
+ /**
479
+ * Returns the number of ways the sentence can be parsed with the
480
+ * specified null count. Assumes that the hash table has already been
481
+ * initialized, and is freed later. The "null_count" here is the
482
+ * number of words that are allowed to have no links to them.
483
+ */
484
+ s64 do_parse(Sentence sent, int null_count, Parse_Options opts)
485
+ {
486
+ s64 total;
487
+ count_context_t *ctxt = sent->count_ctxt;
488
+
489
+ count_set_effective_distance(sent);
490
+ ctxt->current_resources = opts->resources;
491
+ ctxt->local_sent = sent->word;
492
+ ctxt->deletable = sent->deletable;
493
+ ctxt->null_block = opts->null_block;
494
+ ctxt->islands_ok = opts->islands_ok;
495
+
496
+ total = do_count(sent, -1, sent->length, NULL, NULL, null_count+1);
497
+
498
+ ctxt->local_sent = NULL;
499
+ ctxt->current_resources = NULL;
500
+ return total;
501
+ }
502
+
503
+ /**
504
+ CONJUNCTION PRUNING.
505
+
506
+ The basic idea is this. Before creating the fat disjuncts,
507
+ we run a modified version of the exhaustive search procedure.
508
+ Its purpose is to mark the disjuncts that can be used in any
509
+ linkage. It's just like the normal exhaustive search, except that
510
+ if a subrange of words is deletable, then we treat them as though
511
+ they were not even there. So, if we call the function in the
512
+ situation where the set of words between the left and right one
513
+ are deletable, and the left and right connector pointers
514
+ are NULL, then that range is considered to have a solution.
515
+
516
+ There are actually two procedures to implement this. One is
517
+ mark_region() and the other is region_valid(). The latter just
518
+ checks to see if the given region can be completed (within it).
519
+ The former actually marks those disjuncts that can be used in
520
+ any valid linkage of the given region.
521
+
522
+ As in the standard search procedure, we make use of the fast-match
523
+ data structure (which requires power pruning to have been done), and
524
+ we also use a hash table. The table is used differently in this case.
525
+ The meanings of the values stored in the table are as follows:
526
+
527
+ -1 Nothing known (Actually, this is not stored. It's returned
528
+ by table_lookup when nothing is known.)
529
+ 0 This region can't be completed (marking is therefore irrelevant)
530
+ 1 This region can be completed, but it's not yet marked
531
+ 2 This region can be completed, and it's been marked.
532
+ */
533
+
534
+ static int x_prune_match(count_context_t *ctxt,
535
+ Connector *le, Connector *re, int lw, int rw)
536
+ {
537
+ int dist;
538
+
539
+ assert(lw < rw, "prune_match() did not receive params in the natural order.");
540
+ dist = ctxt->effective_dist[lw][rw];
541
+ return prune_match(dist, le, re);
542
+ }
543
+
544
+ /**
545
+ * Returns 0 if this range cannot be successfully filled in with
546
+ * links. Returns 1 if it can, and it's not been marked, and returns
547
+ * 2 if it can and it has been marked.
548
+ */
549
+ static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
550
+ {
551
+ Disjunct * d;
552
+ int left_valid, right_valid, found;
553
+ int i, start_word, end_word;
554
+ int w;
555
+ Match_node * m, *m1;
556
+
557
+ count_context_t *ctxt = sent->count_ctxt;
558
+
559
+ i = table_lookup(sent, lw, rw, le, re, 0);
560
+ if (i >= 0) return i;
561
+
562
+ if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw]) {
563
+ table_store(ctxt, lw, rw, le, re, 0, 1);
564
+ return 1;
565
+ }
566
+
567
+ if (le == NULL) {
568
+ start_word = lw+1;
569
+ } else {
570
+ start_word = le->word;
571
+ }
572
+ if (re == NULL) {
573
+ end_word = rw;
574
+ } else {
575
+ end_word = re->word + 1;
576
+ }
577
+
578
+ found = 0;
579
+
580
+ for (w=start_word; w < end_word; w++)
581
+ {
582
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
583
+ for (; m!=NULL; m=m->next)
584
+ {
585
+ d = m->d;
586
+ /* mark_cost++;*/
587
+ /* in the following expressions we use the fact that 0=FALSE. Could eliminate
588
+ by always saying "region_valid(...) != 0" */
589
+ left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
590
+ ((region_valid(sent, lw, w, le->next, d->left->next)) ||
591
+ ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
592
+ ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
593
+ ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
594
+ if (left_valid && region_valid(sent, w, rw, d->right, re)) {
595
+ found = 1;
596
+ break;
597
+ }
598
+ right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
599
+ ((region_valid(sent, w, rw, d->right->next,re->next)) ||
600
+ ((d->right->multi) && region_valid(sent, w,rw,d->right,re->next)) ||
601
+ ((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
602
+ ((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
603
+ if ((left_valid && right_valid) || (right_valid && region_valid(sent, lw, w, le, d->left))) {
604
+ found = 1;
605
+ break;
606
+ }
607
+ }
608
+ put_match_list(sent, m1);
609
+ if (found != 0) break;
610
+ }
611
+ table_store(ctxt, lw, rw, le, re, 0, found);
612
+ return found;
613
+ }
614
+
615
+ /**
616
+ * Mark as useful all disjuncts involved in some way to complete the
617
+ * structure within the current region. Note that only disjuncts
618
+ * strictly between lw and rw will be marked. If it so happens that
619
+ * this region itself is not valid, then this fact will be recorded
620
+ * in the table, and nothing else happens.
621
+ */
622
+ static void mark_region(Sentence sent,
623
+ int lw, int rw, Connector *le, Connector *re)
624
+ {
625
+
626
+ Disjunct * d;
627
+ int left_valid, right_valid, i;
628
+ int start_word, end_word;
629
+ int w;
630
+ Match_node * m, *m1;
631
+ count_context_t *ctxt = sent->count_ctxt;
632
+
633
+ i = region_valid(sent, lw, rw, le, re);
634
+ if ((i==0) || (i==2)) return;
635
+ /* we only reach this point if it's a valid unmarked region, i=1 */
636
+ table_update(ctxt, lw, rw, le, re, 0, 2);
637
+
638
+ if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw)) {
639
+ w = lw+1;
640
+ for (d = ctxt->local_sent[w].d; d != NULL; d = d->next) {
641
+ if ((d->left == NULL) && region_valid(sent, w, rw, d->right, NULL)) {
642
+ d->marked = TRUE;
643
+ mark_region(sent, w, rw, d->right, NULL);
644
+ }
645
+ }
646
+ mark_region(sent, w, rw, NULL, NULL);
647
+ return;
648
+ }
649
+
650
+ if (le == NULL) {
651
+ start_word = lw+1;
652
+ } else {
653
+ start_word = le->word;
654
+ }
655
+ if (re == NULL) {
656
+ end_word = rw;
657
+ } else {
658
+ end_word = re->word + 1;
659
+ }
660
+
661
+ for (w=start_word; w < end_word; w++)
662
+ {
663
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
664
+ for (; m!=NULL; m=m->next)
665
+ {
666
+ d = m->d;
667
+ /* mark_cost++;*/
668
+ left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
669
+ ((region_valid(sent, lw, w, le->next, d->left->next)) ||
670
+ ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
671
+ ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
672
+ ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
673
+ right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
674
+ ((region_valid(sent, w, rw, d->right->next,re->next)) ||
675
+ ((d->right->multi) && region_valid(sent, w,rw,d->right,re->next)) ||
676
+ ((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
677
+ ((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
678
+
679
+ /* The following if statements could be restructured to avoid superfluous calls
680
+ to mark_region. It didn't seem a high priority, so I didn't optimize this.
681
+ */
682
+
683
+ if (left_valid && region_valid(sent, w, rw, d->right, re))
684
+ {
685
+ d->marked = TRUE;
686
+ mark_region(sent, w, rw, d->right, re);
687
+ mark_region(sent, lw, w, le->next, d->left->next);
688
+ if (le->multi) mark_region(sent, lw, w, le, d->left->next);
689
+ if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
690
+ if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
691
+ }
692
+
693
+ if (right_valid && region_valid(sent, lw, w, le, d->left))
694
+ {
695
+ d->marked = TRUE;
696
+ mark_region(sent, lw, w, le, d->left);
697
+ mark_region(sent, w, rw, d->right->next,re->next);
698
+ if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
699
+ if (re->multi) mark_region(sent, w, rw, d->right->next, re);
700
+ if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
701
+ }
702
+
703
+ if (left_valid && right_valid)
704
+ {
705
+ d->marked = TRUE;
706
+ mark_region(sent, lw, w, le->next, d->left->next);
707
+ if (le->multi) mark_region(sent, lw, w, le, d->left->next);
708
+ if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
709
+ if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
710
+ mark_region(sent, w, rw, d->right->next,re->next);
711
+ if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
712
+ if (re->multi) mark_region(sent, w, rw, d->right->next, re);
713
+ if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
714
+ }
715
+ }
716
+ put_match_list(sent, m1);
717
+ }
718
+ }
719
+
720
+ void delete_unmarked_disjuncts(Sentence sent)
721
+ {
722
+ int w;
723
+ Disjunct *d_head, *d, *dx;
724
+
725
+ for (w=0; w<sent->length; w++) {
726
+ d_head = NULL;
727
+ for (d=sent->word[w].d; d != NULL; d=dx) {
728
+ dx = d->next;
729
+ if (d->marked) {
730
+ d->next = d_head;
731
+ d_head = d;
732
+ } else {
733
+ d->next = NULL;
734
+ free_disjuncts(d);
735
+ }
736
+ }
737
+ sent->word[w].d = d_head;
738
+ }
739
+ }
740
+
741
+ /**
742
+ * We've already built the sentence disjuncts, and we've pruned them
743
+ * and power_pruned(GENTLE) them also. The sentence contains a
744
+ * conjunction. deletable[][] has been initialized to indicate the
745
+ * ranges which may be deleted in the final linkage.
746
+ *
747
+ * This routine deletes irrelevant disjuncts. It finds them by first
748
+ * marking them all as irrelevant, and then marking the ones that
749
+ * might be useable. Finally, the unmarked ones are removed.
750
+ */
751
+ void conjunction_prune(Sentence sent, Parse_Options opts)
752
+ {
753
+ Disjunct * d;
754
+ int w;
755
+ count_context_t *ctxt = sent->count_ctxt;
756
+
757
+ ctxt->current_resources = opts->resources;
758
+ ctxt->deletable = sent->deletable;
759
+ count_set_effective_distance(sent);
760
+
761
+ /* We begin by unmarking all disjuncts. This would not be necessary if
762
+ whenever we created a disjunct we cleared its marked field.
763
+ I didn't want to search the program for all such places, so
764
+ I did this way. XXX FIXME, someday ...
765
+ */
766
+ for (w=0; w<sent->length; w++) {
767
+ for (d=sent->word[w].d; d != NULL; d=d->next) {
768
+ d->marked = FALSE;
769
+ }
770
+ }
771
+
772
+ init_fast_matcher(sent);
773
+ ctxt->local_sent = sent->word;
774
+ ctxt->null_links = (opts->min_null_count > 0);
775
+ /*
776
+ for (d = sent->word[0].d; d != NULL; d = d->next) {
777
+ if ((d->left == NULL) && region_valid(sent, 0, sent->length, d->right, NULL)) {
778
+ mark_region(sent, 0, sent->length, d->right, NULL);
779
+ d->marked = TRUE;
780
+ }
781
+ }
782
+ mark_region(sent, 0, sent->length, NULL, NULL);
783
+ */
784
+
785
+ if (ctxt->null_links) {
786
+ mark_region(sent, -1, sent->length, NULL, NULL);
787
+ } else {
788
+ for (w=0; w<sent->length; w++) {
789
+ /* consider removing the words [0,w-1] from the beginning
790
+ of the sentence */
791
+ if (ctxt->deletable[-1][w]) {
792
+ for (d = sent->word[w].d; d != NULL; d = d->next) {
793
+ if ((d->left == NULL) && region_valid(sent, w, sent->length, d->right, NULL)) {
794
+ mark_region(sent, w, sent->length, d->right, NULL);
795
+ d->marked = TRUE;
796
+ }
797
+ }
798
+ }
799
+ }
800
+ }
801
+
802
+ delete_unmarked_disjuncts(sent);
803
+
804
+ free_fast_matcher(sent);
805
+
806
+ ctxt->local_sent = NULL;
807
+ ctxt->current_resources = NULL;
808
+ ctxt->deletable = NULL;
809
+ count_unset_effective_distance(sent);
810
+ }
811
+
812
+ void init_count(Sentence sent)
813
+ {
814
+ if (NULL == sent->count_ctxt)
815
+ sent->count_ctxt = (count_context_t *) malloc (sizeof(count_context_t));
816
+ memset(sent->count_ctxt, 0, sizeof(count_context_t));
817
+
818
+ init_table(sent);
819
+ }
820
+
821
+ void free_count(Sentence sent)
822
+ {
823
+ if (NULL == sent->count_ctxt) return;
824
+
825
+ free_table(sent->count_ctxt);
826
+ free(sent->count_ctxt);
827
+ sent->count_ctxt = NULL;
828
+ }