grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,46 @@
1
+ /*
2
+ * corpus.h
3
+ *
4
+ * Data for corpus statistics, used to provide a parse ranking
5
+ * to drive the SAT solver, as well as parse ranking with the
6
+ * ordinary solver.
7
+ *
8
+ * Copyright (c) 2008, 2009 Linas Vepstas <linasvepstas@gmail.com>
9
+ */
10
+
11
+ #ifndef _LINKGRAMMAR_CORPUS_H
12
+ #define _LINKGRAMMAR_CORPUS_H
13
+
14
+ #ifdef USE_CORPUS
15
+
16
+ #include "../api-types.h"
17
+ #include "../link-includes.h"
18
+
19
+ Corpus * lg_corpus_new(void);
20
+ void lg_corpus_delete(Corpus *);
21
+
22
+ void lg_corpus_score(Sentence, Linkage_info *);
23
+ double lg_corpus_disjunct_score(Linkage linkage, int w);
24
+ void lg_corpus_linkage_senses(Linkage);
25
+
26
+ Sense * lg_get_word_sense(Linkage_info *, int word);
27
+ Sense * lg_sense_next(Sense *);
28
+ int lg_sense_get_index(Sense *);
29
+ const char * lg_sense_get_subscripted_word(Sense *);
30
+ const char * lg_sense_get_disjunct(Sense *);
31
+ const char * lg_sense_get_sense(Sense *);
32
+ double lg_sense_get_score(Sense *);
33
+ void lg_sense_delete(Linkage_info *);
34
+
35
+ #else /* USE_CORPUS */
36
+
37
+ static inline void lg_corpus_score(Sentence s, Linkage_info *li) {}
38
+ static inline void lg_corpus_linkage_senses(Linkage l) {}
39
+ static inline Sense * lg_get_word_sense(Linkage_info *lif, int word) { return NULL; }
40
+ static inline Sense * lg_sense_next(Sense *s ) {return NULL; }
41
+ static inline const char * lg_sense_get_sense(Sense *s) { return NULL; }
42
+ static inline double lg_sense_get_score(Sense *s) { return 0.0; }
43
+ static inline double lg_corpus_disjunct_score(Linkage linkage, int w) { return 998.0; }
44
+ #endif /* USE_CORPUS */
45
+
46
+ #endif /* _LINKGRAMMAR_CORPUS_H */
@@ -0,0 +1,828 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+ #include "disjunct-utils.h"
16
+
17
+ /* This file contains the exhaustive search algorithm. */
18
+
19
+ typedef struct Table_connector_s Table_connector;
20
+ struct Table_connector_s
21
+ {
22
+ short lw, rw;
23
+ Connector *le, *re;
24
+ short cost;
25
+ s64 count;
26
+ Table_connector *next;
27
+ };
28
+
29
+ struct count_context_s
30
+ {
31
+ char ** deletable;
32
+ char ** effective_dist;
33
+ Word * local_sent;
34
+ int null_block;
35
+ int islands_ok;
36
+ int null_links;
37
+ int table_size;
38
+ int log2_table_size;
39
+ Table_connector ** table;
40
+ Resources current_resources;
41
+ };
42
+
43
+ static void free_table(count_context_t *ctxt)
44
+ {
45
+ int i;
46
+ Table_connector *t, *x;
47
+
48
+ for (i=0; i<ctxt->table_size; i++)
49
+ {
50
+ for(t = ctxt->table[i]; t!= NULL; t=x)
51
+ {
52
+ x = t->next;
53
+ xfree((void *) t, sizeof(Table_connector));
54
+ }
55
+ }
56
+ xfree(ctxt->table, ctxt->table_size * sizeof(Table_connector*));
57
+ ctxt->table = NULL;
58
+ ctxt->table_size = 0;
59
+ }
60
+
61
+ static void init_table(Sentence sent)
62
+ {
63
+ int shift;
64
+ /* A piecewise exponential function determines the size of the
65
+ * hash table. Probably should make use of the actual number of
66
+ * disjuncts, rather than just the number of words.
67
+ */
68
+ count_context_t *ctxt = sent->count_ctxt;
69
+
70
+ if (ctxt->table) free_table(ctxt);
71
+
72
+ if (sent->length >= 10)
73
+ {
74
+ shift = 12 + (sent->length) / 6 ;
75
+ }
76
+ else
77
+ {
78
+ shift = 12;
79
+ }
80
+
81
+ /* Clamp at max 4*(1<<24) == 64 MBytes */
82
+ if (24 < shift) shift = 24;
83
+ ctxt->table_size = (1 << shift);
84
+ ctxt->log2_table_size = shift;
85
+ ctxt->table = (Table_connector**)
86
+ xalloc(ctxt->table_size * sizeof(Table_connector*));
87
+ memset(ctxt->table, 0, ctxt->table_size*sizeof(Table_connector*));
88
+ }
89
+
90
+ int x_match(Sentence sent, Connector *a, Connector *b)
91
+ {
92
+ return do_match(sent, a, b, 0, 0);
93
+ }
94
+
95
+ void count_set_effective_distance(Sentence sent)
96
+ {
97
+ sent->count_ctxt->effective_dist = sent->effective_dist;
98
+ }
99
+
100
+ void count_unset_effective_distance(Sentence sent)
101
+ {
102
+ sent->count_ctxt->effective_dist = NULL;
103
+ }
104
+
105
+ /*
106
+ * Returns TRUE if a and b match according to the connector matching
107
+ * rules. The connector strings must be properly formed, starting with
108
+ * zero or more upper case letters, followed by some other letters.
109
+ * The algorithm is symmetric with respect to a and b.
110
+ *
111
+ * It works as follows: The labels must match. The priorities must be
112
+ * compatible (both THIN_priority, or one UP_priority and one DOWN_priority).
113
+ * The sequence of upper case letters must match exactly. After these comes
114
+ * a sequence of lower case letters "*"s or "^"s. The matching algorithm
115
+ * is different depending on which of the two priority cases is being
116
+ * considered. See the comments below.
117
+ */
118
+ int do_match(Sentence sent, Connector *a, Connector *b, int aw, int bw)
119
+ {
120
+ const char *s, *t;
121
+ int x, y, dist;
122
+ count_context_t *ctxt;
123
+
124
+ if (a->label != b->label) return FALSE;
125
+
126
+ s = a->string;
127
+ t = b->string;
128
+
129
+ while(isupper((int)*s) || isupper((int)*t))
130
+ {
131
+ if (*s != *t) return FALSE;
132
+ s++;
133
+ t++;
134
+ }
135
+
136
+ ctxt = sent->count_ctxt;
137
+
138
+ x = a->priority;
139
+ y = b->priority;
140
+
141
+ /* Probably not necessary, as long as
142
+ * effective_dist[0][0]=0 and is defined */
143
+ if (aw == 0 && bw == 0) {
144
+ dist = 0;
145
+ } else {
146
+ assert(aw < bw, "match() did not receive params in the natural order.");
147
+ dist = ctxt->effective_dist[aw][bw];
148
+ }
149
+ /* printf("M: a=%4s b=%4s ap=%d bp=%d aw=%d bw=%d a->ll=%d b->ll=%d dist=%d\n",
150
+ s, t, x, y, aw, bw, a->length_limit, b->length_limit, dist); */
151
+ if (dist > a->length_limit || dist > b->length_limit) return FALSE;
152
+
153
+ if ((x == THIN_priority) && (y == THIN_priority))
154
+ {
155
+ /*
156
+ Remember that "*" matches anything, and "^" matches nothing
157
+ (except "*"). Otherwise two characters match if and only if
158
+ they're equal. ("^" can be used in the dictionary just like
159
+ any other connector.)
160
+ */
161
+ while ((*s!='\0') && (*t!='\0')) {
162
+ if ((*s == '*') || (*t == '*') ||
163
+ ((*s == *t) && (*s != '^'))) {
164
+ s++;
165
+ t++;
166
+ } else return FALSE;
167
+ }
168
+ return TRUE;
169
+ } else if ((x==UP_priority) && (y==DOWN_priority)) {
170
+ /*
171
+ As you go up (namely from x to y) the set of strings that
172
+ match (in the normal THIN sense above) should get no larger.
173
+ Read the comment in and.c to understand this.
174
+ In other words, the y string (t) must be weaker (or at least
175
+ no stronger) than the x string (s).
176
+
177
+ This code is only correct if the strings are the same
178
+ length. This is currently true, but perhaps for safety
179
+ this assumption should be removed.
180
+ */
181
+ while ((*s!='\0') && (*t!='\0')) {
182
+ if ((*s == *t) || (*s == '*') || (*t == '^')) {
183
+ s++;
184
+ t++;
185
+ } else return FALSE;
186
+ }
187
+ return TRUE;
188
+ }
189
+ else if ((y == UP_priority) && (x == DOWN_priority))
190
+ {
191
+ while ((*s!='\0') && (*t!='\0'))
192
+ {
193
+ if ((*s == *t) || (*t == '*') || (*s == '^'))
194
+ {
195
+ s++;
196
+ t++;
197
+ }
198
+ else
199
+ return FALSE;
200
+ }
201
+ return TRUE;
202
+ }
203
+ else
204
+ return FALSE;
205
+ }
206
+
207
+ /**
208
+ * Stores the value in the table. Assumes it's not already there.
209
+ */
210
+ static Table_connector * table_store(count_context_t *ctxt,
211
+ int lw, int rw,
212
+ Connector *le, Connector *re,
213
+ int cost, s64 count)
214
+ {
215
+ Table_connector *t, *n;
216
+ int h;
217
+
218
+ n = (Table_connector *) xalloc(sizeof(Table_connector));
219
+ n->count = count;
220
+ n->lw = lw; n->rw = rw; n->le = le; n->re = re; n->cost = cost;
221
+ h = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
222
+ t = ctxt->table[h];
223
+ n->next = t;
224
+ ctxt->table[h] = n;
225
+ return n;
226
+ }
227
+
228
+ /** returns the pointer to this info, NULL if not there */
229
+ static Table_connector *
230
+ find_table_pointer(count_context_t *ctxt,
231
+ int lw, int rw,
232
+ Connector *le, Connector *re,
233
+ int cost)
234
+ {
235
+ Table_connector *t;
236
+ int h = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
237
+ t = ctxt->table[h];
238
+ for (; t != NULL; t = t->next) {
239
+ if ((t->lw == lw) && (t->rw == rw) && (t->le == le) && (t->re == re)
240
+ && (t->cost == cost)) return t;
241
+ }
242
+
243
+ /* Create a new connector only if resources are exhausted.
244
+ * (???) Huh? I guess we're in panic parse mode in that case.
245
+ */
246
+ if ((ctxt->current_resources != NULL) &&
247
+ resources_exhausted(ctxt->current_resources))
248
+ {
249
+ return table_store(ctxt, lw, rw, le, re, cost, 0);
250
+ }
251
+ else return NULL;
252
+ }
253
+
254
+ /** returns the count for this quintuple if there, -1 otherwise */
255
+ s64 table_lookup(Sentence sent,
256
+ int lw, int rw, Connector *le, Connector *re, int cost)
257
+ {
258
+ Table_connector *t = find_table_pointer(sent->count_ctxt, lw, rw, le, re, cost);
259
+
260
+ if (t == NULL) return -1; else return t->count;
261
+ }
262
+
263
+ /**
264
+ * Stores the value in the table. Unlike table_store, it assumes
265
+ * it's already there
266
+ */
267
+ static void table_update(count_context_t *ctxt, int lw, int rw,
268
+ Connector *le, Connector *re,
269
+ int cost, s64 count)
270
+ {
271
+ Table_connector *t = find_table_pointer(ctxt, lw, rw, le, re, cost);
272
+
273
+ assert(t != NULL, "This entry is supposed to be in the table.");
274
+ t->count = count;
275
+ }
276
+
277
+ /**
278
+ * Returns 0 if and only if this entry is in the hash table
279
+ * with a count value of 0.
280
+ */
281
+ static s64 pseudocount(Sentence sent,
282
+ int lw, int rw, Connector *le, Connector *re, int cost)
283
+ {
284
+ s64 count;
285
+ count = table_lookup(sent, lw, rw, le, re, cost);
286
+ if (count == 0) return 0; else return 1;
287
+ }
288
+
289
+ static s64 do_count(Sentence sent, int lw, int rw,
290
+ Connector *le, Connector *re, int null_count)
291
+ {
292
+ Disjunct * d;
293
+ s64 total, pseudototal;
294
+ int start_word, end_word, w;
295
+ s64 leftcount, rightcount;
296
+ int lcost, rcost, Lmatch, Rmatch;
297
+
298
+ Match_node * m, *m1;
299
+ Table_connector *t;
300
+
301
+ count_context_t *ctxt = sent->count_ctxt;
302
+
303
+ if (null_count < 0) return 0; /* can this ever happen?? */
304
+
305
+ t = find_table_pointer(ctxt, lw, rw, le, re, null_count);
306
+
307
+ if (t == NULL) {
308
+ /* Create the table entry with a tentative count of 0.
309
+ * This count must be updated before we return. */
310
+ t = table_store(ctxt, lw, rw, le, re, null_count, 0);
311
+ } else {
312
+ return t->count;
313
+ }
314
+
315
+ if (rw == 1+lw)
316
+ {
317
+ /* lw and rw are neighboring words */
318
+ /* You can't have a linkage here with null_count > 0 */
319
+ if ((le == NULL) && (re == NULL) && (null_count == 0))
320
+ {
321
+ t->count = 1;
322
+ }
323
+ else
324
+ {
325
+ t->count = 0;
326
+ }
327
+ return t->count;
328
+ }
329
+
330
+ if ((le == NULL) && (re == NULL))
331
+ {
332
+ if (!ctxt->islands_ok && (lw != -1))
333
+ {
334
+ /* If we don't allow islands (a set of words linked together
335
+ * but separate from the rest of the sentence) then the
336
+ * null_count of skipping n words is just n */
337
+ if (null_count == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block)
338
+ {
339
+ /* If null_block=4 then the null_count of
340
+ 1,2,3,4 nulls is 1; and 5,6,7,8 is 2 etc. */
341
+ t->count = 1;
342
+ }
343
+ else
344
+ {
345
+ t->count = 0;
346
+ }
347
+ return t->count;
348
+ }
349
+ if (null_count == 0)
350
+ {
351
+ /* There is no solution without nulls in this case. There is
352
+ * a slight efficiency hack to separate this null_count==0
353
+ * case out, but not necessary for correctness */
354
+ t->count = 0;
355
+ }
356
+ else
357
+ {
358
+ total = 0;
359
+ w = lw+1;
360
+ for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
361
+ {
362
+ if (d->left == NULL)
363
+ {
364
+ total += do_count(sent, w, rw, d->right, NULL, null_count-1);
365
+ }
366
+ }
367
+ total += do_count(sent, w, rw, NULL, NULL, null_count-1);
368
+ t->count = total;
369
+ }
370
+ return t->count;
371
+ }
372
+
373
+ if (le == NULL)
374
+ {
375
+ start_word = lw+1;
376
+ }
377
+ else
378
+ {
379
+ start_word = le->word;
380
+ }
381
+
382
+ if (re == NULL)
383
+ {
384
+ end_word = rw;
385
+ }
386
+ else
387
+ {
388
+ end_word = re->word +1;
389
+ }
390
+
391
+ total = 0;
392
+
393
+ for (w = start_word; w < end_word; w++)
394
+ {
395
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
396
+ for (; m!=NULL; m=m->next)
397
+ {
398
+ d = m->d;
399
+ for (lcost = 0; lcost <= null_count; lcost++)
400
+ {
401
+ rcost = null_count - lcost;
402
+ /* Now lcost and rcost are the costs we're assigning
403
+ * to those parts respectively */
404
+
405
+ /* Now, we determine if (based on table only) we can see that
406
+ the current range is not parsable. */
407
+ Lmatch = (le != NULL) && (d->left != NULL) &&
408
+ do_match(sent, le, d->left, lw, w);
409
+ Rmatch = (d->right != NULL) && (re != NULL) &&
410
+ do_match(sent, d->right, re, w, rw);
411
+
412
+ rightcount = leftcount = 0;
413
+ if (Lmatch)
414
+ {
415
+ leftcount = pseudocount(sent, lw, w, le->next, d->left->next, lcost);
416
+ if (le->multi) leftcount += pseudocount(sent, lw, w, le, d->left->next, lcost);
417
+ if (d->left->multi) leftcount += pseudocount(sent, lw, w, le->next, d->left, lcost);
418
+ if (le->multi && d->left->multi) leftcount += pseudocount(sent, lw, w, le, d->left, lcost);
419
+ }
420
+
421
+ if (Rmatch)
422
+ {
423
+ rightcount = pseudocount(sent, w, rw, d->right->next, re->next, rcost);
424
+ if (d->right->multi) rightcount += pseudocount(sent, w,rw,d->right,re->next, rcost);
425
+ if (re->multi) rightcount += pseudocount(sent, w, rw, d->right->next, re, rcost);
426
+ if (d->right->multi && re->multi) rightcount += pseudocount(sent, w, rw, d->right, re, rcost);
427
+ }
428
+
429
+ /* total number where links are used on both sides */
430
+ pseudototal = leftcount*rightcount;
431
+
432
+ if (leftcount > 0) {
433
+ /* evaluate using the left match, but not the right */
434
+ pseudototal += leftcount * pseudocount(sent, w, rw, d->right, re, rcost);
435
+ }
436
+ if ((le == NULL) && (rightcount > 0)) {
437
+ /* evaluate using the right match, but not the left */
438
+ pseudototal += rightcount * pseudocount(sent, lw, w, le, d->left, lcost);
439
+ }
440
+
441
+ /* now pseudototal is 0 implies that we know that the true total is 0 */
442
+ if (pseudototal != 0) {
443
+ rightcount = leftcount = 0;
444
+ if (Lmatch) {
445
+ leftcount = do_count(sent, lw, w, le->next, d->left->next, lcost);
446
+ if (le->multi) leftcount += do_count(sent, lw, w, le, d->left->next, lcost);
447
+ if (d->left->multi) leftcount += do_count(sent, lw, w, le->next, d->left, lcost);
448
+ if (le->multi && d->left->multi) leftcount += do_count(sent, lw, w, le, d->left, lcost);
449
+ }
450
+
451
+ if (Rmatch) {
452
+ rightcount = do_count(sent, w, rw, d->right->next, re->next, rcost);
453
+ if (d->right->multi) rightcount += do_count(sent, w,rw,d->right,re->next, rcost);
454
+ if (re->multi) rightcount += do_count(sent, w, rw, d->right->next, re, rcost);
455
+ if (d->right->multi && re->multi) rightcount += do_count(sent, w, rw, d->right, re, rcost);
456
+ }
457
+
458
+ total += leftcount*rightcount; /* total number where links are used on both sides */
459
+
460
+ if (leftcount > 0) {
461
+ /* evaluate using the left match, but not the right */
462
+ total += leftcount * do_count(sent, w, rw, d->right, re, rcost);
463
+ }
464
+ if ((le == NULL) && (rightcount > 0)) {
465
+ /* evaluate using the right match, but not the left */
466
+ total += rightcount * do_count(sent, lw, w, le, d->left, lcost);
467
+ }
468
+ }
469
+ }
470
+ }
471
+
472
+ put_match_list(sent, m1);
473
+ }
474
+ t->count = total;
475
+ return total;
476
+ }
477
+
478
+ /**
479
+ * Returns the number of ways the sentence can be parsed with the
480
+ * specified null count. Assumes that the hash table has already been
481
+ * initialized, and is freed later. The "null_count" here is the
482
+ * number of words that are allowed to have no links to them.
483
+ */
484
+ s64 do_parse(Sentence sent, int null_count, Parse_Options opts)
485
+ {
486
+ s64 total;
487
+ count_context_t *ctxt = sent->count_ctxt;
488
+
489
+ count_set_effective_distance(sent);
490
+ ctxt->current_resources = opts->resources;
491
+ ctxt->local_sent = sent->word;
492
+ ctxt->deletable = sent->deletable;
493
+ ctxt->null_block = opts->null_block;
494
+ ctxt->islands_ok = opts->islands_ok;
495
+
496
+ total = do_count(sent, -1, sent->length, NULL, NULL, null_count+1);
497
+
498
+ ctxt->local_sent = NULL;
499
+ ctxt->current_resources = NULL;
500
+ return total;
501
+ }
502
+
503
+ /**
504
+ CONJUNCTION PRUNING.
505
+
506
+ The basic idea is this. Before creating the fat disjuncts,
507
+ we run a modified version of the exhaustive search procedure.
508
+ Its purpose is to mark the disjuncts that can be used in any
509
+ linkage. It's just like the normal exhaustive search, except that
510
+ if a subrange of words is deletable, then we treat those words as though
511
+ they were not even there. So, if we call the function in the
512
+ situation where the set of words between the left and right one
513
+ are deletable, and the left and right connector pointers
514
+ are NULL, then that range is considered to have a solution.
515
+
516
+ There are actually two procedures to implement this. One is
517
+ mark_region() and the other is region_valid(). The latter just
518
+ checks to see if the given region can be completed (within it).
519
+ The former actually marks those disjuncts that can be used in
520
+ any valid linkage of the given region.
521
+
522
+ As in the standard search procedure, we make use of the fast-match
523
+ data structure (which requires power pruning to have been done), and
524
+ we also use a hash table. The table is used differently in this case.
525
+ The meanings of the values stored in the table are as follows:
526
+
527
+ -1 Nothing known (Actually, this is not stored. It's returned
528
+ by table_lookup when nothing is known.)
529
+ 0 This region can't be completed (marking is therefore irrelevant)
530
+ 1 This region can be completed, but it's not yet marked
531
+ 2 This region can be completed, and it's been marked.
532
+ */
533
+
534
+ static int x_prune_match(count_context_t *ctxt,
535
+ Connector *le, Connector *re, int lw, int rw)
536
+ {
537
+ int dist;
538
+
539
+ assert(lw < rw, "prune_match() did not receive params in the natural order.");
540
+ dist = ctxt->effective_dist[lw][rw];
541
+ return prune_match(dist, le, re);
542
+ }
543
+
544
+ /**
545
+ * Returns 0 if this range cannot be successfully filled in with
546
+ * links. Returns 1 if it can, and it's not been marked, and returns
547
+ * 2 if it can and it has been marked.
548
+ */
549
+ static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
550
+ {
551
+ Disjunct * d;
552
+ int left_valid, right_valid, found;
553
+ int i, start_word, end_word;
554
+ int w;
555
+ Match_node * m, *m1;
556
+
557
+ count_context_t *ctxt = sent->count_ctxt;
558
+
559
+ i = table_lookup(sent, lw, rw, le, re, 0);
560
+ if (i >= 0) return i;
561
+
562
+ if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw]) {
563
+ table_store(ctxt, lw, rw, le, re, 0, 1);
564
+ return 1;
565
+ }
566
+
567
+ if (le == NULL) {
568
+ start_word = lw+1;
569
+ } else {
570
+ start_word = le->word;
571
+ }
572
+ if (re == NULL) {
573
+ end_word = rw;
574
+ } else {
575
+ end_word = re->word + 1;
576
+ }
577
+
578
+ found = 0;
579
+
580
+ for (w=start_word; w < end_word; w++)
581
+ {
582
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
583
+ for (; m!=NULL; m=m->next)
584
+ {
585
+ d = m->d;
586
+ /* mark_cost++;*/
587
+ /* in the following expressions we use the fact that 0=FALSE. Could eliminate
588
+ by always saying "region_valid(...) != 0" */
589
+ left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
590
+ ((region_valid(sent, lw, w, le->next, d->left->next)) ||
591
+ ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
592
+ ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
593
+ ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
594
+ if (left_valid && region_valid(sent, w, rw, d->right, re)) {
595
+ found = 1;
596
+ break;
597
+ }
598
+ right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
599
+ ((region_valid(sent, w, rw, d->right->next,re->next)) ||
600
+ ((d->right->multi) && region_valid(sent, w,rw,d->right,re->next)) ||
601
+ ((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
602
+ ((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
603
+ if ((left_valid && right_valid) || (right_valid && region_valid(sent, lw, w, le, d->left))) {
604
+ found = 1;
605
+ break;
606
+ }
607
+ }
608
+ put_match_list(sent, m1);
609
+ if (found != 0) break;
610
+ }
611
+ table_store(ctxt, lw, rw, le, re, 0, found);
612
+ return found;
613
+ }
614
+
615
+ /**
616
+ * Mark as useful all disjuncts involved in some way to complete the
617
+ * structure within the current region. Note that only disjuncts
618
+ * strictly between lw and rw will be marked. If it so happens that
619
+ * this region itself is not valid, then this fact will be recorded
620
+ * in the table, and nothing else happens.
621
+ */
622
+ static void mark_region(Sentence sent,
623
+ int lw, int rw, Connector *le, Connector *re)
624
+ {
625
+
626
+ Disjunct * d;
627
+ int left_valid, right_valid, i;
628
+ int start_word, end_word;
629
+ int w;
630
+ Match_node * m, *m1;
631
+ count_context_t *ctxt = sent->count_ctxt;
632
+
633
+ i = region_valid(sent, lw, rw, le, re);
634
+ if ((i==0) || (i==2)) return;
635
+ /* we only reach this point if it's a valid unmarked region, i=1 */
636
+ table_update(ctxt, lw, rw, le, re, 0, 2);
637
+
638
+ if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw)) {
639
+ w = lw+1;
640
+ for (d = ctxt->local_sent[w].d; d != NULL; d = d->next) {
641
+ if ((d->left == NULL) && region_valid(sent, w, rw, d->right, NULL)) {
642
+ d->marked = TRUE;
643
+ mark_region(sent, w, rw, d->right, NULL);
644
+ }
645
+ }
646
+ mark_region(sent, w, rw, NULL, NULL);
647
+ return;
648
+ }
649
+
650
+ if (le == NULL) {
651
+ start_word = lw+1;
652
+ } else {
653
+ start_word = le->word;
654
+ }
655
+ if (re == NULL) {
656
+ end_word = rw;
657
+ } else {
658
+ end_word = re->word + 1;
659
+ }
660
+
661
+ for (w=start_word; w < end_word; w++)
662
+ {
663
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
664
+ for (; m!=NULL; m=m->next)
665
+ {
666
+ d = m->d;
667
+ /* mark_cost++;*/
668
+ left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
669
+ ((region_valid(sent, lw, w, le->next, d->left->next)) ||
670
+ ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
671
+ ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
672
+ ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
673
+ right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
674
+ ((region_valid(sent, w, rw, d->right->next,re->next)) ||
675
+ ((d->right->multi) && region_valid(sent, w,rw,d->right,re->next)) ||
676
+ ((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
677
+ ((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
678
+
679
+ /* The following if statements could be restructured to avoid superfluous calls
680
+ to mark_region. It didn't seem a high priority, so I didn't optimize this.
681
+ */
682
+
683
+ if (left_valid && region_valid(sent, w, rw, d->right, re))
684
+ {
685
+ d->marked = TRUE;
686
+ mark_region(sent, w, rw, d->right, re);
687
+ mark_region(sent, lw, w, le->next, d->left->next);
688
+ if (le->multi) mark_region(sent, lw, w, le, d->left->next);
689
+ if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
690
+ if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
691
+ }
692
+
693
+ if (right_valid && region_valid(sent, lw, w, le, d->left))
694
+ {
695
+ d->marked = TRUE;
696
+ mark_region(sent, lw, w, le, d->left);
697
+ mark_region(sent, w, rw, d->right->next,re->next);
698
+ if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
699
+ if (re->multi) mark_region(sent, w, rw, d->right->next, re);
700
+ if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
701
+ }
702
+
703
+ if (left_valid && right_valid)
704
+ {
705
+ d->marked = TRUE;
706
+ mark_region(sent, lw, w, le->next, d->left->next);
707
+ if (le->multi) mark_region(sent, lw, w, le, d->left->next);
708
+ if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
709
+ if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
710
+ mark_region(sent, w, rw, d->right->next,re->next);
711
+ if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
712
+ if (re->multi) mark_region(sent, w, rw, d->right->next, re);
713
+ if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
714
+ }
715
+ }
716
+ put_match_list(sent, m1);
717
+ }
718
+ }
719
+
720
+ void delete_unmarked_disjuncts(Sentence sent)
721
+ {
722
+ int w;
723
+ Disjunct *d_head, *d, *dx;
724
+
725
+ for (w=0; w<sent->length; w++) {
726
+ d_head = NULL;
727
+ for (d=sent->word[w].d; d != NULL; d=dx) {
728
+ dx = d->next;
729
+ if (d->marked) {
730
+ d->next = d_head;
731
+ d_head = d;
732
+ } else {
733
+ d->next = NULL;
734
+ free_disjuncts(d);
735
+ }
736
+ }
737
+ sent->word[w].d = d_head;
738
+ }
739
+ }
740
+
741
+ /**
742
+ * We've already built the sentence disjuncts, and we've pruned them
743
+ * and power_pruned(GENTLE) them also. The sentence contains a
744
+ * conjunction. deletable[][] has been initialized to indicate the
745
+ * ranges which may be deleted in the final linkage.
746
+ *
747
+ * This routine deletes irrelevant disjuncts. It finds them by first
748
+ * marking them all as irrelevant, and then marking the ones that
749
+ * might be useable. Finally, the unmarked ones are removed.
750
+ */
751
+ void conjunction_prune(Sentence sent, Parse_Options opts)
752
+ {
753
+ Disjunct * d;
754
+ int w;
755
+ count_context_t *ctxt = sent->count_ctxt;
756
+
757
+ ctxt->current_resources = opts->resources;
758
+ ctxt->deletable = sent->deletable;
759
+ count_set_effective_distance(sent);
760
+
761
+ /* We begin by unmarking all disjuncts. This would not be necessary if
762
+ whenever we created a disjunct we cleared its marked field.
763
+ I didn't want to search the program for all such places, so
764
+ I did this way. XXX FIXME, someday ...
765
+ */
766
+ for (w=0; w<sent->length; w++) {
767
+ for (d=sent->word[w].d; d != NULL; d=d->next) {
768
+ d->marked = FALSE;
769
+ }
770
+ }
771
+
772
+ init_fast_matcher(sent);
773
+ ctxt->local_sent = sent->word;
774
+ ctxt->null_links = (opts->min_null_count > 0);
775
+ /*
776
+ for (d = sent->word[0].d; d != NULL; d = d->next) {
777
+ if ((d->left == NULL) && region_valid(sent, 0, sent->length, d->right, NULL)) {
778
+ mark_region(sent, 0, sent->length, d->right, NULL);
779
+ d->marked = TRUE;
780
+ }
781
+ }
782
+ mark_region(sent, 0, sent->length, NULL, NULL);
783
+ */
784
+
785
+ if (ctxt->null_links) {
786
+ mark_region(sent, -1, sent->length, NULL, NULL);
787
+ } else {
788
+ for (w=0; w<sent->length; w++) {
789
+ /* consider removing the words [0,w-1] from the beginning
790
+ of the sentence */
791
+ if (ctxt->deletable[-1][w]) {
792
+ for (d = sent->word[w].d; d != NULL; d = d->next) {
793
+ if ((d->left == NULL) && region_valid(sent, w, sent->length, d->right, NULL)) {
794
+ mark_region(sent, w, sent->length, d->right, NULL);
795
+ d->marked = TRUE;
796
+ }
797
+ }
798
+ }
799
+ }
800
+ }
801
+
802
+ delete_unmarked_disjuncts(sent);
803
+
804
+ free_fast_matcher(sent);
805
+
806
+ ctxt->local_sent = NULL;
807
+ ctxt->current_resources = NULL;
808
+ ctxt->deletable = NULL;
809
+ count_unset_effective_distance(sent);
810
+ }
811
+
812
+ void init_count(Sentence sent)
813
+ {
814
+ if (NULL == sent->count_ctxt)
815
+ sent->count_ctxt = (count_context_t *) malloc (sizeof(count_context_t));
816
+ memset(sent->count_ctxt, 0, sizeof(count_context_t));
817
+
818
+ init_table(sent);
819
+ }
820
+
821
+ void free_count(Sentence sent)
822
+ {
823
+ if (NULL == sent->count_ctxt) return;
824
+
825
+ free_table(sent->count_ctxt);
826
+ free(sent->count_ctxt);
827
+ sent->count_ctxt = NULL;
828
+ }