grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,17 @@
1
+
2
+ Parse Ranking and Word Sense Statistics
3
+ ---------------------------------------
4
+
5
+ This directory contains code that computes a parse ranking, as well
6
+ as a word-sense probability (based on WordNet 3.0) by looking up
7
+ frequency statistics from an SQL database. The SQLite database engine
8
+ is used because it is "administration-free" for the user, and because
9
+ its license is compatible with the current link-grammar license.
10
+
11
+ This directory also contains code for "broadening" word linkages.
12
+ See data/sql/README for more info.
13
+
14
+ This directory contains one administrative tool, "cluster-pop",
15
+ which is not built by default because users do not need this tool.
16
+ See the Makefile.am for notes on how to build it.
17
+
@@ -0,0 +1,286 @@
1
+ /*
2
+ * cluster.c
3
+ *
4
+ * Data for related-word clusters. Meant to expand disjunct coverage
5
+ * for the case where a parse cannot be completed without omitting
6
+ * a word.
7
+ *
8
+ * Copyright (c) 2009 Linas Vepstas <linasvepstas@gmail.com>
9
+ */
10
+
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <sqlite3.h>
15
+ #include "cluster.h"
16
+ #include "../build-disjuncts.h"
17
+ #include "../disjunct-utils.h"
18
+ #include "../structures.h"
19
+ #include "../utilities.h"
20
+
21
+ struct cluster_s
22
+ {
23
+ char * dbname;
24
+ sqlite3 *dbconn;
25
+ sqlite3_stmt *clu_query;
26
+ sqlite3_stmt *dj_query;
27
+ char *errmsg;
28
+ int rc;
29
+ };
30
+
31
+ /* ========================================================= */
32
+
33
+ static void * db_file_open(const char * dbname, void * user_data)
34
+ {
35
+ Cluster *c = (Cluster *) user_data;
36
+ int rc;
37
+ sqlite3 *dbconn;
38
+ c->rc = sqlite3_open_v2(dbname, &dbconn, SQLITE_OPEN_READONLY, NULL);
39
+ if (c->rc)
40
+ {
41
+ sqlite3_close(dbconn);
42
+ return NULL;
43
+ }
44
+
45
+ c->dbname = strdup(dbname);
46
+ return dbconn;
47
+ }
48
+
49
+
50
+ /**
51
+ * Initialize the cluster statistics subsystem.
52
+ */
53
+ Cluster * lg_cluster_new(void)
54
+ {
55
+ int rc;
56
+
57
+ Cluster *c = (Cluster *) malloc(sizeof(Cluster));
58
+ c->clu_query = NULL;
59
+ c->dj_query = NULL;
60
+ c->errmsg = NULL;
61
+ c->dbname = NULL;
62
+
63
+ /* dbname = "/link-grammar/data/en/sql/clusters.db"; */
64
+ #define DBNAME "sql/clusters.db"
65
+ c->dbconn = object_open(DBNAME, db_file_open, c);
66
+ if (NULL == c->dbconn)
67
+ {
68
+ /* Very weird .. but if the database is not found, then sqlite
69
+ * reports an "out of memory" error! So hide this misleading
70
+ * error message.
71
+ */
72
+ if (SQLITE_CANTOPEN == c->rc)
73
+ {
74
+ prt_error("Warning: Can't open database: File not found\n"
75
+ "\tWas looking for: " DBNAME);
76
+ }
77
+ else
78
+ {
79
+ prt_error("Warning: Can't open database: %s\n"
80
+ "\tWas looking for: " DBNAME,
81
+ sqlite3_errmsg(c->dbconn));
82
+ }
83
+ return c;
84
+ }
85
+
86
+ /* Now prepare the statements we plan to use */
87
+ rc = sqlite3_prepare_v2(c->dbconn,
88
+ "SELECT cluster_name FROM ClusterMembers "
89
+ "WHERE inflected_word = ?;",
90
+ -1, &c->clu_query, NULL);
91
+ if (rc != SQLITE_OK)
92
+ {
93
+ prt_error("Error: Can't prepare the cluster member statment: %s\n",
94
+ sqlite3_errmsg(c->dbconn));
95
+ }
96
+
97
+ rc = sqlite3_prepare_v2(c->dbconn,
98
+ "SELECT disjunct, cost FROM ClusterDisjuncts "
99
+ "WHERE cluster_name = ?;",
100
+ -1, &c->dj_query, NULL);
101
+ if (rc != SQLITE_OK)
102
+ {
103
+ prt_error("Error: Can't prepare the disjunct statment: %s\n",
104
+ sqlite3_errmsg(c->dbconn));
105
+ }
106
+
107
+ prt_error("Info: Cluster grouping database found at %s\n", c->dbname);
108
+ return c;
109
+ }
110
+
111
+ /**
112
+ * lg_cluster_delete -- shut down the cluster statistics subsystem.
113
+ */
114
+ void lg_cluster_delete(Cluster *c)
115
+ {
116
+ if (NULL == c) return;
117
+
118
+ if (c->clu_query)
119
+ {
120
+ sqlite3_finalize(c->clu_query);
121
+ c->clu_query = NULL;
122
+ }
123
+
124
+ if (c->dj_query)
125
+ {
126
+ sqlite3_finalize(c->dj_query);
127
+ c->dj_query = NULL;
128
+ }
129
+
130
+ if (c->dbconn)
131
+ {
132
+ sqlite3_close(c->dbconn);
133
+ c->dbconn = NULL;
134
+ }
135
+
136
+ if (c->dbname)
137
+ {
138
+ free(c->dbname);
139
+ c->dbname = NULL;
140
+ }
141
+ free(c);
142
+ }
143
+
144
+ /* ========================================================= */
145
+
146
+ static Exp * make_exp(const char *djstr, double cost)
147
+ {
148
+ Exp *e = (Exp *) malloc(sizeof(Exp));
149
+ e->multi = 0;
150
+ e->dir = ' ';
151
+ e->cost = cost;
152
+
153
+ /* If its just a single connector, then do just that */
154
+ char *sp = strchr (djstr, ' ');
155
+ if (NULL == sp || 0x0 == sp[1])
156
+ {
157
+ e->type = CONNECTOR_type;
158
+ if ('@' == djstr[0]) { e->multi = 1; djstr++; }
159
+ size_t len = strlen(djstr) - 1;
160
+ if (sp) len--;
161
+ e->u.string = strndup(djstr, len);
162
+ e->dir = djstr[len];
163
+ return e;
164
+ }
165
+
166
+ /* If there are multiple connectors, and them together */
167
+ size_t len = sp - djstr;
168
+ char * tmp = strndup(djstr, len);
169
+ Exp *p1 = make_exp(tmp, 0.0);
170
+ free (tmp);
171
+ Exp *p2 = make_exp(sp+1, 0.0);
172
+
173
+ E_list *l;
174
+ E_list *lhead = NULL;
175
+
176
+ l = (E_list *) malloc(sizeof(E_list));
177
+ l->next = lhead;
178
+ l->e = p2;
179
+ lhead = l;
180
+
181
+ l = (E_list *) malloc(sizeof(E_list));
182
+ l->next = lhead;
183
+ l->e = p1;
184
+ lhead = l;
185
+
186
+ e->type = AND_type;
187
+ e->u.l = lhead;
188
+
189
+ return e;
190
+ }
191
+
192
+ #if NOT_NEEDED
193
+ static Exp * or_exp(Exp *p1, Exp *p2)
194
+ {
195
+ if (NULL == p2) return p1;
196
+
197
+ Exp *e = (Exp *) malloc(sizeof(Exp));
198
+ e->multi = 0;
199
+ e->dir = ' ';
200
+ e->cost = 0.0;
201
+ e->type = OR_type;
202
+
203
+ E_list *l;
204
+ E_list *lhead = NULL;
205
+
206
+ l = (E_list *) malloc(sizeof(E_list));
207
+ l->next = lhead;
208
+ l->e = p2;
209
+ lhead = l;
210
+
211
+ l = (E_list *) malloc(sizeof(E_list));
212
+ l->next = lhead;
213
+ l->e = p1;
214
+ lhead = l;
215
+
216
+ e->u.l = lhead;
217
+ return e;
218
+ }
219
+ #endif
220
+
221
+ static void free_exp(Exp *e)
222
+ {
223
+ if (CONNECTOR_type != e->type)
224
+ {
225
+ E_list *l = e->u.l;
226
+ while(l)
227
+ {
228
+ free_exp(l->e);
229
+ E_list *ln = l->next;
230
+ free(l);
231
+ l = ln;
232
+ }
233
+ return;
234
+ }
235
+
236
+ free((char *) e->u.string);
237
+ free(e);
238
+ }
239
+
240
+ Disjunct * lg_cluster_get_disjuncts(Cluster *c, const char * wrd)
241
+ {
242
+ Disjunct *djl = NULL;
243
+ int rc;
244
+
245
+ /* Look for a cluster containing this word */
246
+ rc = sqlite3_bind_text(c->clu_query, 1, wrd, -1, SQLITE_STATIC);
247
+ rc = sqlite3_step(c->clu_query);
248
+ if (rc != SQLITE_ROW) goto noclust;
249
+
250
+ /* Get the cluster name, and look for the disjuncts */
251
+ const char * cluname = sqlite3_column_text(c->clu_query,0);
252
+ rc = sqlite3_bind_text(c->dj_query, 1, cluname, -1, SQLITE_STATIC);
253
+
254
+ while(1)
255
+ {
256
+ rc = sqlite3_step(c->dj_query);
257
+ if (rc != SQLITE_ROW) break;
258
+ const char * djs = sqlite3_column_text(c->dj_query,0);
259
+ double cost = sqlite3_column_double(c->dj_query,1);
260
+
261
+ /* All expanded disjuncts are costly! */
262
+ // cost += 0.5;
263
+ cost -= 6.0;
264
+ if (cost < 0.0) cost = 0.0;
265
+
266
+ /* Building expressions */
267
+ Exp *e = make_exp(djs, cost);
268
+ X_node x;
269
+ x.exp = e;
270
+ x.string = wrd;
271
+ Disjunct *dj = build_disjuncts_for_X_node(&x, MAX_CONNECTOR_COST);
272
+ djl = catenate_disjuncts(dj, djl);
273
+ free_exp(e);
274
+ }
275
+
276
+ sqlite3_reset(c->dj_query);
277
+ sqlite3_clear_bindings(c->dj_query);
278
+
279
+ noclust:
280
+ sqlite3_reset(c->clu_query);
281
+ sqlite3_clear_bindings(c->clu_query);
282
+ return djl;
283
+ }
284
+
285
+
286
+ /* ======================= END OF FILE ===================== */
@@ -0,0 +1,32 @@
1
+ /*
2
+ * cluster.h
3
+ *
4
+ * Data for related-word clusters. Meant to expand disjunct coverage
5
+ * for the case where a parse cannot be completed without omitting
6
+ * a word.
7
+ *
8
+ * Copyright (c) 2009 Linas Vepstas <linasvepstas@gmail.com>
9
+ */
10
+
11
+ #ifndef _LINKGRAMMAR_CLUSTER_H
12
+ #define _LINKGRAMMAR_CLUSTER_H
13
+
14
+ #ifdef USE_CORPUS
15
+
16
+ #include "../api-types.h"
17
+ #include "../link-includes.h"
18
+
19
+ Cluster * lg_cluster_new(void);
20
+ void lg_cluster_delete(Cluster *);
21
+
22
+ Disjunct * lg_cluster_get_disjuncts(Cluster *, const char * wrd);
23
+
24
+ #else /* USE_CORPUS */
25
+
26
+ static inline Cluster * lg_cluster_new(void) { return NULL; }
27
+ static inline void lg_cluster_delete(Cluster *c) {}
28
+ static inline Disjunct * lg_cluster_get_disjuncts(Cluster *c, const char * wrd) { return NULL; }
29
+
30
+ #endif /* USE_CORPUS */
31
+
32
+ #endif /* _LINKGRAMMAR_CLUSTER_H */
@@ -0,0 +1,483 @@
1
+ /*
2
+ * corpus.c
3
+ *
4
+ * Data for corpus statistics, used to provide a parse ranking
5
+ * to drive the SAT solver, as well as parse ranking with the
6
+ * ordinary solver.
7
+ *
8
+ * Copyright (c) 2008, 2009 Linas Vepstas <linasvepstas@gmail.com>
9
+ */
10
+
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <sqlite3.h>
15
+ #include "corpus.h"
16
+ #include "../api-structures.h"
17
+ #include "../disjuncts.h"
18
+ #include "../utilities.h"
19
+
20
+ struct corpus_s
21
+ {
22
+ char * dbname;
23
+ sqlite3 *dbconn;
24
+ sqlite3_stmt *rank_query;
25
+ sqlite3_stmt *sense_query;
26
+ char *errmsg;
27
+ int rc;
28
+ };
29
+
30
+ struct sense_s
31
+ {
32
+ int word;
33
+ const char * inflected_word;
34
+ const char * disjunct;
35
+ char * sense;
36
+ double score;
37
+ Sense *next;
38
+ };
39
+
40
+ /* ========================================================= */
41
+
42
+ static void * db_file_open(const char * dbname, void * user_data)
43
+ {
44
+ Corpus *c = (Corpus *) user_data;
45
+ int rc;
46
+ sqlite3 *dbconn;
47
+ c->rc = sqlite3_open_v2(dbname, &dbconn, SQLITE_OPEN_READONLY, NULL);
48
+ if (c->rc)
49
+ {
50
+ sqlite3_close(dbconn);
51
+ return NULL;
52
+ }
53
+
54
+ c->dbname = strdup(dbname);
55
+ return dbconn;
56
+ }
57
+
58
+
59
+ /**
60
+ * Initialize the corpus statistics subsystem.
61
+ */
62
+ Corpus * lg_corpus_new(void)
63
+ {
64
+ int rc;
65
+
66
+ Corpus *c = (Corpus *) malloc(sizeof(Corpus));
67
+ c->rank_query = NULL;
68
+ c->sense_query = NULL;
69
+ c->errmsg = NULL;
70
+ c->dbname = NULL;
71
+
72
+ /* dbname = "/link-grammar/data/en/sql/disjuncts.db"; */
73
+ #define DBNAME "sql/disjuncts.db"
74
+ c->dbconn = object_open(DBNAME, db_file_open, c);
75
+ if (NULL == c->dbconn)
76
+ {
77
+ /* Very weird .. but if the database is not found, then sqlite
78
+ * reports an "out of memory" error! So hide this misleading
79
+ * error message.
80
+ */
81
+ if (SQLITE_CANTOPEN == c->rc)
82
+ {
83
+ prt_error("Warning: Can't open database: File not found\n"
84
+ "\tWas looking for: " DBNAME);
85
+ }
86
+ else
87
+ {
88
+ prt_error("Warning: Can't open database: %s\n"
89
+ "\tWas looking for: " DBNAME,
90
+ sqlite3_errmsg(c->dbconn));
91
+ }
92
+ return c;
93
+ }
94
+
95
+ /* Now prepare the statements we plan to use */
96
+ rc = sqlite3_prepare_v2(c->dbconn,
97
+ "SELECT log_cond_probability FROM Disjuncts "
98
+ "WHERE inflected_word = ? AND disjunct = ?;",
99
+ -1, &c->rank_query, NULL);
100
+ if (rc != SQLITE_OK)
101
+ {
102
+ prt_error("Error: Can't prepare the ranking statment: %s\n",
103
+ sqlite3_errmsg(c->dbconn));
104
+ }
105
+
106
+ /* Results are returned in sorted order .. would it be faster
107
+ * to sort locally? Don't know ... */
108
+ rc = sqlite3_prepare_v2(c->dbconn,
109
+ "SELECT word_sense, log_cond_probability FROM DisjunctSenses "
110
+ "WHERE inflected_word = ? AND disjunct = ? "
111
+ "ORDER BY log_cond_probability ASC;",
112
+ -1, &c->sense_query, NULL);
113
+ if (rc != SQLITE_OK)
114
+ {
115
+ prt_error("Error: Can't prepare the sense statment: %s\n",
116
+ sqlite3_errmsg(c->dbconn));
117
+ }
118
+
119
+ prt_error("Info: Corpus statistics database found at %s\n", c->dbname);
120
+ return c;
121
+ }
122
+
123
+ /**
124
+ * lg_corpus_delete -- shut down the corpus statistics subsystem.
125
+ */
126
+ void lg_corpus_delete(Corpus *c)
127
+ {
128
+ if (NULL == c) return;
129
+
130
+ if (c->rank_query)
131
+ {
132
+ sqlite3_finalize(c->rank_query);
133
+ c->rank_query = NULL;
134
+ }
135
+
136
+ if (c->sense_query)
137
+ {
138
+ sqlite3_finalize(c->sense_query);
139
+ c->sense_query = NULL;
140
+ }
141
+
142
+ if (c->dbconn)
143
+ {
144
+ sqlite3_close(c->dbconn);
145
+ c->dbconn = NULL;
146
+ }
147
+
148
+ if (c->dbname)
149
+ {
150
+ free(c->dbname);
151
+ c->dbname = NULL;
152
+ }
153
+ free(c);
154
+ }
155
+
156
+ /* ========================================================= */
157
+
158
+ /* LOW_SCORE is what is assumed if a disjunct-word pair is not found
159
+ * in the dictionary. It is meant to be -log_2(prob(d|w)) where
160
+ * prob(d|w) is the conditional probability of seeing the disjunct d
161
+ * given the word w. A value of 17 is about equal to 1 in 100,000.
162
+ */
163
+ #define LOW_SCORE 17.0
164
+
165
+ /**
166
+ * get_disjunct_score -- get log probability of observing disjunt.
167
+ *
168
+ * Given an "inflected" word and a disjunct, thris routine returns the
169
+ * -log_2 conditional probability prob(d|w) of seeing the disjunct 'd'
170
+ * given that the word 'w' was observed. Here, "inflected word" means
171
+ * the link-grammar dictionary entry, complete with its trailing period
172
+ * and tag -- e.g. run.v or running.g -- everything after the dot is the
173
+ * "inflection".
174
+ */
175
+ static double get_disjunct_score(Corpus *corp,
176
+ const char * inflected_word,
177
+ const char * disjunct)
178
+ {
179
+ double val;
180
+ int rc;
181
+
182
+ /* Look up the disjunct in the database */
183
+ rc = sqlite3_bind_text(corp->rank_query, 1,
184
+ inflected_word, -1, SQLITE_STATIC);
185
+ if (rc != SQLITE_OK)
186
+ {
187
+ prt_error("Error: SQLite can't bind word: rc=%d \n", rc);
188
+ return LOW_SCORE;
189
+ }
190
+
191
+ rc = sqlite3_bind_text(corp->rank_query, 2,
192
+ disjunct, -1, SQLITE_STATIC);
193
+ if (rc != SQLITE_OK)
194
+ {
195
+ prt_error("Error: SQLite can't bind disjunct: rc=%d \n", rc);
196
+ return LOW_SCORE;
197
+ }
198
+
199
+ rc = sqlite3_step(corp->rank_query);
200
+ if (rc != SQLITE_ROW)
201
+ {
202
+ val = LOW_SCORE;
203
+ #ifdef DEBUG
204
+ printf ("Word=%s dj=%s not found in dict, assume score=%f\n",
205
+ inflected_word, disjunct, val);
206
+ #endif
207
+ }
208
+ else
209
+ {
210
+ val = sqlite3_column_double(corp->rank_query, 0);
211
+ if (LOW_SCORE < val) val = LOW_SCORE;
212
+ #ifdef DEBUG
213
+ printf ("Word=%s dj=%s score=%f\n", inflected_word, disjunct, val);
214
+ #endif
215
+ }
216
+
217
+ /* Failure to do both a reset *and* a clear will cause subsequent
218
+ * binds tp fail. */
219
+ sqlite3_reset(corp->rank_query);
220
+ sqlite3_clear_bindings(corp->rank_query);
221
+ return val;
222
+ }
223
+
224
+ /* ========================================================= */
225
+
226
+ /**
227
+ * lg_corpus_score -- compute parse-ranking score for sentence.
228
+ *
229
+ * Given a parsed sentence, this routine will compute a parse ranking
230
+ * score, based on the probabilites of observing the indicated set of
231
+ * disjuncts in the statistics database.
232
+ *
233
+ * The score is stored in the Linkage_info->corpus_cost struct member.
234
+ *
235
+ * The score is currently computed as the average -log_2 conditional
236
+ * probability p(d|w) of observing disjunct 'd', given word 'w'.
237
+ * Lower scores are better -- they indicate more likely parses.
238
+ */
239
+ void lg_corpus_score(Sentence sent, Linkage_info *lifo)
240
+ {
241
+ const char *infword, *djstr;
242
+ double tot_score = 0.0f;
243
+ Corpus *corp = sent->dict->corpus;
244
+ int nwords = sent->length;
245
+ int w;
246
+
247
+ /* No-op if the database is not open */
248
+ if (NULL == corp->dbconn) return;
249
+
250
+ lg_compute_disjunct_strings(sent, lifo);
251
+
252
+ /* Decrement nwords, so as to ignore the RIGHT-WALL */
253
+ nwords --;
254
+
255
+ /* Loop over each word in the sentence (skipping LEFT-WALL, which is
256
+ * word 0. */
257
+ for (w=1; w<nwords; w++)
258
+ {
259
+ Disjunct *disj = sent->parse_info->chosen_disjuncts[w];
260
+
261
+ /* disj is NULL if word did not participate in parse */
262
+ if (NULL == disj)
263
+ {
264
+ tot_score += LOW_SCORE;
265
+ continue;
266
+ }
267
+ infword = disj->string;
268
+ djstr = lifo->disjunct_list_str[w];
269
+ tot_score += get_disjunct_score(corp, infword, djstr);
270
+ }
271
+
272
+ /* Decrement nwords, so as to ignore the LEFT-WALL */
273
+ --nwords;
274
+ tot_score /= nwords;
275
+ lifo->corpus_cost = tot_score;
276
+ }
277
+
278
+ double lg_corpus_disjunct_score(Linkage linkage, int w)
279
+ {
280
+ double score;
281
+ const char *infword, *djstr;
282
+ Sentence sent = linkage->sent;
283
+ Linkage_info *lifo = linkage->info;
284
+ Corpus *corp = sent->dict->corpus;
285
+ Disjunct *disj;
286
+
287
+ /* No-op if the database is not open */
288
+ if (NULL == corp->dbconn) return LOW_SCORE;
289
+
290
+ /* disj is NULL if word did not participate in parse */
291
+ disj = sent->parse_info->chosen_disjuncts[w];
292
+ if (NULL == disj) return LOW_SCORE;
293
+
294
+ lg_compute_disjunct_strings(sent, lifo);
295
+
296
+ infword = disj->string;
297
+ djstr = lifo->disjunct_list_str[w];
298
+ score = get_disjunct_score(corp, infword, djstr);
299
+
300
+ return score;
301
+ }
302
+
303
+ /* ========================================================= */
304
+
305
+ /**
306
+ * lg_corpus_senses -- Given word and disjunct, look up senses.
307
+ *
308
+ * Given a particular disjunct for a word, look up its most
309
+ * likely sense assignments from the database.
310
+ */
311
+
312
+ static Sense * lg_corpus_senses(Corpus *corp,
313
+ const char * inflected_word,
314
+ const char * disjunct,
315
+ int wrd)
316
+ {
317
+ double log_prob;
318
+ const unsigned char *sense;
319
+ Sense *sns, *head = NULL;
320
+ int rc;
321
+
322
+ /* Look up the disjunct in the database */
323
+ rc = sqlite3_bind_text(corp->sense_query, 1,
324
+ inflected_word, -1, SQLITE_STATIC);
325
+ if (rc != SQLITE_OK)
326
+ {
327
+ prt_error("Error: SQLite can't bind word in sense query: rc=%d \n", rc);
328
+ return NULL;
329
+ }
330
+
331
+ rc = sqlite3_bind_text(corp->sense_query, 2,
332
+ disjunct, -1, SQLITE_STATIC);
333
+ if (rc != SQLITE_OK)
334
+ {
335
+ prt_error("Error: SQLite can't bind disjunct in sense query: rc=%d \n", rc);
336
+ return NULL;
337
+ }
338
+
339
+ rc = sqlite3_step(corp->sense_query);
340
+ while (SQLITE_ROW == rc)
341
+ {
342
+ sense = sqlite3_column_text(corp->sense_query, 0);
343
+ log_prob = sqlite3_column_double(corp->sense_query, 1);
344
+ // printf ("Word=%s dj=%s sense=%s score=%f\n",
345
+ // inflected_word, disjunct, sense, log_prob);
346
+
347
+ sns = (Sense *) malloc(sizeof(Sense));
348
+ sns->next = head;
349
+ head = sns;
350
+
351
+ sns->inflected_word = inflected_word;
352
+ sns->disjunct = disjunct;
353
+ sns->sense = strdup(sense);
354
+ sns->score = log_prob;
355
+ sns->word = wrd;
356
+
357
+ /* Get the next row, if any */
358
+ rc = sqlite3_step(corp->sense_query);
359
+ }
360
+
361
+ /* Failure to do both a reset *and* a clear will cause subsequent
362
+ * binds tp fail. */
363
+ sqlite3_reset(corp->sense_query);
364
+ sqlite3_clear_bindings(corp->sense_query);
365
+
366
+ return head;
367
+ }
368
+
369
+ /* ========================================================= */
370
+
371
+ /**
372
+ * lg_corpus_linkage_senses -- Given a linkage, look up senses.
373
+ *
374
+ * Given a particular linakge, look up the most likely sense
375
+ * assignments from the database.
376
+ *
377
+ * This function is not used to guide the parsing process; it is
378
+ * only an informational look-up.
379
+ */
380
+
381
+ void lg_corpus_linkage_senses(Linkage linkage)
382
+ {
383
+ const char * infword;
384
+ Sentence sent = linkage->sent;
385
+ Dictionary dict = sent->dict;
386
+ Corpus *corp = dict->corpus;
387
+ int nwords = sent->length;
388
+ Linkage_info *lifo = linkage->info;
389
+ int w;
390
+
391
+ if (lifo->sense_list) return;
392
+
393
+ /* Set up the disjunct strings first */
394
+ lg_compute_disjunct_strings(sent, lifo);
395
+
396
+ lifo->nwords = nwords;
397
+ lifo->sense_list = (Sense **) malloc(nwords * sizeof (Sense *));
398
+ memset(lifo->sense_list, 0, nwords * sizeof (Sense *));
399
+
400
+ /* Decrement nwords, so as to ignore the RIGHT-WALL */
401
+ nwords --;
402
+
403
+ /* Loop over each word in the sentence (skipping LEFT-WALL, which is
404
+ * word 0. */
405
+ for (w=1; w<nwords; w++)
406
+ {
407
+ Disjunct *disj = sent->parse_info->chosen_disjuncts[w];
408
+
409
+ /* disj is NULL if word did not participate in parse */
410
+ if (NULL == disj)
411
+ {
412
+ continue;
413
+ }
414
+ infword = disj->string;
415
+
416
+ lifo->sense_list[w] = lg_corpus_senses(corp, infword,
417
+ lifo->disjunct_list_str[w], w);
418
+ }
419
+ }
420
+
421
+ /* ========================================================= */
422
+ /* Return bits and pieces of the sense assignments */
423
+
424
+ Sense * lg_get_word_sense(Linkage_info *lifo, int word)
425
+ {
426
+ if (!lifo->sense_list) return NULL;
427
+ if (lifo->nwords <= word) return NULL;
428
+ return lifo->sense_list[word];
429
+ }
430
+
431
+ Sense * lg_sense_next(Sense *sns)
432
+ {
433
+ return sns->next;
434
+ }
435
+
436
+ int lg_sense_get_index(Sense *sns)
437
+ {
438
+ return sns->word;
439
+ }
440
+
441
+ const char * lg_sense_get_subscripted_word(Sense *sns)
442
+ {
443
+ return sns->inflected_word;
444
+ }
445
+
446
+ const char * lg_sense_get_disjunct(Sense *sns)
447
+ {
448
+ return sns->disjunct;
449
+ }
450
+
451
+ const char * lg_sense_get_sense(Sense *sns)
452
+ {
453
+ return sns->sense;
454
+ }
455
+
456
+ double lg_sense_get_score(Sense *sns)
457
+ {
458
+ return sns->score;
459
+ }
460
+
461
+ void lg_sense_delete(Linkage_info *lifo)
462
+ {
463
+ size_t nwords = lifo->nwords;
464
+ size_t w;
465
+
466
+ if (NULL == lifo->sense_list) return;
467
+
468
+ for (w=0; w<nwords; w++)
469
+ {
470
+ Sense *sns = lifo->sense_list[w];
471
+ while (sns)
472
+ {
473
+ Sense * nxt = sns->next;
474
+ free(sns->sense);
475
+ free(sns);
476
+ sns = nxt;
477
+ }
478
+ }
479
+ free (lifo->sense_list);
480
+ lifo->sense_list = NULL;
481
+ }
482
+
483
+ /* ======================= END OF FILE ===================== */