grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,136 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* Copyright (c) 2009 Vikas N. Kumar */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef SPELLCHECK_HUN_C
15
+ #define
16
+
17
+ #include <stdio.h>
18
+ #include <stdlib.h>
19
+ #include "link-includes.h"
20
+ #include "spellcheck.h"
21
+ #include "utilities.h" /* For Win32 compatibility */
22
+
23
+ #ifdef HAVE_HUNSPELL
24
+
25
+ #ifndef HUNSPELL_DICT_DIR
26
+ #define HUNSPELL_DICT_DIR (char *)0
27
+ #endif /* HUNSPELL_DICT_DIR */
28
+
29
+ static const char *hunspell_dict_dirs[] = { /* Directories that spellcheck_create() probes, in this order, for Hunspell .aff/.dic files. */
30
+ "/usr/share/myspell/dicts",
31
+ "/usr/share/hunspell/dicts",
32
+ "/usr/local/share/myspell/dicts",
33
+ "/usr/local/share/hunspell/dicts",
34
+ "/usr/share/myspell",
35
+ "/usr/share/hunspell",
36
+ "/usr/local/share/myspell",
37
+ "/usr/local/share/hunspell",
38
+ HUNSPELL_DICT_DIR /* build-time directory; defaults to a null pointer, which spellcheck_create() skips */
39
+ };
40
+
41
+ static const char *spellcheck_lang_mapping[] = { /* Flat list of pairs: even index = link-grammar language code, odd index = base name of the Hunspell dictionary files. */
42
+ "en" /* link-grammar language */, "en-US" /* hunspell filename */,
43
+ "en" /* link-grammar language */, "en_US" /* hunspell filename */
44
+ };
45
+
46
+ #define FPATHLEN 256 /* capacity, in bytes, of each dictionary path buffer below */
47
+ static char hunspell_aff_file[FPATHLEN]; /* path of the .aff file most recently built by spellcheck_create() */
48
+ static char hunspell_dic_file[FPATHLEN]; /* path of the .dic file most recently built by spellcheck_create() */
49
+
50
+ #include <hunspell.h>
51
+ #include <string.h>
52
+
53
+ void * spellcheck_create(const char * lang)
54
+ {
55
+ size_t i = 0, j = 0;
56
+ Hunhandle *h = NULL;
57
+
58
+ memset(hunspell_aff_file, 0, FPATHLEN);
59
+ memset(hunspell_dic_file, 0, FPATHLEN);
60
+ for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
61
+ {
62
+ if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
63
+
64
+ /* check in each hunspell_dict_dir if the files exist */
65
+ for (j = 0; j < sizeof(hunspell_dict_dirs)/sizeof(char *); ++j)
66
+ {
67
+ FILE *fh;
68
+ /* if the directory name is NULL then ignore */
69
+ if (hunspell_dict_dirs[j] == NULL) continue;
70
+
71
+ snprintf(hunspell_aff_file, FPATHLEN, "%s/%s.aff", hunspell_dict_dirs[j],
72
+ spellcheck_lang_mapping[i+1]);
73
+ snprintf(hunspell_dic_file, FPATHLEN, "%s/%s.dic", hunspell_dict_dirs[j],
74
+ spellcheck_lang_mapping[i+1]);
75
+
76
+ /* Some versions of Hunspell_create() will succeed even if
77
+ * there are no dictionary files. So test for permissions.
78
+ */
79
+ fh = fopen(hunspell_aff_file, "r");
80
+ if (fh) fclose (fh);
81
+ else continue;
82
+
83
+ fh = fopen(hunspell_dic_file, "r");
84
+ if (fh) fclose (fh);
85
+ else continue;
86
+
87
+ h = Hunspell_create(hunspell_aff_file, hunspell_dic_file);
88
+ /* if hunspell handle was created break from loop */
89
+ if (h != NULL)
90
+ break;
91
+ }
92
+ /* if hunspell handle was created break from loop */
93
+ if (h != NULL) break;
94
+ }
95
+ return h;
96
+ }
97
+
98
+ void spellcheck_destroy(void * chk)
99
+ {
100
+ Hunhandle *h = (Hunhandle *) chk;
101
+ Hunspell_destroy(h);
102
+ }
103
+
104
+ /**
105
+ * Return boolean: 1 if spelling looks good, else zero
106
+ */
107
+ int spellcheck_test(void * chk, const char * word)
108
+ {
109
+ if (NULL == chk)
110
+ {
111
+ prt_error("Error: no spell-check handle specified!\n");
112
+ return 0;
113
+ }
114
+
115
+ return Hunspell_spell((Hunhandle *)chk, word);
116
+ }
117
+
118
+ int spellcheck_suggest(void * chk, char ***sug, const char * word)
119
+ {
120
+ if (NULL == chk)
121
+ {
122
+ prt_error("Error: no spell-check handle specified!\n");
123
+ return 0;
124
+ }
125
+
126
+ return Hunspell_suggest((Hunhandle *)chk, sug, word);
127
+ }
128
+
129
/**
 * Free a suggestion list produced by spellcheck_suggest().
 *
 * The list is an array of separately allocated strings, so each entry
 * is released before the array itself; freeing only the array leaks
 * every suggestion.
 *
 * NOTE(review): this assumes the strings were allocated with malloc().
 * Hunspell_free_list() is the library's own way to release the list,
 * but it needs the handle, which this signature does not carry.
 *
 * @param sug   array of `size` suggestion strings; may be NULL.
 * @param size  number of entries in `sug`, as returned by
 *              spellcheck_suggest(); values <= 0 free only the array.
 */
void spellcheck_free_suggest(char **sug, int size)
{
	int i;

	if (NULL == sug) return;

	for (i = 0; i < size; i++)
	{
		free(sug[i]);
	}
	free(sug);
}
133
+
134
+ #endif /* #ifdef HAVE_HUNSPELL */
135
+
136
+ #endif
@@ -0,0 +1,34 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ #ifndef SPELLCHECK_H
14
+ #define SPELLCHECK_H
15
+ #if (defined HAVE_HUNSPELL) || (defined HAVE_ASPELL)
16
+
17
+ void * spellcheck_create(const char * lang); /* create a checker for language `lang`; the Hunspell backend returns NULL when no dictionary is found */
18
+ void spellcheck_destroy(void *); /* release a handle obtained from spellcheck_create() */
19
+ int spellcheck_test(void *, const char * word); /* test the spelling of `word`; the Hunspell backend returns 0 when the handle is NULL */
20
+ int spellcheck_suggest(void * chk, char ***sug, const char * word); /* store a suggestion list for `word` in *sug; the Hunspell backend returns 0 when `chk` is NULL */
21
+ void spellcheck_free_suggest(char **sug, int size); /* free a list obtained from spellcheck_suggest() */
22
+
23
+ #else
24
+
25
+ #include "utilities.h" /* For MSVC inline portability */
26
+
27
/*
 * No spell-checker backend was configured: every entry point is a
 * no-op stub, so callers need no conditional compilation.
 */

/* Never creates a checker: always NULL. */
static inline void * spellcheck_create(const char * lang)
{
	(void) lang;
	return NULL;
}

/* Nothing to release. */
static inline void spellcheck_destroy(void * chk)
{
	(void) chk;
}

/* Always returns 0. */
static inline int spellcheck_test(void * chk, const char * word)
{
	(void) chk;
	(void) word;
	return 0;
}

/* Produces no suggestions: returns 0 and leaves *sug untouched. */
static inline int spellcheck_suggest(void * chk, char ***sug, const char * word)
{
	(void) chk;
	(void) sug;
	(void) word;
	return 0;
}

/* Nothing was allocated, so nothing is freed. */
static inline void spellcheck_free_suggest(char **sug, int size)
{
	(void) sug;
	(void) size;
}
32
+
33
+ #endif
34
+ #endif //endif SPELLCHECK_H
@@ -0,0 +1,169 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+
16
+ /**
17
+ * Suppose you have a program that generates strings and keeps pointers to them.
18
+ The program never needs to change these strings once they're generated.
19
+ If it generates the same string again, then it can reuse the one it
20
+ generated before. This is what this package supports.
21
+
22
+ String_set is the object. The functions are:
23
+
24
+ const char * string_set_add(const char * source_string, String_set * ss);
25
+ This function returns a pointer to a string with the same
26
+ contents as the source_string. If that string is already
27
+ in the table, then it uses that copy, otherwise it generates
28
+ and inserts a new one.
29
+
30
+ const char * string_set_lookup(const char * source_string, String_set * ss);
31
+ This function returns a pointer to a string with the same
32
+ contents as the source_string. If that string is not already
33
+ in the table, returns NULL;
34
+
35
+ String_set * string_set_create(void);
36
+ Create a new empty String_set.
37
+
38
+ void string_set_delete(String_set *ss);
39
+ Free all the space associated with this string set.
40
+
41
+ The implementation uses probed hashing (i.e. not buckets): one hash picks the starting slot and a second hash picks the probe stride.
42
+ */
43
+
44
+ static int hash_string(const char *sa, const String_set *ss)
45
+ {
46
+ unsigned char *str = (unsigned char *) sa;
47
+ unsigned int accum = 0;
48
+ for (;*str != '\0'; str++) accum = ((256*accum) + (*str)) % (ss->size);
49
+ return accum;
50
+ }
51
+
52
+ static int stride_hash_string(const char *sa, const String_set *ss)
53
+ {
54
+ unsigned char *str = (unsigned char *) sa;
55
+ /* This is the stride used, so we have to make sure that its value is not 0 */
56
+ unsigned int accum = 0;
57
+ for (;*str != '\0'; str++) accum = ((17*accum) + (*str)) % (ss->size);
58
+ if (accum == 0) accum = 1;
59
+ return accum;
60
+ }
61
+
62
/**
 * Return the smallest odd prime that is >= start.
 *
 * Used to choose hash-table sizes.  Any start <= 3 yields 3.
 *
 * Primality is tracked with an explicit flag.  Re-testing
 * `start % i` after the trial-division loop, as the previous version
 * did, misclassifies 3 as composite (the loop never runs and i is
 * still 3), and lets 1 or a negative number through as "prime".
 *
 * @param start  lower bound for the search.
 * @return the smallest odd prime not below `start`.
 */
static int next_prime_up(int start)
{
	if (start <= 3) return 3;

	start |= 1; /* make it odd */
	for (;;) {
		int divisor;
		int is_prime = 1;

		/* divisor <= start/divisor means divisor*divisor <= start,
		 * written so that it cannot overflow. */
		for (divisor = 3; divisor <= start / divisor; divisor += 2) {
			if (start % divisor == 0) {
				is_prime = 0;
				break;
			}
		}
		if (is_prime) return start;
		start += 2;
	}
}
78
+
79
+ String_set * string_set_create(void)
80
+ {
81
+ String_set *ss;
82
+ int i;
83
+ ss = (String_set *) xalloc(sizeof(String_set));
84
+ ss->size = next_prime_up(100);
85
+ ss->table = (char **) xalloc(ss->size * sizeof(char *));
86
+ ss->count = 0;
87
+ for (i=0; i<ss->size; i++) ss->table[i] = NULL;
88
+ return ss;
89
+ }
90
+
91
+ /**
92
+ * lookup the given string in the table. Return a pointer
93
+ * to the place it is, or the place where it should be.
94
+ */
95
+ static int find_place(const char * str, String_set *ss)
96
+ {
97
+ int h, s, i;
98
+ h = hash_string(str, ss);
99
+ s = stride_hash_string(str, ss);
100
+ for (i=h; 1; i = (i + s)%(ss->size)) {
101
+ if ((ss->table[i] == NULL) || (strcmp(ss->table[i], str) == 0)) return i;
102
+ }
103
+ }
104
+
105
+ static void grow_table(String_set *ss)
106
+ {
107
+ String_set old;
108
+ int i, p;
109
+
110
+ old = *ss;
111
+ ss->size = next_prime_up(2 * old.size); /* at least double the size */
112
+ ss->table = (char **) xalloc(ss->size * sizeof(char *));
113
+ ss->count = 0;
114
+ for (i=0; i<ss->size; i++) ss->table[i] = NULL;
115
+ for (i=0; i<old.size; i++) {
116
+ if (old.table[i] != NULL) {
117
+ p = find_place(old.table[i], ss);
118
+ ss->table[p] = old.table[i];
119
+ ss->count++;
120
+ }
121
+ }
122
+ /*printf("growing from %d to %d\n", old.size, ss->size);*/
123
+ fflush(stdout);
124
+ xfree((char *) old.table, old.size * sizeof(char *));
125
+ }
126
+
127
+ const char * string_set_add(const char * source_string, String_set * ss)
128
+ {
129
+ char * str;
130
+ int len, p;
131
+
132
+ assert(source_string != NULL, "STRING_SET: Can't insert a null string");
133
+
134
+ p = find_place(source_string, ss);
135
+ if (ss->table[p] != NULL) return ss->table[p];
136
+
137
+ len = strlen(source_string);
138
+ str = (char *) xalloc(len+1);
139
+ strcpy(str, source_string);
140
+ ss->table[p] = str;
141
+ ss->count++;
142
+
143
+ /* We just added it to the table.
144
+ If the table got too big, we grow it.
145
+ Too big is defined as being more than 3/4 full */
146
+ if ((4 * ss->count) > (3 * ss->size)) grow_table(ss);
147
+
148
+ return str;
149
+ }
150
+
151
+ const char * string_set_lookup(const char * source_string, String_set * ss)
152
+ {
153
+ int p;
154
+
155
+ p = find_place(source_string, ss);
156
+ return ss->table[p];
157
+ }
158
+
159
+ void string_set_delete(String_set *ss)
160
+ {
161
+ int i;
162
+
163
+ if (ss == NULL) return;
164
+ for (i=0; i<ss->size; i++) {
165
+ if (ss->table[i] != NULL) xfree(ss->table[i], strlen(ss->table[i]) + 1);
166
+ }
167
+ xfree((char *) ss->table, ss->size * sizeof(char *));
168
+ xfree((char *) ss, sizeof(String_set));
169
+ }
@@ -0,0 +1,16 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ String_set * string_set_create(void); /* presumably returns a new, empty set -- definition not shown here, confirm in string-set.c */
14
+ const char * string_set_add(const char * source_string, String_set * ss); /* returns the set's own copy of source_string, storing a copy first if no equal string is present */
15
+ const char * string_set_lookup(const char * source_string, String_set * ss); /* returns the stored string equal to source_string, or NULL if there is none */
16
+ void string_set_delete(String_set *ss); /* frees every stored string, the table and ss itself; a NULL ss is ignored */
@@ -0,0 +1,498 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef _STRUCTURES_H_
15
+ #define _STRUCTURES_H_
16
+
17
+ #include "api-types.h"
18
+ #include "utilities.h" /* Needed for inline defn in Windows */
19
+
20
+ /*
21
+ Global variable descriptions
22
+ -- Most of these global variables have been eliminated.
23
+ I've left this comment here for historical purposes --DS 4/98
24
+
25
+ N_words:
26
+ The number of words in the current sentence. Computed by
27
+ separate_sentence().
28
+
29
+ N_links:
30
+ The number of links in the current linkage. Computed by
31
+ extract_linkage().
32
+
33
+ sentence[].string:
34
+ Contains a slightly modified form of the words typed by the user.
35
+ Computed by separate_sentence().
36
+
37
+ sentence[].x:
38
+ Contains, for each word, a pointer to a list of expressions from the
39
+ dictionary that match the word in sentence[].string.
40
+ Computed by build_sentence_expressions().
41
+
42
+ sentence[].d
43
+ Contains for each word, a pointer to a list of disjuncts for this word.
44
+ Computed by: prepare_to_parse(), but modified by pruning and power
45
+ pruning.
46
+
47
+ link_array[]
48
+ This is an array of links. These links define the current linkage.
49
+ It is computed by extract_links(). It is used by analyze_linkage() to
50
+ compute pp_linkage[]. It may contain fat links.
51
+
52
+ pp_link_array[] ** eliminated (ALB)
53
+ Another array of links. Here all fat links have been expunged.
54
+ It is computed by analyze_linkage(), and used by post_process() and by
55
+ print_links();
56
+
57
+ chosen_disjuncts[]
58
+ This is an array of pointers to disjuncts, one for each word, that is
59
+ computed by extract_links(). It represents the chosen disjuncts for the
60
+ current linkage. It is used to compute the cost of the linkage, and
61
+ also by compute_chosen_words() to compute the chosen_words[].
62
+
63
+ chosen_words[]
64
+ An array of pointers to strings. These are the words to be displayed
65
+ when printing the solution, the links, etc. Computed as a function of
66
+ chosen_disjuncts[] by compute_chosen_words(). This differs from
67
+ sentence[].string because it contains the suffixes. It differs from
68
+ chosen_disjuncts[].string in that the idiom symbols have been removed.
69
+
70
+ has_fat_down[]
71
+ An array of chars, one for each word. TRUE if there is a fat link
72
+ down from this word, FALSE otherwise. (Only set if there is at least
73
+ one fat link.) Set by set_has_fat_down_array() and used by
74
+ analyze_linkage() and is_canonical().
75
+
76
+ is_conjunction[]
77
+ An array of chars, one for each word. TRUE if the word is a conjunction
78
+ ("and", "or", "nor", or "but" at the moment). False otherwise.
79
+ */
80
+
81
+
82
+ #define NEGATIVECOST -1000000
83
+ /* This is a hack that allows one to discard disjuncts containing
84
+ connectors whose cost is greater than a given bound. This number plus
85
+ the cost of any connectors on a disjunct must remain negative, and
86
+ this number multiplied times the number of costly connectors on any
87
+ disjunct must fit into an integer. */
88
+
89
+ /* Upper bound on the cost of any connector. */
90
+ #define MAX_CONNECTOR_COST 1000.0f
91
+
92
+ #define LEFT_WALL_DISPLAY ("LEFT-WALL") /* the string to use to show the wall */
93
+ #define LEFT_WALL_SUPPRESS ("Wd") /* If this connector is used on the wall, */
94
+ /* then suppress the display of the wall */
95
+ /* (presumably: set this to a bogus name to prevent ever suppressing -- confirm) */
96
+ #define RIGHT_WALL_DISPLAY ("RIGHT-WALL") /* the string to use to show the wall */
97
+ #define RIGHT_WALL_SUPPRESS ("RW") /* If this connector is used on the wall, then suppress the display of the wall */
98
+
99
+ /* The following define the names of the special strings in the dictionary. */
100
+ #define LEFT_WALL_WORD ("LEFT-WALL")
101
+ #define RIGHT_WALL_WORD ("RIGHT-WALL")
102
+ #define POSTPROCESS_WORD ("POSTPROCESS")
103
+ #define ANDABLE_CONNECTORS_WORD ("ANDABLE-CONNECTORS")
104
+ #define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS")
105
+
106
+ #if DONT_USE_REGEX_GUESSING
107
+ /* English-language-specific morphology guessing */
108
+ /* Obsolete, replaced by regex-based morphology handler */
109
+ #define PROPER_WORD ("CAPITALIZED-WORDS")
110
+ #define PL_PROPER_WORD ("PL-CAPITALIZED-WORDS")
111
+ #define HYPHENATED_WORD ("HYPHENATED-WORDS")
112
+ #define NUMBER_WORD ("NUMBERS")
113
+ #define ING_WORD ("ING-WORDS")
114
+ #define S_WORD ("S-WORDS")
115
+ #define ED_WORD ("ED-WORDS")
116
+ #define LY_WORD ("LY-WORDS")
117
+ #endif /* DONT_USE_REGEX_GUESSING */
118
+
119
+ #define UNKNOWN_WORD ("UNKNOWN-WORD")
120
+
121
+ #define MAX_PATH_NAME 200 /* file names (including paths)
122
+ should not be longer than this */
123
+
124
+ /* Some size definitions. Reduce these for small machines */
125
+ #define MAX_WORD 60 /* maximum number of chars in a word */
126
+ #define MAX_LINE 1500 /* maximum number of chars in a sentence */
127
+ #define MAX_DISJUNCT_COST 10000
128
+
129
+ /* conditional compiling flags */
130
+ #define PLURALIZATION
131
+ /* If defined, Turns on the pluralization operation in */
132
+ /* "and", "or" and "nor" */
133
+ #define INFIX_NOTATION
134
+ /* If defined, then we're using infix notation for the dictionary */
135
+ /* otherwise we're using prefix notation */
136
+
137
+ #define DOWN_priority 2
138
+ #define UP_priority 1
139
+ #define THIN_priority 0
140
+
141
+ #define NORMAL_LABEL (-1) /* used for normal connectors */
142
+ /* the labels >= 0 are used by fat links */
143
+
144
+ #define UNLIMITED_LEN 255
145
+ #define SHORT_LEN 6
146
+ #define NO_WORD 255
147
+
148
+ #ifndef _MSC_VER
149
+ typedef long long s64; /* signed 64-bit integer, even on 32-bit cpus */
150
+ #define PARSE_NUM_OVERFLOW (1LL<<24)
151
+ #else
152
+ /* Microsoft Visual C Version 6 doesn't support long long. */
153
+ typedef signed __int64 s64; /* signed 64-bit integer, even on 32-bit cpus */
154
+ #define PARSE_NUM_OVERFLOW (((s64)1)<<24)
155
+ #endif
156
+
157
+ struct Connector_struct
158
+ {
159
+ short label; /* see NORMAL_LABEL: -1 for normal connectors, labels >= 0 are used by fat links */
160
+ short hash; /* connector_set_string() resets this to -1 */
161
+ unsigned char word;
162
+ /* The nearest word to my left (or right) that
163
+ this could connect to. Computed by power pruning */
164
+ unsigned char length_limit;
165
+ /* If this is a length limited connector, this
166
+ gives the limit of the length of the link
167
+ that can be used on this connector. Since
168
+ this is strictly a function of the connector
169
+ name, efficiency is the only reason to store
170
+ this. If no limit, the value is set to 255 (cf. UNLIMITED_LEN). */
171
+ char priority;/* one of the three priorities above (DOWN_priority, UP_priority, THIN_priority) */
172
+ char multi; /* TRUE if this is a multi-connector */
173
+ Connector * next;
174
+ const char * string;
175
+
176
+ /* Hash table next pointer, used only during pruning. */
177
+ Connector * tableNext;
178
+ const char * prune_string;
179
+ };
180
+
181
+ static inline void connector_set_string(Connector *c, const char *s)
182
+ { /* Point c at the string s and reset c's hash field to -1. */
183
+     c->hash = -1;
184
+     c->string = s;
185
+ }
186
+ static inline const char * connector_get_string(Connector *c)
187
+ {
188
+ return c->string; /* the stored pointer itself, not a copy */
189
+ }
190
+
191
+ struct Disjunct_struct
192
+ {
193
+ Disjunct *next; /* next disjunct in a list, e.g. the per-word list held in Word_struct.d */
194
+ const char * string;
195
+ Connector *left, *right; /* connectors chained through Connector.next; presumably the left- and right-pointing ones -- confirm */
196
+ float cost;
197
+ char marked;
198
+ };
199
+
200
+ typedef struct Match_node_struct Match_node;
201
+ struct Match_node_struct /* singly linked list node that refers to one Disjunct */
202
+ {
203
+ Match_node * next; /* next node in the list */
204
+ Disjunct * d; /* the disjunct this node refers to */
205
+ };
206
+
207
+ typedef struct X_node_struct X_node;
208
+ struct X_node_struct
209
+ {
210
+ const char * string; /* the word itself */
211
+ Exp * exp;
212
+ X_node *next;
213
+ };
214
+
215
+ struct Word_struct
216
+ {
217
+ char string[MAX_WORD+1];
218
+ X_node * x; /* sentence starts out with these */
219
+ Disjunct * d; /* eventually these get generated */
220
+ int firstupper;
221
+ };
222
+
223
+ /**
224
+ * Types of Exp_struct structures
225
+ */
226
+ #define OR_type 0
227
+ #define AND_type 1
228
+ #define CONNECTOR_type 2
229
+
230
+ /**
231
+ * The E_list and Exp structures defined below comprise the expression
232
+ * trees that are stored in the dictionary. The expression has a type
233
+ * (AND, OR or CONNECTOR; a connector is a terminal). If it is not a terminal it has a list
234
+ * (an E_list) of children.
235
+ */
236
+ struct Exp_struct
237
+ {
238
+ Exp * next; /* Used only for mem management,for freeing */
239
+ char type; /* One of three types, see above */
240
+ char dir; /* '-' means to the left, '+' means to right (for connector) */
241
+ char multi; /* TRUE if a multi-connector (for connector) */
242
+ union {
243
+ E_list * l; /* only needed for non-terminals */
244
+ const char * string; /* only needed if it's a connector */
245
+ } u;
246
+ float cost; /* The cost of using this expression.
247
+ Only used for non-terminals */
248
+ };
249
+
250
+ struct E_list_struct /* one entry in a non-terminal Exp's list of children (see Exp_struct.u.l) */
251
+ {
252
+ E_list * next; /* next child entry in the list */
253
+ Exp * e; /* the child expression itself */
254
+ };
255
+
256
+ /* The structure below stores a list of dictionary word files. */
257
+ struct Word_file_struct
258
+ {
259
+ char file[MAX_PATH_NAME+1]; /* the file name */
260
+ int changed; /* TRUE if this file has been changed */
261
+ Word_file * next;
262
+ };
263
+
264
+ /**
265
+ * The dictionary is stored as a binary tree comprised of the following
266
+ * nodes. A list of these (via right pointers) is used to return
267
+ * the result of a dictionary lookup.
268
+ */
269
+ struct Dict_node_struct
270
+ {
271
+ const char * string; /* the word itself */
272
+ Word_file * file; /* the file the word came from (NULL if dict file) */
273
+ Exp * exp;
274
+ Dict_node *left, *right;
275
+ };
276
+
277
+ /* The regexs are stored as a linked list of the following nodes. */
278
+ struct Regex_node_s
279
+ {
280
+ char *name; /* The identifying name of the regex */
281
+ char *pattern; /* The regular expression pattern */
282
+ void *re; /* The compiled regex. void * to avoid
283
+ * having re library details invading the
284
+ * rest of the LG system; regex-morph.c
285
+ * takes care of all matching.
286
+ */
287
+ Regex_node *next;
288
+ };
289
+
290
+
291
+ /* The following three structs comprise what is returned by post_process(). */
292
+ typedef struct D_type_list_struct D_type_list;
293
+ struct D_type_list_struct
294
+ {
295
+ D_type_list * next;
296
+ int type;
297
+ };
298
+
299
+ typedef struct PP_node_struct PP_node;
300
+ struct PP_node_struct
301
+ {
302
+ D_type_list *d_type_array[MAX_LINKS];
303
+ const char *violation;
304
+ };
305
+
306
+ /* Davy added these */
307
+
308
+ typedef struct Andlist_struct Andlist;
309
+ struct Andlist_struct
310
+ {
311
+ Andlist * next;
312
+ int conjunction;
313
+ int num_elements;
314
+ int element[MAX_SENTENCE];
315
+ int num_outside_words;
316
+ int outside_word[MAX_SENTENCE];
317
+ int cost;
318
+ };
319
+
320
+ /**
321
+ * This is for building the graphs of links in post-processing and
322
+ * fat link extraction.
323
+ */
324
+ struct Linkage_info_struct
325
+ {
326
+ int index;
327
+ Boolean fat;
328
+ Boolean canonical;
329
+ Boolean improper_fat_linkage;
330
+ Boolean inconsistent_domains;
331
+ short N_violations;
332
+ short null_cost, unused_word_cost, and_cost, link_cost;
333
+ float disjunct_cost;
334
+ double corpus_cost;
335
+ Andlist * andlist;
336
+ int island[MAX_SENTENCE];
337
+ size_t nwords;
338
+ char **disjunct_list_str;
339
+ #ifdef USE_CORPUS
340
+ Sense **sense_list;
341
+ #endif
342
+ };
343
+
344
+ struct List_o_links_struct
345
+ {
346
+ int link; /* the link number */
347
+ int word; /* the word at the other end of this link */
348
+ int dir; /* 0: undirected, 1: away from me, -1: toward me */
349
+ List_o_links * next;
350
+ };
351
+
352
+ /* These parameters tell power_pruning whether it is being run before
353
+ * or after generating "and" disjuncts. GENTLE is before; RUTHLESS is
354
+ * after.
355
+ */
356
+ #define GENTLE 1
357
+ #define RUTHLESS 0
358
+
359
+ typedef struct string_node_struct String_node;
360
+ struct string_node_struct
361
+ {
362
+ char * string;
363
+ int size;
364
+ String_node * next;
365
+ };
366
+
367
+ typedef struct Parse_choice_struct Parse_choice;
368
+
369
+ struct Link_s
370
+ {
371
+ int l, r;
372
+ Connector * lc, * rc;
373
+ const char * name; /* spelling of full link name */
374
+ };
375
+
376
+ struct Parse_choice_struct
377
+ {
378
+ Parse_choice * next;
379
+ Parse_set * set[2];
380
+ Link link[2]; /* the lc fields of these is NULL if there is no link used */
381
+ Disjunct *ld, *md, *rd; /* the chosen disjuncts for the relevant three words */
382
+ };
383
+
384
+ struct Parse_set_struct
385
+ {
386
+ s64 count; /* the number of ways */
387
+ Parse_choice * first;
388
+ Parse_choice * current; /* used to enumerate linkages */
389
+ };
390
+
391
+ struct X_table_connector_struct
392
+ {
393
+ short lw, rw;
394
+ short cost;
395
+ Parse_set *set;
396
+ Connector *le, *re;
397
+ X_table_connector *next;
398
+ };
399
+
400
+ /* from string-set.c */
401
+ struct String_set_s
402
+ {
403
+ int size; /* the current size of the table, in slots; grow_table() sets it to next_prime_up(2 * old size) */
404
+ int count; /* number of strings currently in the table; string_set_add() grows the table once 4*count > 3*size */
405
+ char ** table; /* the table itself: size slots, NULL for an empty slot, otherwise a string copy freed by string_set_delete() */
406
+ };
407
+
408
+
409
+ /* from pp_linkset.c */
410
+ typedef struct pp_linkset_node_s
411
+ {
412
+ const char *str;
413
+ struct pp_linkset_node_s *next;
414
+ } pp_linkset_node;
415
+
416
+ typedef struct pp_linkset_s
417
+ {
418
+ int hash_table_size;
419
+ int population;
420
+ pp_linkset_node **hash_table; /* data actually lives here */
421
+ } pp_linkset;
422
+
423
+
424
+ /* from pp_lexer.c */
425
+ #define PP_LEXER_MAX_LABELS 512
426
+
427
+ typedef struct pp_label_node_s
428
+ {
429
+ /* linked list of strings associated with a label in the table */
430
+ const char *str;
431
+ struct pp_label_node_s *next;
432
+ } pp_label_node; /* next=NULL: end of list */
433
+
434
+
435
+ typedef struct PPLexTable_s
436
+ {
437
+ String_set *string_set;
438
+ const char *labels[PP_LEXER_MAX_LABELS]; /* array of labels */
439
+ pp_label_node *nodes_of_label[PP_LEXER_MAX_LABELS]; /*str. for each label*/
440
+ pp_label_node *last_node_of_label[PP_LEXER_MAX_LABELS]; /* efficiency */
441
+ pp_label_node *current_node_of_active_label;/* state: curr node of label */
442
+ int idx_of_active_label; /* read state: current label */
443
+ } PPLexTable;
444
+
445
+ /* from pp_knowledge.c */
446
+ typedef struct StartingLinkAndDomain_s
447
+ {
448
+ const char *starting_link;
449
+ int domain; /* domain which the link belongs to (-1: terminator)*/
450
+ } StartingLinkAndDomain;
451
+
452
+ typedef struct pp_rule_s
453
+ {
454
+ /* Holds a single post-processing rule. Since rules come in many
455
+ flavors, not all fields of the following are always relevant */
456
+ const char *selector; /* name of link to which rule applies */
457
+ int domain; /* type of domain to which rule applies */
458
+ pp_linkset *link_set; /* handle to set of links relevant to rule */
459
+ int link_set_size; /* size of this set */
460
+ const char **link_array; /* array holding the spelled-out names */
461
+ const char *msg; /* explanation (NULL=end sentinel in array)*/
462
+ } pp_rule;
463
+
464
+ struct pp_knowledge_s
465
+ {
466
+ PPLexTable *lt; /* Internal rep'n of sets of strings from knowledge file */
467
+ const char *path; /* Name of file we loaded from */
468
+
469
+ /* handles to sets of links specified in knowledge file. These constitute
470
+ auxiliary data, necessary to implement the rules, below. See comments
471
+ in post-process.c for a description of these. */
472
+ pp_linkset *domain_starter_links;
473
+ pp_linkset *urfl_domain_starter_links;
474
+ pp_linkset *urfl_only_domain_starter_links;
475
+ pp_linkset *domain_contains_links;
476
+ pp_linkset *must_form_a_cycle_links;
477
+ pp_linkset *restricted_links;
478
+ pp_linkset *ignore_these_links;
479
+ pp_linkset *left_domain_starter_links;
480
+
481
+ /* arrays of rules specified in knowledge file */
482
+ pp_rule *connected_rules, *form_a_cycle_rules;
483
+ pp_rule *contains_one_rules, *contains_none_rules;
484
+ pp_rule *bounded_rules;
485
+
486
+ int n_connected_rules, n_form_a_cycle_rules;
487
+ int n_contains_one_rules, n_contains_none_rules;
488
+ int n_bounded_rules;
489
+
490
+ pp_linkset *set_of_links_starting_bounded_domain;
491
+ StartingLinkAndDomain *starting_link_lookup_table;
492
+ int nStartingLinks;
493
+ String_set *string_set;
494
+ };
495
+
496
+
497
+ #endif
498
+