grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,27 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ link_private void free_AND_tables(Sentence sent); /* link_private is defined outside this header; presumably marks library-internal symbols -- confirm */
14
+ link_private void print_AND_statistics(Sentence sent);
15
+ link_private void init_andable_hash_table(Dictionary dict);
16
+ link_private void free_andable_hash_table(Dictionary dict);
17
+ link_private void initialize_conjunction_tables(Sentence sent);
18
+ link_private int is_canonical_linkage(Sentence sent);
19
+ link_private Disjunct * build_AND_disjunct_list(Sentence sent, char *); /* second parameter is unnamed here; its meaning is not visible in this header */
20
+ link_private Disjunct * build_COMMA_disjunct_list(Sentence sent);
21
+ link_private Disjunct * explode_disjunct_list(Sentence sent, Disjunct *);
22
+ link_private void build_conjunction_tables(Sentence);
23
+ link_private void compute_pp_link_array_connectors(Sentence sent, Sublinkage *sublinkage);
24
+
25
+ /* The following need to be visible to the SAT solver, so they can't be private */
26
+ int set_has_fat_down(Sentence sent);
27
+ const char * intersect_strings(Sentence sent, const char * s, const char * t);
@@ -0,0 +1,362 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ /*****************************************************************************
15
+ *
16
+ * NOTE: There are five basic "types" in the link parser API. These are:
17
+ *
18
+ * Dictionary, Parse_Options, Sentence, Linkage, PostProcessor
19
+ *
20
+ * To make the use of the API simpler, each of these is typedef'ed as a pointer
21
+ * to a data structure. As a result, some of the code may look a little funny,
22
+ * since it uses pointers in a way that is syntactically inconsistent. After
23
+ * working a bit with these basic types, this should not be confusing.
24
+ *
25
+ ******************************************************************************/
26
+
27
+ #ifndef _API_STRUCTURESH_
28
+ #define _API_STRUCTURESH_
29
+
30
+ #include <wchar.h>
31
+ #include "api-types.h"
32
+ #include "structures.h" /* for definition of Link */
33
+ #include "corpus/corpus.h"
34
+ #include "error.h"
35
+
36
+ typedef enum /* tag identifying a linkage cost model; used as the type of Cost_Model_s.type */
37
+ {
38
+ VDAL=1, /* Sort by Violations, Disjunct cost, And cost, Link cost */
39
+ CORPUS, /* Sort by Corpus cost (value 2, implied by VDAL=1) */
40
+ } Cost_Model_type;
41
+
42
+ struct Cost_Model_s /* pairs a cost-model tag with a comparison callback over Linkage_info */
43
+ {
44
+ Cost_Model_type type; /* VDAL or CORPUS */
45
+ int (*compare_fn)(Linkage_info *, Linkage_info *); /* return convention not visible here; presumably qsort-style ordering -- TODO confirm */
46
+ };
47
+
48
+ struct Resources_s /* parse time/memory limits plus timing and status bookkeeping fields */
49
+ {
50
+ int max_parse_time; /* in seconds */
51
+ size_t max_memory; /* in bytes */
52
+ double time_when_parse_started; /* units not shown here; presumably seconds -- confirm */
53
+ size_t space_when_parse_started; /* presumably bytes, matching max_memory -- confirm */
54
+ double when_created;
55
+ double when_last_called;
56
+ double cumulative_time;
57
+ int memory_exhausted; /* presumably a boolean flag set once max_memory is exceeded -- confirm */
58
+ int timer_expired; /* presumably a boolean flag set once max_parse_time is exceeded -- confirm */
59
+ };
60
+
61
+ struct Parse_Options_s /* parser settings followed by display flags for the command-line client */
62
+ {
63
+ int verbosity; /* Level of detail to give about the computation (trailing value in the original comment, 0, is presumably the default -- confirm) */
64
+ int use_sat_solver; /* Use the Boolean SAT based parser */
65
+ int linkage_limit; /* The maximum number of linkages processed (trailing value in the original comment, 100, is presumably the default -- confirm) */
66
+ float disjunct_cost; /* Max disjunct cost to allow */
67
+ int use_fat_links; /* Look for fat linkages */
68
+ int min_null_count; /* The minimum number of null links to allow */
69
+ int max_null_count; /* The maximum number of null links to allow */
70
+ int null_block; /* consecutive blocks of this many words are
71
+ considered as one null link (default=1) */
72
+ int islands_ok; /* If TRUE, then linkages with islands
73
+ (separate component of the link graph)
74
+ will be generated (default=FALSE) */
75
+ int twopass_length; /* min length for two-pass post processing */
76
+ int max_sentence_length;
77
+ int short_length; /* Links that are limited in length can be
78
+ * no longer than this. Default = 6 */
79
+ int all_short; /* If true, there can be no connectors that are exempt */
80
+ int use_spell_guess; /* Perform spell-guessing of unknown words. */
81
+ Cost_Model cost_model; /* For sorting linkages in post_processing */
82
+ Resources resources; /* For deciding when to abort the parsing */
83
+
84
+ /* Flags governing the command-line client; not used by parser */
85
+ int display_short;
86
+ int display_word_subscripts; /* as in "dog.n" as opposed to "dog" */
87
+ int display_link_subscripts; /* as in "Ss" as opposed to "S" */
88
+ int display_walls;
89
+ int display_union; /* print squashed version of linkage with conjunction? */
90
+ int allow_null; /* true if we allow null links in parsing */
91
+ int use_cluster_disjuncts; /* if true, attempt using a broader list of disjuncts */
92
+ int echo_on; /* true if we should echo the input sentence */
93
+ int batch_mode; /* if true, process sentences non-interactively */
94
+ int panic_mode; /* if true, parse in "panic mode" after all else fails */
95
+ int screen_width; /* width of screen for displaying linkages */
96
+ int display_on; /* if true, output graphical linkage diagram */
97
+ int display_postscript; /* if true, output postscript linkage */
98
+ int display_constituents; /* if true, output treebank-style constituent structure */
99
+ int display_bad; /* if true, bad linkages are displayed */
100
+ int display_disjuncts; /* if true, print disjuncts that were used */
101
+ int display_links; /* if true, a list o' links is printed out */
102
+ int display_senses; /* if true, sense candidates are printed out */
103
+ };
104
+
105
+ struct Connector_set_s /* a set of connectors held in a hash table */
106
+ {
107
+ Connector ** hash_table; /* array of Connector pointers; bucket layout not visible here */
108
+ int table_size; /* presumably the number of slots in hash_table -- confirm */
109
+ int is_defined; /* if 0 then there is no such set */
110
+ };
111
+
112
+ struct Dictionary_s /* dictionary state: word and regex trees, affix tables, post-processors, and reader-private fields */
113
+ {
114
+ Dict_node * root;
115
+ Regex_node * regex_root;
116
+ const char * name;
117
+ const char * lang;
118
+
119
+ int use_unknown_word;
120
+ int unknown_word_defined;
121
+
122
+ /* If not null, then use spelling guesser for unknown words */
123
+ void * spell_checker; /* spell checker handle */
124
+ #if USE_CORPUS
125
+ Corpus * corpus; /* Statistics database */
126
+ #endif
127
+
128
+ #if DONT_USE_REGEX_GUESSING
129
+ /* English language morphology bits
130
+ * replaced by regex-based morpho guesser
131
+ * Dead code, remove at leisure.
132
+ */
133
+ int capitalized_word_defined;
134
+ int pl_capitalized_word_defined;
135
+ int hyphenated_word_defined;
136
+ int number_word_defined;
137
+ int ing_word_defined;
138
+ int s_word_defined;
139
+ int ed_word_defined;
140
+ int ly_word_defined;
141
+ #endif /* DONT_USE_REGEX_GUESSING */
142
+
143
+ int left_wall_defined;
144
+ int right_wall_defined;
145
+
146
+ /* Affixes are used during the tokenization stage. */
147
+ Dictionary affix_table; /* Dictionary is a pointer typedef (see NOTE at top of file), so this is a reference, not an embedded struct */
148
+ int r_strippable; /* right */
149
+ int l_strippable; /* left */
150
+ int u_strippable; /* units on left */
151
+ int s_strippable; /* generic suffix */
152
+ int p_strippable; /* generic prefix */
153
+ const char ** strip_left;
154
+ const char ** strip_right;
155
+ const char ** strip_units;
156
+ const char ** prefix;
157
+ const char ** suffix;
158
+
159
+ Postprocessor * postprocessor;
160
+ Postprocessor * constituent_pp;
161
+ int andable_defined;
162
+ Connector_set * andable_connector_set; /* NULL=everything is andable */
163
+ Connector_set * unlimited_connector_set; /* NULL=everything is unlimited */
164
+ int max_cost;
165
+ String_set * string_set; /* Set of link names constructed during parsing */
166
+ int num_entries;
167
+ Word_file * word_file_header;
168
+
169
+ /* exp_list links together all the Exp structs that are allocated
170
+ * in reading this dictionary. Needed for freeing the dictionary
171
+ */
172
+ Exp * exp_list;
173
+
174
+ /* Private data elements that come in play only while the
175
+ * dictionary is being read, and are not otherwise used.
176
+ */
177
+ FILE * fp;
178
+ char token[MAX_TOKEN_LENGTH];
179
+ int is_special; /* boolean */
180
+ wint_t already_got_it;
181
+ int line_number;
182
+ int recursive_error; /* boolean */
183
+ mbstate_t mbss; /* multi-byte shift state */
184
+ };
185
+
186
+ struct Label_node_s /* node of a singly linked chain of int labels; And_data_s.hash_table holds pointers to these */
187
+ {
188
+ int label;
189
+ Label_node * next; /* following node in the chain */
190
+ };
191
+
192
+ #define HT_SIZE (1<<10) /* 1024; dimension of And_data_s.hash_table */
193
+
194
+ struct And_data_s /* label table, label hash table, and statistics counters; embedded in Sentence_s as and_data */
195
+ {
196
+ int LT_bound; /* NOTE(review): "LT" presumably abbreviates label table -- confirm against and.c */
197
+ int LT_size;
198
+ Disjunct ** label_table;
199
+ Label_node * hash_table[HT_SIZE]; /* HT_SIZE chain heads of Label_node */
200
+
201
+ /* keeping statistics */
202
+ int STAT_N_disjuncts;
203
+ int STAT_calls_to_equality_test;
204
+ };
205
+
206
+ struct Parse_info_struct /* per-parse state: x_table, parse set, chosen disjuncts, link array, fat-link data */
207
+ {
208
+ int x_table_size;
209
+ int log2_x_table_size; /* presumably log base 2 of x_table_size -- confirm */
210
+ X_table_connector ** x_table;
211
+ Parse_set * parse_set;
212
+ int N_words;
213
+ Disjunct ** chosen_disjuncts;
214
+ int N_links;
215
+ Link link_array[MAX_LINKS];
216
+
217
+ /* Points to the image structure for each word.
218
+ * NULL if not a fat word. */
219
+ Image_node ** image_array;
220
+
221
+ /* Array of boolean flags, one per word. Set to TRUE if this
222
+ * word has a fat down link. FALSE otherwise */
223
+ Boolean *has_fat_down;
224
+
225
+ /* thread-safe random number state */
226
+ unsigned int rand_state;
227
+ };
228
+
229
+ struct Sentence_s /* one sentence: tokenized words, linkage counts, parse info, and per-stage private contexts */
230
+ {
231
+ Dictionary dict; /* words are defined from this dictionary */
232
+ const char *orig_sentence; /* Copy of original sentence */
233
+ int length; /* number of words */
234
+ Word word[MAX_SENTENCE]; /* array of words after tokenization */
235
+ char * is_conjunction; /* Array of flags, one per word; set to
236
+ TRUE if conjunction, as defined by dictionary */
237
+ char** deletable; /* deletable regions in a sentence with conjunction */
238
+ char** dptr; /* private pointer for mem management only */
239
+ char** effective_dist;
240
+ int num_linkages_found; /* total number before postprocessing. This
241
+ is returned by the count() function */
242
+ int num_linkages_alloced;/* total number of linkages allocated.
243
+ the number post-processed might be fewer
244
+ because some are non-canonical */
245
+ int num_linkages_post_processed;
246
+ /* The number of linkages that are actually
247
+ put into the array that was alloced.
248
+ This is not the same as num alloced
249
+ because some may be non-canonical. */
250
+ int num_valid_linkages; /* number with no pp violations */
251
+ int num_thin_linkages; /* valid linkages which are not fat */
252
+ int null_links; /* null links allowed */
253
+ int null_count; /* number of null links in linkages */
254
+ Parse_info parse_info; /* set of parses for the sentence */
255
+ Linkage_info * link_info; /* array of valid and invalid linkages (sorted) */
256
+ String_set * string_set; /* used for word names, not connectors */
257
+ And_data and_data; /* used to keep track of fat disjuncts */
258
+ char q_pruned_rules; /* don't prune rules more than once in p.p. */
259
+ int post_quote[MAX_SENTENCE]; /* Used only by tokenizer. */
260
+
261
+ analyze_context_t * analyze_ctxt; /* private state used for analyzing */
262
+ count_context_t * count_ctxt; /* private state info used for counting */
263
+ match_context_t * match_ctxt; /* private state info used for matching */
264
+ /* thread-safe random number state */
265
+ unsigned int rand_state; /* Parse_info_struct declares a field with the same name and type */
266
+
267
+ /* Hook for the SAT solver */
268
+ void *hook;
269
+ };
270
+
271
+ /*********************************************************
272
+ *
273
+ * Post processing
274
+ *
275
+ **********************************************************/
276
+
277
+ struct Domain_s /* One post-processing domain; PP_data_s stores these in domain_array. */
278
+ {
279
+ const char * string;
280
+ int size;
281
+ List_o_links * lol; /* presumably the links belonging to this domain -- TODO confirm */
282
+ int start_link; /* the link that started this domain */
283
+ int type; /* one letter name */
284
+ DTreeLeaf * child; /* presumably the head of a DTreeLeaf list chained via next -- TODO confirm */
285
+ Domain * parent;
286
+ };
287
+
288
+
289
+ struct DTreeLeaf_s /* Leaf node referenced from Domain_s.child. */
290
+ {
291
+ Domain * parent; /* presumably the domain that owns this leaf -- TODO confirm */
292
+ int link; /* presumably a link index -- TODO confirm */
293
+ DTreeLeaf * next; /* presumably the next sibling leaf, NULL at the end -- TODO confirm */
294
+ };
295
+
296
+ struct PP_data_s /* Post-processing working data; embedded by value in Postprocessor_s and Sublinkage_s. */
297
+ {
298
+ int N_domains; /* presumably the number of domain_array entries in use -- TODO confirm */
299
+ List_o_links * word_links[MAX_SENTENCE]; /* MAX_SENTENCE slots; presumably one list per word -- TODO confirm */
300
+ List_o_links * links_to_ignore;
301
+ Domain domain_array[MAX_LINKS]; /* the domains, sorted by size; capacity is MAX_LINKS */
302
+ int length; /* length of current sentence */
303
+ };
304
+
305
+ struct PP_info_s /* Domain names for a link; Sublinkage_s.pp_info holds one per link. */
306
+ {
307
+ int num_domains; /* presumably the number of entries in domain_name -- TODO confirm */
308
+ const char ** domain_name;
309
+ };
310
+
311
+ struct Postprocessor_s /* Post-processor: the rule knowledge plus state kept during post_process(). */
312
+ {
313
+ pp_knowledge *knowledge; /* internal rep'n of the actual rules */
314
+ int n_global_rules_firing; /* this & the next are diagnostic */
315
+ int n_local_rules_firing;
316
+ pp_linkset *set_of_links_of_sentence; /* seen in *any* linkage of sent */
317
+ pp_linkset *set_of_links_in_an_active_rule;/*used in *some* linkage of sent*/
318
+ int *relevant_contains_one_rules; /* -1-terminated list of indices */
319
+ int *relevant_contains_none_rules; /* presumably also a -1-terminated list of indices -- TODO confirm */
320
+ /* the following maintain state during a call to post_process() */
321
+ String_set *sentence_link_name_set; /* link names seen for sentence */
322
+ int visited[MAX_SENTENCE]; /* for the depth-first search */
323
+ PP_node *pp_node;
324
+ PP_data pp_data; /* embedded by value, not a pointer */
325
+ };
326
+
327
+
328
+ /*********************************************************
329
+ *
330
+ * Linkages
331
+ *
332
+ **********************************************************/
333
+
334
+ struct Sublinkage_s /* One sublinkage of a Linkage_s: its links and their post-processing info. */
335
+ {
336
+ int num_links; /* Number of links in array */
337
+ Link ** link; /* Array of links */
338
+ PP_info * pp_info; /* PP info for each link */
339
+ const char * violation; /* Name of violation, if any */
340
+ PP_data pp_data; /* embedded by value, not a pointer */
341
+ };
342
+
343
+ typedef struct DIS_node_struct DIS_node; /* forward declaration; the struct body is not in this header section */
344
+
345
+ struct Linkage_s /* One linkage of a sentence, made up of one or more sublinkages. */
346
+ {
347
+ int num_words; /* number of (tokenized) words */
348
+ const char * * word; /* array of word spellings */
349
+ Linkage_info* info; /* index and cost information */
350
+ int num_sublinkages; /* One for thin linkages, bigger for fat */
351
+ int current; /* Allows user to select particular sublinkage */
352
+ Sublinkage * sublinkage; /* A parse with conjunctions will have several */
353
+ int unionized; /* if TRUE, union of links has been computed */
354
+ Sentence sent; /* presumably the sentence this linkage belongs to -- TODO confirm */
355
+ Parse_Options opts;
356
+ DIS_node * dis_con_tree; /* Disjunction-conjunction tree */
357
+ };
358
+
359
+
360
+
361
+ #endif
362
+
@@ -0,0 +1,72 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef _API_TYPES_H_
15
+ #define _API_TYPES_H_
16
+
17
+ #define MAX_TOKEN_LENGTH 50 /* maximum number of chars in a token */
18
+
19
+ /* MAX_SENTENCE cannot be more than 254, because word MAX_SENTENCE+1 is
20
+ * used to indicate that nothing can connect to this connector, and this
21
+ * should fit in one byte (if the word field of a connector is an
22
+ * unsigned char).
23
+ */
24
+ #define MAX_SENTENCE 250 /* maximum number of words in a sentence */
25
+ #define MAX_LINKS (2*MAX_SENTENCE-3) /* maximum number of links allowed; 497 while MAX_SENTENCE is 250 */
26
+
27
+
28
+ /* "public" typedefs: forward declarations only; the struct bodies are defined elsewhere */
29
+ typedef struct And_data_s And_data;
30
+ typedef struct Connector_struct Connector;
31
+ typedef struct Cost_Model_s Cost_Model;
32
+ typedef struct Dict_node_struct Dict_node;
33
+ typedef struct Domain_s Domain;
34
+ typedef struct DTreeLeaf_s DTreeLeaf;
35
+ typedef struct Image_node_struct Image_node;
36
+ typedef struct Label_node_s Label_node;
37
+ typedef struct Linkage_info_struct Linkage_info;
38
+ typedef struct Parse_info_struct *Parse_info; /* note: a pointer typedef, unlike most names in this list */
39
+ typedef struct Postprocessor_s Postprocessor;
40
+ typedef struct PP_data_s PP_data;
41
+ typedef struct PP_info_s PP_info;
42
+ typedef struct Regex_node_s Regex_node;
43
+ typedef struct Resources_s * Resources; /* note: also a pointer typedef */
44
+ typedef struct Sublinkage_s Sublinkage;
45
+
46
+ /* Some private typedefs */
47
+ typedef char Boolean; /* flag type stored in a single char */
48
+ typedef struct analyze_context_s analyze_context_t;
49
+ typedef struct count_context_s count_context_t;
50
+ typedef struct match_context_s match_context_t;
51
+
52
+ typedef struct Connector_set_s Connector_set;
53
+ typedef struct Disjunct_struct Disjunct;
54
+ typedef struct Exp_struct Exp;
55
+ typedef struct E_list_struct E_list;
56
+ typedef struct Link_s Link;
57
+ typedef struct List_o_links_struct List_o_links;
58
+ typedef struct Parse_set_struct Parse_set;
59
+ typedef struct String_set_s String_set;
60
+ typedef struct Word_struct Word;
61
+ typedef struct Word_file_struct Word_file;
62
+ typedef struct X_table_connector_struct X_table_connector;
63
+
64
+
65
+ typedef struct pp_knowledge_s pp_knowledge;
66
+
67
+ typedef struct corpus_s Corpus;
68
+ typedef struct sense_s Sense;
69
+ typedef struct cluster_s Cluster;
70
+
71
+ #endif
72
+