grammar_cop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,27 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ link_private void free_AND_tables(Sentence sent); /* NOTE(review): link_private is defined outside this header; presumably it limits symbol visibility -- confirm */
14
+ link_private void print_AND_statistics(Sentence sent);
15
+ link_private void init_andable_hash_table(Dictionary dict);
16
+ link_private void free_andable_hash_table(Dictionary dict);
17
+ link_private void initialize_conjunction_tables(Sentence sent);
18
+ link_private int is_canonical_linkage(Sentence sent);
19
+ link_private Disjunct * build_AND_disjunct_list(Sentence sent, char *); /* NOTE(review): second parameter is unnamed in this prototype; check and.c for its meaning */
20
+ link_private Disjunct * build_COMMA_disjunct_list(Sentence sent);
21
+ link_private Disjunct * explode_disjunct_list(Sentence sent, Disjunct *); /* NOTE(review): second parameter is unnamed in this prototype */
22
+ link_private void build_conjunction_tables(Sentence);
23
+ link_private void compute_pp_link_array_connectors(Sentence sent, Sublinkage *sublinkage);
24
+
25
+ /* The following must be visible to the SAT solver, so they cannot be declared link_private. */
26
+ int set_has_fat_down(Sentence sent);
27
+ const char * intersect_strings(Sentence sent, const char * s, const char * t);
@@ -0,0 +1,362 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ /*****************************************************************************
15
+ *
16
+ * NOTE: There are five basic "types" in the link parser API. These are:
17
+ *
18
+ * Dictionary, Parse_Options, Sentence, Linkage, PostProcessor
19
+ *
20
+ * To make the use of the API simpler, each of these is typedef'ed as a pointer
21
+ * to a data structure. As a result, some of the code may look a little funny,
22
+ * since it uses pointers in a way that is syntactically inconsistent. After
23
+ * working with these basic types for a while, this should no longer be confusing.
24
+ *
25
+ ******************************************************************************/
26
+
27
+ #ifndef _API_STRUCTURESH_
28
+ #define _API_STRUCTURESH_
29
+
30
+ #include <wchar.h>
31
+ #include "api-types.h"
32
+ #include "structures.h" /* for definition of Link */
33
+ #include "corpus/corpus.h"
34
+ #include "error.h"
35
+
36
+ typedef enum /* Selects the ordering used when sorting linkages */
37
+ {
38
+ VDAL=1, /* Sort by Violations, Disjunct cost, And cost, Link cost */
39
+ CORPUS, /* Sort by Corpus cost */
40
+ } Cost_Model_type;
41
+
42
+ struct Cost_Model_s /* A cost-model tag together with the function that compares two linkages */
43
+ {
44
+ Cost_Model_type type; /* which cost model this is (VDAL or CORPUS) */
45
+ int (*compare_fn)(Linkage_info *, Linkage_info *); /* NOTE(review): presumably a qsort-style comparator (negative/zero/positive) -- confirm */
46
+ };
47
+
48
+ struct Resources_s /* Time and memory limits plus bookkeeping for a parse */
49
+ {
50
+ int max_parse_time; /* in seconds */
51
+ size_t max_memory; /* in bytes */
52
+ double time_when_parse_started; /* NOTE(review): presumably seconds, like max_parse_time -- confirm */
53
+ size_t space_when_parse_started; /* NOTE(review): presumably bytes, like max_memory -- confirm */
54
+ double when_created;
55
+ double when_last_called;
56
+ double cumulative_time;
57
+ int memory_exhausted; /* NOTE(review): looks like a boolean flag -- confirm */
58
+ int timer_expired; /* NOTE(review): looks like a boolean flag -- confirm */
59
+ };
60
+
61
+ struct Parse_Options_s /* Parser settings, followed by display flags used only by the command-line client */
62
+ {
63
+ int verbosity; /* Level of detail to give about the computation 0 */
64
+ int use_sat_solver; /* Use the Boolean SAT based parser */
65
+ int linkage_limit; /* The maximum number of linkages processed 100 */
66
+ float disjunct_cost; /* Max disjunct cost to allow */
67
+ int use_fat_links; /* Look for fat linkages */
68
+ int min_null_count; /* The minimum number of null links to allow */
69
+ int max_null_count; /* The maximum number of null links to allow */
70
+ int null_block; /* consecutive blocks of this many words are
71
+ considered as one null link (default=1) */
72
+ int islands_ok; /* If TRUE, then linkages with islands
73
+ (separate component of the link graph)
74
+ will be generated (default=FALSE) */
75
+ int twopass_length; /* min length for two-pass post processing */
76
+ int max_sentence_length;
77
+ int short_length; /* Links that are limited in length can be
78
+ * no longer than this. Default = 6 */
79
+ int all_short; /* If true, there can be no connectors that are exempt */
80
+ int use_spell_guess; /* Perform spell-guessing of unknown words. */
81
+ Cost_Model cost_model; /* For sorting linkages in post_processing */
82
+ Resources resources; /* For deciding when to abort the parsing */
83
+
84
+ /* Flags governing the command-line client; not used by parser */
85
+ int display_short;
86
+ int display_word_subscripts; /* as in "dog.n" as opposed to "dog" */
87
+ int display_link_subscripts; /* as in "Ss" as opposed to "S" */
88
+ int display_walls;
89
+ int display_union; /* print squashed version of linkage with conjunction? */
90
+ int allow_null; /* true if we allow null links in parsing */
91
+ int use_cluster_disjuncts; /* if true, attempt using a broader list of disjuncts */
92
+ int echo_on; /* true if we should echo the input sentence */
93
+ int batch_mode; /* if true, process sentences non-interactively */
94
+ int panic_mode; /* if true, parse in "panic mode" after all else fails */
95
+ int screen_width; /* width of screen for displaying linkages */
96
+ int display_on; /* if true, output graphical linkage diagram */
97
+ int display_postscript; /* if true, output postscript linkage */
98
+ int display_constituents; /* if true, output treebank-style constituent structure */
99
+ int display_bad; /* if true, bad linkages are displayed */
100
+ int display_disjuncts; /* if true, print disjuncts that were used */
101
+ int display_links; /* if true, a list of links is printed out */
102
+ int display_senses; /* if true, sense candidates are printed out */
103
+ };
104
+
105
+ struct Connector_set_s /* A set of connectors stored in a hash table */
106
+ {
107
+ Connector ** hash_table; /* array of Connector pointers */
108
+ int table_size; /* NOTE(review): presumably the number of slots in hash_table -- confirm */
109
+ int is_defined; /* if 0 then there is no such set */
110
+ };
111
+
112
+ struct Dictionary_s /* A loaded dictionary: word tree, regexes, affix data, post-processors and reader state */
113
+ {
114
+ Dict_node * root;
115
+ Regex_node * regex_root;
116
+ const char * name;
117
+ const char * lang;
118
+
119
+ int use_unknown_word;
120
+ int unknown_word_defined;
121
+
122
+ /* If not null, then use spelling guesser for unknown words */
123
+ void * spell_checker; /* spell checker handle */
124
+ #if USE_CORPUS
125
+ Corpus * corpus; /* Statistics database */
126
+ #endif
127
+
128
+ #if DONT_USE_REGEX_GUESSING
129
+ /* English language morphology bits
130
+ * replaced by regex-based morpho guesser
131
+ * Dead code, remove at leisure.
132
+ */
133
+ int capitalized_word_defined;
134
+ int pl_capitalized_word_defined;
135
+ int hyphenated_word_defined;
136
+ int number_word_defined;
137
+ int ing_word_defined;
138
+ int s_word_defined;
139
+ int ed_word_defined;
140
+ int ly_word_defined;
141
+ #endif /* DONT_USE_REGEX_GUESSING */
142
+
143
+ int left_wall_defined;
144
+ int right_wall_defined;
145
+
146
+ /* Affixes are used during the tokenization stage. */
147
+ Dictionary affix_table; /* the affix data is itself held as a Dictionary */
148
+ int r_strippable; /* right */
149
+ int l_strippable; /* left */
150
+ int u_strippable; /* units on left */
151
+ int s_strippable; /* generic suffix */
152
+ int p_strippable; /* generic prefix */
153
+ const char ** strip_left;
154
+ const char ** strip_right;
155
+ const char ** strip_units;
156
+ const char ** prefix;
157
+ const char ** suffix;
158
+
159
+ Postprocessor * postprocessor;
160
+ Postprocessor * constituent_pp;
161
+ int andable_defined;
162
+ Connector_set * andable_connector_set; /* NULL=everything is andable */
163
+ Connector_set * unlimited_connector_set; /* NULL=everything is unlimited */
164
+ int max_cost;
165
+ String_set * string_set; /* Set of link names constructed during parsing */
166
+ int num_entries;
167
+ Word_file * word_file_header;
168
+
169
+ /* exp_list links together all the Exp structs that are allocated
170
+ * in reading this dictionary. Needed for freeing the dictionary
171
+ */
172
+ Exp * exp_list;
173
+
174
+ /* Private data elements that come in play only while the
175
+ * dictionary is being read, and are not otherwise used.
176
+ */
177
+ FILE * fp;
178
+ char token[MAX_TOKEN_LENGTH]; /* fixed-size buffer of MAX_TOKEN_LENGTH chars */
179
+ int is_special; /* boolean */
180
+ wint_t already_got_it;
181
+ int line_number;
182
+ int recursive_error; /* boolean */
183
+ mbstate_t mbss; /* multi-byte shift state */
184
+ };
185
+
186
+ struct Label_node_s /* Node of a singly linked list of integer labels */
187
+ {
188
+ int label;
189
+ Label_node * next; /* next node in the list */
190
+ };
191
+
192
+ #define HT_SIZE (1<<10) /* 1024; number of buckets in And_data_s.hash_table */
193
+
194
+ struct And_data_s /* Label table, label hash table and statistics counters */
195
+ {
196
+ int LT_bound;
197
+ int LT_size;
198
+ Disjunct ** label_table; /* NOTE(review): LT_bound and LT_size presumably describe this table -- confirm */
199
+ Label_node * hash_table[HT_SIZE]; /* each bucket is a Label_node list head */
200
+
201
+ /* keeping statistics */
202
+ int STAT_N_disjuncts;
203
+ int STAT_calls_to_equality_test;
204
+ };
205
+
206
+ struct Parse_info_struct /* Parse working state: x_table, parse set, chosen disjuncts and link array */
207
+ {
208
+ int x_table_size;
209
+ int log2_x_table_size; /* NOTE(review): presumably log base 2 of x_table_size -- confirm */
210
+ X_table_connector ** x_table;
211
+ Parse_set * parse_set;
212
+ int N_words;
213
+ Disjunct ** chosen_disjuncts;
214
+ int N_links;
215
+ Link link_array[MAX_LINKS]; /* fixed capacity of MAX_LINKS entries */
216
+
217
+ /* Points to the image structure for each word.
218
+ * NULL if not a fat word. */
219
+ Image_node ** image_array;
220
+
221
+ /* Array of boolean flags, one per word. Set to TRUE if this
222
+ * word has a fat down link. FALSE otherwise. */
223
+ Boolean *has_fat_down;
224
+
225
+ /* thread-safe random number state */
226
+ unsigned int rand_state;
227
+ };
228
+
229
+ struct Sentence_s /* Per-sentence state: tokenized words, linkage counts and private per-stage contexts */
230
+ {
231
+ Dictionary dict; /* words are defined from this dictionary */
232
+ const char *orig_sentence; /* Copy of original sentence */
233
+ int length; /* number of words */
234
+ Word word[MAX_SENTENCE]; /* array of words after tokenization */
235
+ char * is_conjunction; /* Array of flags, one per word; set to
236
+ TRUE if conjunction, as defined by dictionary */
237
+ char** deletable; /* deletable regions in a sentence with conjunction */
238
+ char** dptr; /* private pointer for mem management only */
239
+ char** effective_dist;
240
+ int num_linkages_found; /* total number before postprocessing. This
241
+ is returned by the count() function */
242
+ int num_linkages_alloced;/* total number of linkages allocated.
243
+ the number post-processed might be fewer
244
+ because some are non-canonical */
245
+ int num_linkages_post_processed;
246
+ /* The number of linkages that are actually
247
+ put into the array that was alloced.
248
+ This is not the same as num alloced
249
+ because some may be non-canonical. */
250
+ int num_valid_linkages; /* number with no pp violations */
251
+ int num_thin_linkages; /* valid linkages which are not fat */
252
+ int null_links; /* null links allowed */
253
+ int null_count; /* number of null links in linkages */
254
+ Parse_info parse_info; /* set of parses for the sentence */
255
+ Linkage_info * link_info; /* array of valid and invalid linkages (sorted) */
256
+ String_set * string_set; /* used for word names, not connectors */
257
+ And_data and_data; /* used to keep track of fat disjuncts */
258
+ char q_pruned_rules; /* don't prune rules more than once in p.p. */
259
+ int post_quote[MAX_SENTENCE]; /* Used only by tokenizer. */
260
+
261
+ analyze_context_t * analyze_ctxt; /* private state used for analyzing */
262
+ count_context_t * count_ctxt; /* private state info used for counting */
263
+ match_context_t * match_ctxt; /* private state info used for matching */
264
+ /* thread-safe random number state */
265
+ unsigned int rand_state;
266
+
267
+ /* Hook for the SAT solver */
268
+ void *hook; /* untyped pointer; its contents are not described in this header */
269
+ };
270
+
271
+ /*********************************************************
272
+ *
273
+ * Post processing
274
+ *
275
+ **********************************************************/
276
+
277
+ struct Domain_s /* A post-processing domain: its links, starting link, type letter and tree pointers */
278
+ {
279
+ const char * string;
280
+ int size;
281
+ List_o_links * lol; /* NOTE(review): presumably the list of links in this domain -- confirm */
282
+ int start_link; /* the link that started this domain */
283
+ int type; /* one letter name */
284
+ DTreeLeaf * child;
285
+ Domain * parent;
286
+ };
287
+
288
+
289
+ struct DTreeLeaf_s /* Leaf hanging off a Domain; leaves are chained through next */
290
+ {
291
+ Domain * parent; /* the domain this leaf belongs to */
292
+ int link; /* NOTE(review): presumably an index identifying a link -- confirm */
293
+ DTreeLeaf * next;
294
+ };
295
+
296
+ struct PP_data_s /* Post-processing data: per-word link lists and the domain array */
297
+ {
298
+ int N_domains; /* NOTE(review): presumably the number of entries used in domain_array -- confirm */
299
+ List_o_links * word_links[MAX_SENTENCE]; /* one list head per word position */
300
+ List_o_links * links_to_ignore;
301
+ Domain domain_array[MAX_LINKS]; /* the domains, sorted by size */
302
+ int length; /* length of current sentence */
303
+ };
304
+
305
+ struct PP_info_s /* Domain names attached to a link (Sublinkage_s holds PP info for each link) */
306
+ {
307
+ int num_domains; /* NOTE(review): presumably the number of entries in domain_name -- confirm */
308
+ const char ** domain_name; /* array of domain-name strings */
309
+ };
310
+
311
+ struct Postprocessor_s /* Post-processing rules plus the state kept during a post_process() call */
312
+ {
313
+ pp_knowledge *knowledge; /* internal rep'n of the actual rules */
314
+ int n_global_rules_firing; /* this & the next are diagnostic */
315
+ int n_local_rules_firing;
316
+ pp_linkset *set_of_links_of_sentence; /* seen in *any* linkage of sent */
317
+ pp_linkset *set_of_links_in_an_active_rule; /* used in *some* linkage of sent */
318
+ int *relevant_contains_one_rules; /* -1-terminated list of indices */
319
+ int *relevant_contains_none_rules; /* NOTE(review): presumably also a -1-terminated list of indices -- confirm */
320
+ /* the following maintain state during a call to post_process() */
321
+ String_set *sentence_link_name_set; /* link names seen for sentence */
322
+ int visited[MAX_SENTENCE]; /* for the depth-first search */
323
+ PP_node *pp_node;
324
+ PP_data pp_data;
325
+ };
326
+
327
+
328
+ /*********************************************************
329
+ *
330
+ * Linkages
331
+ *
332
+ **********************************************************/
333
+
334
+ struct Sublinkage_s /* One sublinkage: its links, per-link PP info and any violation name */
335
+ {
336
+ int num_links; /* Number of links in array */
337
+ Link ** link; /* Array of links */
338
+ PP_info * pp_info; /* PP info for each link */
339
+ const char * violation; /* Name of violation, if any */
340
+ PP_data pp_data; /* embedded by value, not a pointer */
341
+ };
342
+
343
+ typedef struct DIS_node_struct DIS_node; /* forward declaration; only a pointer to it is used below */
344
+
345
+ struct Linkage_s /* A linkage: words, sublinkages and the sentence and options it refers to */
346
+ {
347
+ int num_words; /* number of (tokenized) words */
348
+ const char * * word; /* array of word spellings */
349
+ Linkage_info* info; /* index and cost information */
350
+ int num_sublinkages; /* One for thin linkages, bigger for fat */
351
+ int current; /* Allows user to select particular sublinkage */
352
+ Sublinkage * sublinkage; /* A parse with conjunctions will have several */
353
+ int unionized; /* if TRUE, union of links has been computed */
354
+ Sentence sent;
355
+ Parse_Options opts;
356
+ DIS_node * dis_con_tree; /* Disjunction-conjunction tree */
357
+ };
358
+
359
+
360
+
361
+ #endif
362
+
@@ -0,0 +1,72 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef _API_TYPES_H_
15
+ #define _API_TYPES_H_
16
+
17
+ #define MAX_TOKEN_LENGTH 50 /* maximum number of characters in a token */
18
+
19
+ /* MAX_SENTENCE cannot be more than 254, because the word value MAX_SENTENCE+1
20
+ * is used to indicate that nothing can connect to this connector, and that
21
+ * value has to fit in one byte (if the word field of a connector is an
22
+ * unsigned char).
23
+ */
24
+ #define MAX_SENTENCE 250 /* maximum number of words in a sentence */
25
+ #define MAX_LINKS (2*MAX_SENTENCE-3) /* maximum number of links allowed */
26
+
27
+
28
+ /* "public" typedefs: forward declarations only; no struct is defined in this header */
29
+ typedef struct And_data_s And_data;
30
+ typedef struct Connector_struct Connector;
31
+ typedef struct Cost_Model_s Cost_Model;
32
+ typedef struct Dict_node_struct Dict_node;
33
+ typedef struct Domain_s Domain;
34
+ typedef struct DTreeLeaf_s DTreeLeaf;
35
+ typedef struct Image_node_struct Image_node;
36
+ typedef struct Label_node_s Label_node;
37
+ typedef struct Linkage_info_struct Linkage_info;
38
+ typedef struct Parse_info_struct *Parse_info; /* NOTE: pointer typedef -- Parse_info is itself a pointer */
39
+ typedef struct Postprocessor_s Postprocessor;
40
+ typedef struct PP_data_s PP_data;
41
+ typedef struct PP_info_s PP_info;
42
+ typedef struct Regex_node_s Regex_node;
43
+ typedef struct Resources_s * Resources; /* NOTE: pointer typedef -- Resources is itself a pointer */
44
+ typedef struct Sublinkage_s Sublinkage;
45
+
46
+ /* Some private typedefs */
47
+ typedef char Boolean; /* plain char: signedness is implementation-defined */
48
+ typedef struct analyze_context_s analyze_context_t;
49
+ typedef struct count_context_s count_context_t;
50
+ typedef struct match_context_s match_context_t;
51
+
52
+ typedef struct Connector_set_s Connector_set;
53
+ typedef struct Disjunct_struct Disjunct;
54
+ typedef struct Exp_struct Exp;
55
+ typedef struct E_list_struct E_list;
56
+ typedef struct Link_s Link;
57
+ typedef struct List_o_links_struct List_o_links;
58
+ typedef struct Parse_set_struct Parse_set;
59
+ typedef struct String_set_s String_set;
60
+ typedef struct Word_struct Word;
61
+ typedef struct Word_file_struct Word_file;
62
+ typedef struct X_table_connector_struct X_table_connector;
63
+
64
+
65
+ typedef struct pp_knowledge_s pp_knowledge;
66
+
67
+ typedef struct corpus_s Corpus;
68
+ typedef struct sense_s Sense;
69
+ typedef struct cluster_s Cluster;
70
+
71
+ #endif
72
+