grammar_cop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,411 @@
1
+ !verbosity=1
2
+ !echo
3
+ !limit=1000
4
+ !batch
5
+ !short=20
6
+ !constituents=1
7
+ !spell=0
8
+
9
+ %UNITS
10
+
11
+ % Some basic cases
12
+ The 50 kDa protein is examined
13
+ The protein ( 50 kDa ) is examined
14
+ The mass is 50 kDa
15
+ A protein of 50 kDa is examined
16
+ The rate is 10 nm per msec
17
+ The rate is 10 nm per one msec of time
18
+ The last 195 bp of the DNA are examined
19
+
20
+ % These should work also if the tokens are "merges" of number and unit
21
+ The 50-kDa protein is examined
22
+ The 50kDa protein is examined
23
+ The mass is 50kDa
24
+ A protein of 50kDa is examined
25
+ The rate is 10nm per msec
26
+ The rate is 10nm per 1msec of time
27
+ The last 195bp of the DNA are examined
28
+
29
+ % A number of derived units are also recognized
30
+ The rate is 10 mg/sec
31
+ The rate is 10 mol/day
32
+ The rate is 10 micrograms/mouse/day
33
+
34
+ % Units previously in the dictionary have not been modified, and are
35
+ % not allowed all the necessary roles.
36
+ The 10 foot distance is examined
37
+
38
+ % The following don't work yet, and would require special ".p" definitions
39
+ % of the units, similar to, for example, feet.p and seconds.p.
40
+ % This would require an OD- connector (see feet.p)
41
+ It fell 10 nm
42
+ % This would require an OT- connector (see seconds.p)
43
+ It lasted 10 msec
44
+ % This would require Yd+ (see feet.p)
45
+ It happened 10 nm away
46
+ % This would require Yt+ (see seconds.p)
47
+ It happened 10 msec later
48
+ % This would require EC+ to have the intended parse (compare feet.p)
49
+ The 10 bp longer sequence is examined
50
+
51
+ % This one doesn't work because of "leftmost"
52
+ The leftmost 195 bp of the DNA are examined
53
+
54
+ % NUMERIC RANGES
55
+
56
+ The 50 to 100 kDa protein is examined
57
+ Exons 40 to 50 were examined
58
+ It was submitted in the period 1990 to 1995
59
+ The range is 0 to 1
60
+ The range is nine to twenty
61
+ % ambiguities are mainly due to the bracketed part
62
+ The production is low in rich media ( 50 to 300 LrpC molecules per cell )
63
+ The enzyme has a weight of 125,000 to 130,000
64
+ The domain ( residues 73 to 90 ) was shown
65
+ The analysis revealed a transcript after 6 to 7.5 h
66
+ CSF reached 50 to 100 nM
67
+ A shift from 37 to 20 degrees C resulted in an increase
68
+ An operon showed at 310 to 320 kb
69
+ The sequence shows 28.2 to 34.6 % identities
70
+ In the past 4 to 5 years results have advanced
71
+ One is the region 1911 to 1917
72
+ % unnecessary (?) ambiguity with the range linking to "are"
73
+ The segment includes the pacL from which genes 1 to 7 are transcribed
74
+ The gene 1 to 7 mRNA synthesis was reduced
75
+ There are deviations of 2 to 3 A
76
+ % the range should link to the verb with MVp (~ "north"): too ambiguous?
77
+ These transcripts are located 5 to 3
78
+ These transcripts are located 5' to 3'
79
+
80
+ % Ranges with hyphens
81
+
82
+ The 50 - 100 kDa protein is examined
83
+ Exons 40 -- 50 were examined
84
+ It was submitted in the period 1990 --- 1995
85
+
86
+ % Cases with "from" (e.g. "from 10 to 20") (TODO):
87
+ % shifts
88
+ The number of repeats was increased from 7 to 11 .
89
+ Experiments revealed an increase from 0 to 5 min postinduction
90
+ The transfer of cells from 37 to 50 degrees C repressed synthesis .
91
+ The concentration increased from 125 to 325 microgram per assay .
92
+ % ranges
93
+ We inserted sequences from 5 to 21 bp in length
94
+ The start point ranged from 17 to 18 base pairs
95
+ The promoter contained a sequence near the region ( from 60 to 73 % )
96
+ A region extending from 183 to 118 base pairs was required
97
+
98
+ % Cases with "between"
99
+ Transcription sets in ( between 8 and 16 h of culture ) .
100
+ The pH optimum is between 5 and 7
101
+ concentrations were measured between 0 and 3 h after the beginning
102
+ it shows activity at a temperature between 60 and 70 degrees C
103
+ The activity is stable between pH 6 and 12
104
+
105
+ % Numeric ranges with "merged" units (or "fold") also occur
106
+ An 80 to 100-fold increase was observed
107
+ Antigens ranged 82 to 90%
108
+
109
+ % FOLD-WORDS
110
+
111
+ This included an up to threefold increase
112
+ There were increases in proteins, including actin (twofold to threefold)
113
+ The association rate constant is also increased about 2-fold
114
+ The affinity is approximately 30-fold weaker
115
+ Leaves display a 2-fold accumulation
116
+ This was about 10-fold higher
117
+ % correct parse ranked second
118
+ sigmaF was some twofold higher than sigmaE
119
+ % does not parse
120
+ It was two to threefold more abundant
121
+ I found an at least twofold reduction
122
+ It showed a fivefold anaerobic induction
123
+ loci increased more than twofold
124
+ % correct parse ranked fourth
125
+ It corresponds to one- , two- and threefold phosphorilated proteins
126
+ The structure shows a fold consisting of a beta-sheet
127
+
128
+ % EQUATIONS ETC.
129
+
130
+ non-denaturing gradient gel electrophoresis (r = 0.859) was used
131
+ preparations of 5 x 10(8) cfu/ml are made
132
+ phosphorylation was observed (P = 0.06)
133
+ bacteria with low G + C DNA content contain genes
134
+ the strength was in the order of gerE > cotD > yfhP P2 > yfhP P1
135
+ delta binds RNAP with an affinity of 2.5 x 10(6) M-1
136
+
137
+ % "x" between numbers denoting multiplication
138
+ A single cell inside a pool of 5 x 10000 lymphocytes could be quantified
139
+ A single cell inside a pool of 5 x 10(4) could be quantified
140
+
141
+ % "Arrows"
142
+ We consider the MPO --> PAG pathway
143
+ Codon 311 (Cys --> Ser) polymorphism is associated with apolipoprotein E4
144
+
145
+ % GREEK LETTERS
146
+ minicells revealed the expression of both lambda and SPP1 genes
147
+ We cloned a new gene encoding an alternative sigma factor
148
+ The sigma factor sigma 35 of B. thuringiensis is homologous
149
+ Each polymerase had a subunit composition analogous to beta beta2 alpha sigma delta omega 1 omega 2
150
+
151
+ % SOME ADVERB CASES
152
+ % compare
153
+ patients are treated, therefore, even if they are negative
154
+ patients are treated, however, even if they are negative
155
+ % related?
156
+ the results indicated, therefore, that it is required
157
+
158
+ they prefer studies that are, however, open
159
+ more importantly , they are open
160
+
161
+ % i.e., e.g. and related
162
+ they were not side-by-side (i.e., stacked)
163
+ antagonists (e.g. WEB-2086) were examined
164
+ receptors, e.g. GluR5 and GluR, have been examined
165
+ there is genetic heterogeneity, i.e. there are several genes
166
+
167
+ % UPSTREAM, DOWNSTREAM, 3', 5'
168
+ The soil from the river banks is washed downstream.
169
+ He was making his way upstream.
170
+ The view is upstream and the discharge is about 5.0 m3/s.
171
+ It will require more information from upstream.
172
+ The inverted repeat is found upstream of the promoter.
173
+ The promoter is located immediately upstream of ftsY.
174
+ The cryIAb gene was located 3 kb upstream of its initiator codon.
175
+ mphR is located downstream from mrx.
176
+ These transcripts are oriented 5' to 3'
177
+
178
+ % MISCELLANEA (commented out not to confuse)
179
+ % the patient tested negative
180
+
181
+ % "MADE OF" VARIANTS
182
+ the protein films have a microstructure formed of woven sheaves
183
+ The sheaves are composed of well-defined whisker crystallites
184
+ Different conjugates, composed of a peptide carrier and a cytotoxic moiety, have been investigated
185
+ A study was made of the stability
186
+ a protein made of the luminal domain fused to the tail
187
+ the intracellular pool of enzyme is formed of newly synthesized molecules
188
+
189
+ % "DESIGNATED" ETC.
190
+ Mice that express epitope tagged SF-1 are being used
191
+ The method labelled FAXS is rapid
192
+
193
+ % ATTACH TO
194
+ Isolated eosinophils from healthy donors rapidly attach to ASMC
195
+ Dystrophin can attach to the cytoskeleton
196
+
197
+ % INVESTIGATED, EXAMINED WHETHER
198
+ this study examined whether AQP1 is present in HPMC
199
+
200
+
201
+
202
+ % from PASBio
203
+
204
+ % abolish.01
205
+ % MEDLINE No.1
206
+ This mutation abolishes splicing
207
+ % EMBO No.1 (passive)
208
+ Transcription is completely abolished
209
+
210
+ % alter.01
211
+ % MEDLINE No.1
212
+ Mutations alter splice sites
213
+ % EMBO No.3
214
+ Phosphorylation was not altered by treating the cells
215
+
216
+ % begin.01 ("start existing")
217
+ % EMBO No.1
218
+ The density begins between amino acids 136 and 140
219
+
220
+ % begin.02 ("start doing")
221
+ % EMBO No.1
222
+ The levels begin to return
223
+
224
+ % block.01
225
+ % MEDLINE No.1 ('the' was manually added to "step")
226
+ Mutations block the step II
227
+ % EMBO No.3 (passive)
228
+ Labeling is blocked by pre-incubation
229
+
230
+ % catalyse.01, ("catalyze") is unknown
231
+ % EMBO No.2
232
+ enzymes catalyze the unwinding
233
+ % MEDLINE No.1 (passive)
234
+ The metabolism is most likely catalysed by P450
235
+
236
+ % confer.01
237
+ % MEDLINE No.2
238
+ The variant does not confer a risk
239
+ % EMBO No.1 (passive)
240
+ The phenotype can be conferred by replacing the C-terminus with Stat5
241
+
242
+ % decrease.01
243
+ % MEDLINE No.1
244
+ Treatment decreased synthesis
245
+ % EMBO No.2 (passive)
246
+ The protection is decreased
247
+
248
+ % delete.01
249
+ % MEDLINE No.1
250
+ Transcripts delete exons
251
+ % EMBO No.4 (passive)
252
+ the binding was deleted
253
+
254
+ % develop.01
255
+ % MEDLINE No.1 ('a' was manually added to "deficiency")
256
+ The son developed a deficiency
257
+
258
+ % disrupt.01
259
+ % MEDLINE No.1
260
+ A mutation disrupted a sequence
261
+
262
+ % eliminate.01
263
+ % MEDLINE No.1
264
+ Deletion would eliminate a residue within a domain
265
+ % EMBO No.4 (passive)
266
+ All three sites are eliminated
267
+
268
+ % encode.01
269
+ % MEDLINE No.1
270
+ Supt4h2 encodes a protein
271
+ % EMBO No.1
272
+ SBP2 may be encoded by three transcripts
273
+
274
+ % express.01
275
+ % MEDLINE No.1 (passive) ('the' was manually added to "brain")
276
+ The enzyme was expressed exclusively in the brain
277
+ % MEDLINE No.7
278
+ Retroelements express Pol
279
+
280
+ % generate.01
281
+ % MEDLINE No.1
282
+ Prnd generates transcripts
283
+ % MEDLINE No.7
284
+ Molecules are generated by an alternative splicing
285
+
286
+ % inhibit.01 MEDLINE No.1
287
+ This peptide inhibited binding
288
+ % inhibit.01 MEDLINE No.2 (passive)
289
+ Isoforms are inhibited by rolipram
290
+
291
+ % initiate.01 MEDLINE No.1 (?)
292
+ Tumours had altered mRNAs , initiated within intron 1
293
+ % initiate.01 MEDLINE No.2
294
+ Cells initiate transcription at multiple sites
295
+ % initiate.01 MEDLINE No.3 (intransitive)
296
+ Translation initiates from an internal codon
297
+
298
+ % lead.01 MEDLINE No.1
299
+ A mutation leads to ligation
300
+ % lead.01 ? (passive)
301
+
302
+ % lose.01 MEDLINE No.1
303
+ A variant which lost a site has been characterized
304
+ % lose.01 EMBO No.3 (passive)
305
+ Anchoring ability was lost
306
+
307
+ % modify.01 MEDLINE No.1 (passive)
308
+ Genes were modified
309
+ % modify.01 EMBO No.2
310
+ Factors that can modify the binding may regulate binding
311
+
312
+ % mutate.01 MEDLINE No.1 (adj?)
313
+ % [there may be a problem here with "deficiency"]
314
+ The mutated allele resulted in deficiency
315
+ % mutate.01 MEDLINE No.2 (passive participle postmodifier)
316
+ The gene mutated in mice encodes a protein
317
+ % mutate.01 EMBO No.4
318
+ The fragments were mutated by the sequence
319
+ % mutate.01 ? (active)
320
+
321
+ % proliferate.01 MEDLINE No.1
322
+ Cells are characterized by an ability to proliferate
323
+ % proliferate.01 MEDLINE No.2
324
+ Cells proliferate
325
+ % proliferate.01 ? (passive)
326
+
327
+ % recognize.01 MEDLINE No.1 (passive participle postmodifier)
328
+ The protein would lack epitopes recognized by the serum
329
+ % recognize.01 MEDLINE No.3
330
+ A number recognized by cells have been isolated
331
+ % recognize.01 MEDLINE No.5
332
+ Antibodies recognize specifically a polypeptide
333
+
334
+ % result.01 MEDLINE No.1
335
+ We report the existence of isoforms which result from splicing
336
+ % result.01 MEDLINE No.2
337
+ Both mutations result in high proportions of mRNAs
338
+ % result.01 ? (passive)
339
+
340
+ % skip.01 MEDLINE No.1
341
+ sequencing revealed a mutation, which skipped exon 3
342
+ % skip.01 MEDLINE No.2
343
+ An exon can be skipped by splicing
344
+
345
+ % splice.01 MEDLINE No.1
346
+ exon 30 is spliced together with the intron
347
+ % splice.01 PNAS No.4
348
+ 3I spliced 20% as efficiently as 3F
349
+ % splice.01 PNAS No.4 (oversimplified?)
350
+ 3I spliced
351
+
352
+ % splice.02 MEDLINE No.1
353
+ CD1c has a form that is thought to be spliced out
354
+ % splice.02 MEDLINE No.2
355
+ One exon is spliced out of the transcript
356
+ % splice.02 MEDLINE No.9
357
+ Exon 16 can be spliced out
358
+
359
+ % transcribe.01 MEDLINE No.1
360
+ The gene is transcribed
361
+ % transcribe.01 MEDLINE No.2
362
+ KLK41 transcribes two alternative transcripts
363
+
364
+ % transform.01 MEDLINE No.1
365
+ FGF8b can transform the midbrain into a cerebellum fate
366
+ % transform.01 MEDLINE No.3
367
+ Phospholipase D is known to transform cells into tumorigenic forms
368
+
369
+ % transform.02 MEDLINE No.1
370
+ The DNA was used to transform E. coli
371
+
372
+ % translate.01 MEDLINE No.1
373
+ Splicing results in a transcript which would be translated into a protein
374
+ % translate.01 EMBO No.1
375
+ Stat1 was translated
376
+ % translate.01 EMBO No.2 (oversimplified? slightly modified)
377
+ Stat1 translated can become a dimer
378
+
379
+ % translate.02 MEDLINE No.1
380
+ This review examines technologies that can be used to translate information
381
+ % translate.02 MEDLINE No.3
382
+ Acj6 translates information into specificity
383
+
384
+ % translate.03 MEDLINE No.1
385
+ The functions translate into modulations
386
+
387
+ % truncate.01 MEDLINE No.1
388
+ Changes were predicted to truncate the protein
389
+ % truncate.01 MEDLINE No.2
390
+ The domains were truncated
391
+
392
+ % verbs taking particles
393
+ The recombinant plasmid was screened out
394
+ The adenine bulge is looped out
395
+ A monomer is built up of strands
396
+
397
+ % "in gel"
398
+ % should have been in-gel to be grammatical -- could a spell checker guess this?
399
+ They were measured by in gel kinase assays
400
+
401
+
402
+ % IMPORTANT: CAPITALIZED-WORDS SHOULD ALLOW "that" ETC. (<noun-sub-s> missing)
403
+ it encodes a GPCR that is homologous to the chemokine
404
+ % compare
405
+ it encodes a gPCR that is homologous to the chemokine
406
+
407
+ % "ORDERED" as an adjective
408
+ The complex plays a role in the construction of ordered multicellular structures
409
+
410
+ % CONCENTRATED as an adjective
411
+ the genes were most concentrated in the cell
@@ -0,0 +1,127 @@
1
+ DOMAIN_STARTER_LINKS: S##* S##i SF SX Ce* Cet RS O OX Js Jp J* MVp Mp R* Rn Pa TH Pg Pv I PP Cr
2
+ Cs MX#* Wc Wd Wi
3
+ TI BIt MVs TO Mv Mg* MVa OF ON IN QI Ma CP* CPi CPx MVt S##w MX#r Pp MVi MVg Mgp MVx SI MX#p Cc
4
+ S##t S##h S##b S##q L MX#a MG JG MX#x U S##d JT MVh Mr B#w B#d MVb COq Mj OD CX S##g PF Zc MX#d Bc
5
+ K NIax
6
+
7
+ DOMAIN_CONTAINS_LINKS:
8
+
9
+ URFL_ONLY_DOMAIN_STARTER_LINKS:
10
+
11
+ LEFT_DOMAIN_STARTER_LINKS: A EA E CO#s CO* COd* YS YP Yt Yd GN DTie CO#n COp D##n ND
12
+
13
+ STARTING_LINK_TYPE_TABLE:
14
+ S##* v
15
+ S##i v
16
+ S##t v
17
+ S##h v
18
+ S##b v
19
+ S##q v
20
+ S##d z
21
+ S##g v
22
+ SF v
23
+ SX v
24
+ Wd s
25
+ Wc s
26
+ Wi z
27
+ Ce* s
28
+ Cet s
29
+ TH b
30
+ RS z
31
+ O n
32
+ OX n
33
+ Js n
34
+ Jp n
35
+ J* n
36
+ MVp p
37
+ Mp p
38
+ R* b
39
+ Rn s
40
+ Pa a
41
+ A u
42
+ Pg v
43
+ Pv v
44
+ I v
45
+ PP v
46
+ Cr s
47
+ Cs s
48
+ MX#* n
49
+ TI n
50
+ BIt n
51
+ MVs b
52
+ EA e
53
+ E e
54
+ TO t
55
+ MVi t
56
+ Mv v
57
+ Mg* v
58
+ CO#s f
59
+ CO* g
60
+ COd* g
61
+ CO#n g
62
+ MVa i
63
+ MVb i
64
+ OF p
65
+ YS y
66
+ YP y
67
+ IN n
68
+ ON n
69
+ QI b
70
+ Ma a
71
+ CP* s
72
+ CPi s
73
+ CPx q
74
+ MVt p
75
+ MX#r b
76
+ S##w z
77
+ Pp p
78
+ MVg z
79
+ Mgp z
80
+ MVx p
81
+ SI n
82
+ MX#p v
83
+ Cc s
84
+ L a
85
+ MX#a a
86
+ Yt y
87
+ Yd y
88
+ MG p
89
+ JG n
90
+ GN y
91
+ MX#x p
92
+ U n
93
+ JT n
94
+ MVh b
95
+ Mr b
96
+ MX#d b
97
+ B#w s
98
+ B#d s
99
+ DTie e
100
+ COq s
101
+ Mj b
102
+ OD n
103
+ CX s
104
+ COp c
105
+ PF v
106
+ Zc s
107
+ Bc s
108
+ K k
109
+ D##n d
110
+ ND d
111
+ NIax h
112
+
113
+ ; ----------------------------------------------------------------------
114
+ ; These links are not put in the word/link graph. They also cannot be the
115
+ ; starter links for a domain.
116
+
117
+ IGNORE_THESE_LINKS: Xca HA
118
+
119
+
120
+
121
+ ; ----------------------------------------------------------------------
122
+ ; these links are not traced further if they point back before the root word
123
+
124
+ RESTRICTED_LINKS:
125
+ B#* D##w B#w B#d AFh MVt Xx HL SFsic AFd Bc CX EAh
126
+ H HA PFc B#j Wd PF Z BW
127
+