grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,411 @@
1
+ !verbosity=1
2
+ !echo
3
+ !limit=1000
4
+ !batch
5
+ !short=20
6
+ !constituents=1
7
+ !spell=0
8
+
9
+ %UNITS
10
+
11
+ % Some basic cases
12
+ The 50 kDa protein is examined
13
+ The protein ( 50 kDa ) is examined
14
+ The mass is 50 kDa
15
+ A protein of 50 kDa is examined
16
+ The rate is 10 nm per msec
17
+ The rate is 10 nm per one msec of time
18
+ The last 195 bp of the DNA are examined
19
+
20
+ % These should work also if the tokens are "merges" of number and unit
21
+ The 50-kDa protein is examined
22
+ The 50kDa protein is examined
23
+ The mass is 50kDa
24
+ A protein of 50kDa is examined
25
+ The rate is 10nm per msec
26
+ The rate is 10nm per 1msec of time
27
+ The last 195bp of the DNA are examined
28
+
29
+ % A number of derived units are also recognized
30
+ The rate is 10 mg/sec
31
+ The rate is 10 mol/day
32
+ The rate is 10 micrograms/mouse/day
33
+
34
+ % Units previously in the dictionary have not been modified, and are
35
+ % not allowed all the necessary roles.
36
+ The 10 foot distance is examined
37
+
38
+ % The following don't work yet, and would require special ".p" definitions
39
+ % of the units, similar to, for example, feet.p and seconds.p.
40
+ % This would require an OD- connector (see feet.p)
41
+ It fell 10 nm
42
+ % This would require an OT- connector (see seconds.p)
43
+ It lasted 10 msec
44
+ % This would require Yd+ (see feet.p)
45
+ It happened 10 nm away
46
+ % This would require Yt+ (see seconds.p)
47
+ It happened 10 msec later
48
+ % This would require EC+ to have the intended parse (compare feet.p)
49
+ The 10 bp longer sequence is examined
50
+
51
+ % This one doesn't work because of "leftmost"
52
+ The leftmost 195 bp of the DNA are examined
53
+
54
+ % NUMERIC RANGES
55
+
56
+ The 50 to 100 kDa protein is examined
57
+ Exons 40 to 50 were examined
58
+ It was submitted in the period 1990 to 1995
59
+ The range is 0 to 1
60
+ The range is nine to twenty
61
+ % ambiguities are mainly due to the bracketed part
62
+ The production is low in rich media ( 50 to 300 LrpC molecules per cell )
63
+ The enzyme has a weight of 125,000 to 130,000
64
+ The domain ( residues 73 to 90 ) was shown
65
+ The analysis revealed a transcript after 6 to 7.5 h
66
+ CSF reached 50 to 100 nM
67
+ A shift from 37 to 20 degrees C resulted in an increase
68
+ An operon showed at 310 to 320 kb
69
+ The sequence shows 28.2 to 34.6 % identities
70
+ In the past 4 to 5 years results have advanced
71
+ One is the region 1911 to 1917
72
+ % unnecessary (?) ambiguity with the range linking to "are"
73
+ The segment includes the pacL from which genes 1 to 7 are transcribed
74
+ The gene 1 to 7 mRNA synthesis was reduced
75
+ There are deviations of 2 to 3 A
76
+ % the range should link to the verb with MVp (~ "north"): too ambiguous?
77
+ These transcripts are located 5 to 3
78
+ These transcripts are located 5' to 3'
79
+
80
+ % Ranges with hyphens
81
+
82
+ The 50 - 100 kDa protein is examined
83
+ Exons 40 -- 50 were examined
84
+ It was submitted in the period 1990 --- 1995
85
+
86
+ % Cases with "from" (e.g. "from 10 to 20") (TODO):
87
+ % shifts
88
+ The number of repeats was increased from 7 to 11 .
89
+ Experiments revealed an increase from 0 to 5 min postinduction
90
+ The transfer of cells from 37 to 50 degrees C repressed synthesis .
91
+ The concentration increased from 125 to 325 microgram per assay .
92
+ % ranges
93
+ We inserted sequences from 5 to 21 bp in length
94
+ The start point ranged from 17 to 18 base pairs
95
+ The promoter contained a sequence near the region ( from 60 to 73 % )
96
+ A region extending from 183 to 118 base pairs was required
97
+
98
+ % Cases with "between"
99
+ Transcription sets in ( between 8 and 16 h of culture ) .
100
+ The pH optimum is between 5 and 7
101
+ concentrations were measured between 0 and 3 h after the beginning
102
+ it shows activity at a temperature between 60 and 70 degrees C
103
+ The activity is stable between pH 6 and 12
104
+
105
+ % Numeric ranges with "merged" units (or "fold") also occur
106
+ An 80 to 100-fold increase was observed
107
+ Antigens ranged 82 to 90%
108
+
109
+ % FOLD-WORDS
110
+
111
+ This included an up to threefold increase
112
+ There were increases in proteins, including actin (twofold to threefold)
113
+ The association rate constant is also increased about 2-fold
114
+ The affinity is approximately 30-fold weaker
115
+ Leaves display a 2-fold accumulation
116
+ This was about 10-fold higher
117
+ % correct parse ranked second
118
+ sigmaF was some twofold higher than sigmaE
119
+ % does not parse
120
+ It was two to threefold more abundant
121
+ I found an at least twofold reduction
122
+ It showed a fivefold anaerobic induction
123
+ loci increased more than twofold
124
+ % correct parse ranked fourth
125
+ It corresponds to one- , two- and threefold phosphorilated proteins
126
+ The structure shows a fold consisting of a beta-sheet
127
+
128
+ % EQUATIONS ETC.
129
+
130
+ non-denaturing gradient gel electrophoresis (r = 0.859) was used
131
+ preparations of 5 x 10(8) cfu/ml are made
132
+ phosphorylation was observed (P = 0.06)
133
+ bacteria with low G + C DNA content contain genes
134
+ the strength was in the order of gerE > cotD > yfhP P2 > yfhP P1
135
+ delta binds RNAP with an affinity of 2.5 x 10(6) M-1
136
+
137
+ % "x" between numbers denoting multiplication
138
+ A single cell inside a pool of 5 x 10000 lymphocytes could be quantified
139
+ A single cell inside a pool of 5 x 10(4) could be quantified
140
+
141
+ % "Arrows"
142
+ We consider the MPO --> PAG pathway
143
+ Codon 311 (Cys --> Ser) polymorphism is associated with apolipoprotein E4
144
+
145
+ % GREEK LETTERS
146
+ minicells revealed the expression of both lambda and SPP1 genes
147
+ We cloned a new gene encoding an alternative sigma factor
148
+ The sigma factor sigma 35 of B. thuringiensis is homologous
149
+ Each polymerase had a subunit composition analogous to beta beta2 alpha sigma delta omega 1 omega 2
150
+
151
+ % SOME ADVERB CASES
152
+ % compare
153
+ patients are treated, therefore, even if they are negative
154
+ patients are treated, however, even if they are negative
155
+ % related?
156
+ the results indicated, therefore, that it is required
157
+
158
+ they prefer studies that are, however, open
159
+ more importantly , they are open
160
+
161
+ % i.e., e.g. and related
162
+ they were not side-by-side (i.e., stacked)
163
+ antagonists (e.g. WEB-2086) were examined
164
+ receptors, e.g. GluR5 and GluR, have been examined
165
+ there is genetic heterogeneity, i.e. there are several genes
166
+
167
+ % UPSTREAM, DOWNSTREAM, 3', 5'
168
+ The soil from the river banks is washed downstream.
169
+ He was making his way upstream.
170
+ The view is upstream and the discharge is about 5.0 m3/s.
171
+ It will require more information from upstream.
172
+ The inverted repeat is found upstream of the promoter.
173
+ The promoter is located immediately upstream of ftsY.
174
+ The cryIAb gene was located 3 kb upstream of its initiator codon.
175
+ mphR is located downstream from mrx.
176
+ These transcripts are oriented 5' to 3'
177
+
178
+ % MISCELLANEA (commented out so as not to confuse)
179
+ % the patient tested negative
180
+
181
+ % "MADE OF" VARIANTS
182
+ the protein films have a microstructure formed of woven sheaves
183
+ The sheaves are composed of well-defined whisker crystallites
184
+ Different conjugates, composed of a peptide carrier and a cytotoxic moiety, have been investigated
185
+ A study was made of the stability
186
+ a protein made of the luminal domain fused to the tail
187
+ the intracellular pool of enzyme is formed of newly synthesized molecules
188
+
189
+ % "DESIGNATED" ETC.
190
+ Mice that express epitope tagged SF-1 are being used
191
+ The method labelled FAXS is rapid
192
+
193
+ % ATTACH TO
194
+ Isolated eosinophils from healthy donors rapidly attach to ASMC
195
+ Dystrophin can attach to the cytoskeleton
196
+
197
+ % INVESTIGATED, EXAMINED WHETHER
198
+ this study examined whether AQP1 is present in HPMC
199
+
200
+
201
+
202
+ % from PASBio
203
+
204
+ % abolish.01
205
+ % MEDLINE No.1
206
+ This mutation abolishes splicing
207
+ % EMBO No.1 (passive)
208
+ Transcription is completely abolished
209
+
210
+ % alter.01
211
+ % MEDLINE No.1
212
+ Mutations alter splice sites
213
+ % EMBO No.3
214
+ Phosphorylation was not altered by treating the cells
215
+
216
+ % begin.01 ("start existing")
217
+ % EMBO No.1
218
+ The density begins between amino acids 136 and 140
219
+
220
+ % begin.02 ("start doing")
221
+ % EMBO No.1
222
+ The levels begin to return
223
+
224
+ % block.01
225
+ % MEDLINE No.1 ('the' was manually added to "step")
226
+ Mutations block the step II
227
+ % EMBO No.3 (passive)
228
+ Labeling is blocked by pre-incubation
229
+
230
+ % catalyse.01, ("catalyze") is unknown
231
+ % EMBO No.2
232
+ enzymes catalyze the unwinding
233
+ % MEDLINE No.1 (passive)
234
+ The metabolism is most likely catalysed by P450
235
+
236
+ % confer.01
237
+ % MEDLINE No.2
238
+ The variant does not confer a risk
239
+ % EMBO No.1 (passive)
240
+ The phenotype can be conferred by replacing the C-terminus with Stat5
241
+
242
+ % decrease.01
243
+ % MEDLINE No.1
244
+ Treatment decreased synthesis
245
+ % EMBO No.2 (passive)
246
+ The protection is decreased
247
+
248
+ % delete.01
249
+ % MEDLINE No.1
250
+ Transcripts delete exons
251
+ % EMBO No.4 (passive)
252
+ the binding was deleted
253
+
254
+ % develop.01
255
+ % MEDLINE No.1 ('a' was manually added to "deficiency")
256
+ The son developed a deficiency
257
+
258
+ % disrupt.01
259
+ % MEDLINE No.1
260
+ A mutation disrupted a sequence
261
+
262
+ % eliminate.01
263
+ % MEDLINE No.1
264
+ Deletion would eliminate a residue within a domain
265
+ % EMBO No.4 (passive)
266
+ All three sites are eliminated
267
+
268
+ % encode.01
269
+ % MEDLINE No.1
270
+ Supt4h2 encodes a protein
271
+ % EMBO No.1
272
+ SBP2 may be encoded by three transcripts
273
+
274
+ % express.01
275
+ % MEDLINE No.1 (passive) ('the' was manually added to "brain")
276
+ The enzyme was expressed exclusively in the brain
277
+ % MEDLINE No.7
278
+ Retroelements express Pol
279
+
280
+ % generate.01
281
+ % MEDLINE No.1
282
+ Prnd generates transcripts
283
+ % MEDLINE No.7
284
+ Molecules are generated by an alternative splicing
285
+
286
+ % inhibit.01 MEDLINE No.1
287
+ This peptide inhibited binding
288
+ % inhibit.01 MEDLINE No.2 (passive)
289
+ Isoforms are inhibited by rolipram
290
+
291
+ % initiate.01 MEDLINE No.1 (?)
292
+ Tumours had altered mRNAs , initiated within intron 1
293
+ % initiate.01 MEDLINE No.2
294
+ Cells initiate transcription at multiple sites
295
+ % initiate.01 MEDLINE No.3 (intransitive)
296
+ Translation initiates from an internal codon
297
+
298
+ % lead.01 MEDLINE No.1
299
+ A mutation leads to ligation
300
+ % lead.01 ? (passive)
301
+
302
+ % lose.01 MEDLINE No.1
303
+ A variant which lost a site has been characterized
304
+ % lose.01 EMBO No.3 (passive)
305
+ Anchoring ability was lost
306
+
307
+ % modify.01 MEDLINE No.1 (passive)
308
+ Genes were modified
309
+ % modify.01 EMBO No.2
310
+ Factors that can modify the binding may regulate binding
311
+
312
+ % mutate.01 MEDLINE No.1 (adj?)
313
+ % [there may be a problem here with "deficiency"]
314
+ The mutated allele resulted in deficiency
315
+ % mutate.01 MEDLINE No.2 (passive participle postmodifier)
316
+ The gene mutated in mice encodes a protein
317
+ % mutate.01 EMBO No.4
318
+ The fragments were mutated by the sequence
319
+ % mutate.01 ? (active)
320
+
321
+ % proliferate.01 MEDLINE No.1
322
+ Cells are characterized by an ability to proliferate
323
+ % proliferate.01 MEDLINE No.2
324
+ Cells proliferate
325
+ % proliferate.01 ? (passive)
326
+
327
+ % recognize.01 MEDLINE No.1 (passive participle postmodifier)
328
+ The protein would lack epitopes recognized by the serum
329
+ % recognize.01 MEDLINE No.3
330
+ A number recognized by cells have been isolated
331
+ % recognize.01 MEDLINE No.5
332
+ Antibodies recognize specifically a polypeptide
333
+
334
+ % result.01 MEDLINE No.1
335
+ We report the existence of isoforms which result from splicing
336
+ % result.01 MEDLINE No.2
337
+ Both mutations result in high proportions of mRNAs
338
+ % result.01 ? (passive)
339
+
340
+ % skip.01 MEDLINE No.1
341
+ sequencing revealed a mutation, which skipped exon 3
342
+ % skip.01 MEDLINE No.2
343
+ An exon can be skipped by splicing
344
+
345
+ % splice.01 MEDLINE No.1
346
+ exon 30 is spliced together with the intron
347
+ % splice.01 PNAS No.4
348
+ 3I spliced 20% as efficiently as 3F
349
+ % splice.01 PNAS No.4 (oversimplified?)
350
+ 3I spliced
351
+
352
+ % splice.02 MEDLINE No.1
353
+ CD1c has a form that is thought to be spliced out
354
+ % splice.02 MEDLINE No.2
355
+ One exon is spliced out of the transcript
356
+ % splice.02 MEDLINE No.9
357
+ Exon 16 can be spliced out
358
+
359
+ % transcribe.01 MEDLINE No.1
360
+ The gene is transcribed
361
+ % transcribe.01 MEDLINE No.2
362
+ KLK41 transcribes two alternative transcripts
363
+
364
+ % transform.01 MEDLINE No.1
365
+ FGF8b can transform the midbrain into a cerebellum fate
366
+ % transform.01 MEDLINE No.3
367
+ Phospholipase D is known to transform cells into tumorigenic forms
368
+
369
+ % transform.02 MEDLINE No.1
370
+ The DNA was used to transform E. coli
371
+
372
+ % translate.01 MEDLINE No.1
373
+ Splicing results in a transcript which would be translated into a protein
374
+ % translate.01 EMBO No.1
375
+ Stat1 was translated
376
+ % translate.01 EMBO No.2 (oversimplified? slightly modified)
377
+ Stat1 translated can become a dimer
378
+
379
+ % translate.02 MEDLINE No.1
380
+ This review examines technologies that can be used to translate information
381
+ % translate.02 MEDLINE No.3
382
+ Acj6 translates information into specificity
383
+
384
+ % translate.03 MEDLINE No.1
385
+ The functions translate into modulations
386
+
387
+ % truncate.01 MEDLINE No.1
388
+ Changes were predicted to truncate the protein
389
+ % truncate.01 MEDLINE No.2
390
+ The domains were truncated
391
+
392
+ % verbs taking particles
393
+ The recombinant plasmid was screened out
394
+ The adenine bulge is looped out
395
+ A monomer is built up of strands
396
+
397
+ % "in gel"
398
+ % should have been in-gel to be grammatical -- could a spell checker guess this?
399
+ They were measured by in gel kinase assays
400
+
401
+
402
+ % IMPORTANT: CAPITALIZED-WORDS SHOULD ALLOW "that" ETC. (<noun-sub-s> missing)
403
+ it encodes a GPCR that is homologous to the chemokine
404
+ % compare
405
+ it encodes a gPCR that is homologous to the chemokine
406
+
407
+ % "ORDERED" as an adjective
408
+ The complex plays a role in the construction of ordered multicellular structures
409
+
410
+ % CONCENTRATED as an adjective
411
+ the genes were most concentrated in the cell
@@ -0,0 +1,127 @@
1
+ DOMAIN_STARTER_LINKS: S##* S##i SF SX Ce* Cet RS O OX Js Jp J* MVp Mp R* Rn Pa TH Pg Pv I PP Cr
2
+ Cs MX#* Wc Wd Wi
3
+ TI BIt MVs TO Mv Mg* MVa OF ON IN QI Ma CP* CPi CPx MVt S##w MX#r Pp MVi MVg Mgp MVx SI MX#p Cc
4
+ S##t S##h S##b S##q L MX#a MG JG MX#x U S##d JT MVh Mr B#w B#d MVb COq Mj OD CX S##g PF Zc MX#d Bc
5
+ K NIax
6
+
7
+ DOMAIN_CONTAINS_LINKS:
8
+
9
+ URFL_ONLY_DOMAIN_STARTER_LINKS:
10
+
11
+ LEFT_DOMAIN_STARTER_LINKS: A EA E CO#s CO* COd* YS YP Yt Yd GN DTie CO#n COp D##n ND
12
+
13
+ STARTING_LINK_TYPE_TABLE:
14
+ S##* v
15
+ S##i v
16
+ S##t v
17
+ S##h v
18
+ S##b v
19
+ S##q v
20
+ S##d z
21
+ S##g v
22
+ SF v
23
+ SX v
24
+ Wd s
25
+ Wc s
26
+ Wi z
27
+ Ce* s
28
+ Cet s
29
+ TH b
30
+ RS z
31
+ O n
32
+ OX n
33
+ Js n
34
+ Jp n
35
+ J* n
36
+ MVp p
37
+ Mp p
38
+ R* b
39
+ Rn s
40
+ Pa a
41
+ A u
42
+ Pg v
43
+ Pv v
44
+ I v
45
+ PP v
46
+ Cr s
47
+ Cs s
48
+ MX#* n
49
+ TI n
50
+ BIt n
51
+ MVs b
52
+ EA e
53
+ E e
54
+ TO t
55
+ MVi t
56
+ Mv v
57
+ Mg* v
58
+ CO#s f
59
+ CO* g
60
+ COd* g
61
+ CO#n g
62
+ MVa i
63
+ MVb i
64
+ OF p
65
+ YS y
66
+ YP y
67
+ IN n
68
+ ON n
69
+ QI b
70
+ Ma a
71
+ CP* s
72
+ CPi s
73
+ CPx q
74
+ MVt p
75
+ MX#r b
76
+ S##w z
77
+ Pp p
78
+ MVg z
79
+ Mgp z
80
+ MVx p
81
+ SI n
82
+ MX#p v
83
+ Cc s
84
+ L a
85
+ MX#a a
86
+ Yt y
87
+ Yd y
88
+ MG p
89
+ JG n
90
+ GN y
91
+ MX#x p
92
+ U n
93
+ JT n
94
+ MVh b
95
+ Mr b
96
+ MX#d b
97
+ B#w s
98
+ B#d s
99
+ DTie e
100
+ COq s
101
+ Mj b
102
+ OD n
103
+ CX s
104
+ COp c
105
+ PF v
106
+ Zc s
107
+ Bc s
108
+ K k
109
+ D##n d
110
+ ND d
111
+ NIax h
112
+
113
+ ; ----------------------------------------------------------------------
114
+ ; These links are not put in the word/link graph. They also cannot be the
115
+ ; starter links for a domain.
116
+
117
+ IGNORE_THESE_LINKS: Xca HA
118
+
119
+
120
+
121
+ ; ----------------------------------------------------------------------
122
+ ; these links are not traced further if they point back before the root word
123
+
124
+ RESTRICTED_LINKS:
125
+ B#* D##w B#w B#d AFh MVt Xx HL SFsic AFd Bc CX EAh
126
+ H HA PFc B#j Wd PF Z BW
127
+