grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,849 @@
1
+ /***************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* Copyright (c) 2008 Linas Vepstas */
5
+ /* All rights reserved */
6
+ /* */
7
+ /* Use of the link grammar parsing system is subject to the terms of the */
8
+ /* license set forth in the LICENSE file included with this software, */
9
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
10
+ /* This license allows free redistribution and use in source and binary */
11
+ /* forms, with or without modification, subject to certain conditions. */
12
+ /* */
13
+ /***************************************************************************/
14
+
15
+ /****************************************************************************
16
+ *
17
+ * This is a simple example of the link parser API. It simulates most of
18
+ * the functionality of the original link grammar parser, allowing sentences
19
+ * to be typed in either interactively or in "batch" mode (if -batch is
20
+ * specified on the command line, and stdin is redirected to a file).
21
+ * The program:
22
+ * Opens up a dictionary
23
+ * Iterates:
24
+ * 1. Reads from stdin to get an input string to parse
25
+ * 2. Tokenizes the string to form a Sentence
26
+ * 3. Tries to parse it with cost 0
27
+ * 4. Tries to parse with increasing cost
28
+ * When a parse is found:
29
+ * 1. Extracts each Linkage
30
+ * 2. Passes it to process_some_linkages()
31
+ * 3. Deletes linkage
32
+ * After parsing each Sentence is deleted by making a call to
33
+ * sentence_delete.
34
+ *
35
+ ****************************************************************************/
36
+
37
+ #include <errno.h>
38
+ #include <locale.h>
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+ #include <wchar.h>
43
+
44
+ /* Used for terminal resizing */
45
+ #ifndef _WIN32
46
+ #include <termios.h>
47
+ #include <sys/ioctl.h>
48
+ #include <fcntl.h>
49
+ #include <unistd.h>
50
+ #endif
51
+
52
+ #ifdef HAVE_EDITLINE
53
+ #include <editline/readline.h>
54
+ #endif
55
+
56
+ #ifdef _MSC_VER
57
+ #define LINK_GRAMMAR_DLL_EXPORT 0
58
+ #endif
59
+
60
+ #include <link-grammar/link-includes.h>
61
+ #include <link-grammar/structures.h>
62
+ #include <link-grammar/error.h>
63
+
64
+ #include "command-line.h"
65
+ #include "expand.h"
66
+
67
+ #define MAX_INPUT 1024
68
+ #define DISPLAY_MAX 1024
69
+ #define COMMENT_CHAR '%' /* input lines beginning with this are ignored */
70
+
71
+ static int batch_errors = 0;
72
+ static int input_pending=FALSE;
73
+ static Parse_Options opts;
74
+ static Parse_Options panic_parse_opts;
75
+ static int verbosity = 0;
76
+
77
/**
 * Marker that may occupy the first character of an input line.
 * The value of each enumerator is the marker character itself.
 */
typedef enum
{
	/* No marker: the sentence is expected to have a valid linkage. */
	NO_LABEL = ' ',
	/* The sentence is expected NOT to have a valid linkage. */
	UNGRAMMATICAL = '*',
	/* The sentence is expected to parse only with a non-zero disjunct cost. */
	PARSE_WITH_DISJUNCT_COST_GT_0 = ':'
} Label;
83
+
84
+ static char *
85
+ fget_input_string(FILE *in, FILE *out, Parse_Options opts)
86
+ {
87
+ #ifdef HAVE_EDITLINE
88
+ static char * pline = NULL;
89
+ const char * prompt = "linkparser> ";
90
+
91
+ if (in != stdin)
92
+ {
93
+ static char input_string[MAX_INPUT];
94
+ input_pending = FALSE;
95
+ if (fgets(input_string, MAX_INPUT, in)) return input_string;
96
+ return NULL;
97
+ }
98
+
99
+ if (input_pending && pline != NULL)
100
+ {
101
+ input_pending = FALSE;
102
+ return pline;
103
+ }
104
+ if (parse_options_get_batch_mode(opts) ||
105
+ (verbosity == 0) ||
106
+ input_pending)
107
+ {
108
+ prompt = "";
109
+ }
110
+ input_pending = FALSE;
111
+ if (pline) free(pline);
112
+ pline = readline(prompt);
113
+
114
+ /* Save non-blank lines */
115
+ if (pline && *pline)
116
+ {
117
+ if (*pline) add_history(pline);
118
+ }
119
+ return pline;
120
+
121
+ #else
122
+ static char input_string[MAX_INPUT];
123
+
124
+ if ((!parse_options_get_batch_mode(opts)) &&
125
+ (verbosity > 0) &&
126
+ (!input_pending))
127
+ {
128
+ fprintf(out, "linkparser> ");
129
+ fflush(out);
130
+ }
131
+ input_pending = FALSE;
132
+
133
+ /* For UTF-8 input, I think its still technically correct to
134
+ * use fgets() and not fgetws() at this point. */
135
+ if (fgets(input_string, MAX_INPUT, in)) return input_string;
136
+ else return NULL;
137
+ #endif
138
+ }
139
+
140
+ static int fget_input_char(FILE *in, FILE *out, Parse_Options opts)
141
+ {
142
+ #ifdef HAVE_EDITLINE
143
+ char * pline = fget_input_string(in, out, opts);
144
+ if (NULL == pline) return EOF;
145
+ if (*pline)
146
+ {
147
+ input_pending = TRUE;
148
+ return *pline;
149
+ }
150
+ return '\n';
151
+
152
+ #else
153
+ int c;
154
+
155
+ if (!parse_options_get_batch_mode(opts) && (verbosity > 0))
156
+ fprintf(out, "linkparser> ");
157
+ fflush(out);
158
+
159
+ /* For UTF-8 input, I think its still technically correct to
160
+ * use fgetc() and not fgetwc() at this point. */
161
+ c = fgetc(in);
162
+ if (c != '\n')
163
+ {
164
+ ungetc(c, in);
165
+ input_pending = TRUE;
166
+ }
167
+ return c;
168
+ #endif
169
+ }
170
+
171
+ /**************************************************************************
172
+ *
173
+ * This procedure displays a linkage graphically. Since the diagrams
174
+ * are passed as character strings, they need to be deleted with a
175
+ * call to free.
176
+ *
177
+ **************************************************************************/
178
+
179
+ static void process_linkage(Linkage linkage, Parse_Options opts)
180
+ {
181
+ char * string;
182
+ int j, mode, first_sublinkage;
183
+ int nlink;
184
+
185
+ if (!linkage) return; /* Can happen in timeout mode */
186
+
187
+ if (parse_options_get_use_fat_links(opts) &&
188
+ parse_options_get_display_union(opts))
189
+ {
190
+ linkage_compute_union(linkage);
191
+ first_sublinkage = linkage_get_num_sublinkages(linkage)-1;
192
+ }
193
+ else
194
+ {
195
+ first_sublinkage = 0;
196
+ }
197
+
198
+ nlink = linkage_get_num_sublinkages(linkage);
199
+ for (j=first_sublinkage; j<nlink; ++j)
200
+ {
201
+ linkage_set_current_sublinkage(linkage, j);
202
+ if (parse_options_get_display_on(opts))
203
+ {
204
+ string = linkage_print_diagram(linkage);
205
+ fprintf(stdout, "%s", string);
206
+ linkage_free_diagram(string);
207
+ }
208
+ if (parse_options_get_display_links(opts))
209
+ {
210
+ string = linkage_print_links_and_domains(linkage);
211
+ fprintf(stdout, "%s", string);
212
+ linkage_free_links_and_domains(string);
213
+ }
214
+ if (parse_options_get_display_senses(opts))
215
+ {
216
+ string = linkage_print_senses(linkage);
217
+ fprintf(stdout, "%s", string);
218
+ linkage_free_senses(string);
219
+ }
220
+ if (parse_options_get_display_disjuncts(opts))
221
+ {
222
+ string = linkage_print_disjuncts(linkage);
223
+ fprintf(stdout, "%s", string);
224
+ linkage_free_disjuncts(string);
225
+ }
226
+ if (parse_options_get_display_postscript(opts))
227
+ {
228
+ string = linkage_print_postscript(linkage, FALSE);
229
+ fprintf(stdout, "%s\n", string);
230
+ linkage_free_postscript(string);
231
+ }
232
+ }
233
+ if ((mode = parse_options_get_display_constituents(opts)))
234
+ {
235
+ string = linkage_print_constituent_tree(linkage, mode);
236
+ if (string != NULL)
237
+ {
238
+ fprintf(stdout, "%s\n", string);
239
+ linkage_free_constituent_tree_str(string);
240
+ }
241
+ else
242
+ {
243
+ fprintf(stderr, "Can't generate constituents.\n");
244
+ fprintf(stderr, "Constituent processing has been turned off.\n");
245
+ }
246
+ }
247
+ }
248
+
249
+ static void print_parse_statistics(Sentence sent, Parse_Options opts)
250
+ {
251
+ if (sentence_num_linkages_found(sent) > 0)
252
+ {
253
+ if (sentence_num_linkages_found(sent) >
254
+ parse_options_get_linkage_limit(opts))
255
+ {
256
+ fprintf(stdout, "Found %d linkage%s (%d of %d random " \
257
+ "linkages had no P.P. violations)",
258
+ sentence_num_linkages_found(sent),
259
+ sentence_num_linkages_found(sent) == 1 ? "" : "s",
260
+ sentence_num_valid_linkages(sent),
261
+ sentence_num_linkages_post_processed(sent));
262
+ }
263
+ else
264
+ {
265
+ fprintf(stdout, "Found %d linkage%s (%d had no P.P. violations)",
266
+ sentence_num_linkages_post_processed(sent),
267
+ sentence_num_linkages_found(sent) == 1 ? "" : "s",
268
+ sentence_num_valid_linkages(sent));
269
+ }
270
+ if (sentence_null_count(sent) > 0)
271
+ {
272
+ fprintf(stdout, " at null count %d", sentence_null_count(sent));
273
+ }
274
+ fprintf(stdout, "\n");
275
+ }
276
+ }
277
+
278
+
279
+ static int process_some_linkages(Sentence sent, Parse_Options opts)
280
+ {
281
+ int c;
282
+ int i, num_to_query, num_to_display, num_displayed;
283
+ Linkage linkage;
284
+ double corpus_cost;
285
+
286
+ if (verbosity > 0) print_parse_statistics(sent, opts);
287
+ num_to_query = MIN(sentence_num_linkages_post_processed(sent),
288
+ DISPLAY_MAX);
289
+ if (!parse_options_get_display_bad(opts))
290
+ {
291
+ num_to_display = MIN(sentence_num_valid_linkages(sent),
292
+ DISPLAY_MAX);
293
+ }
294
+ else
295
+ {
296
+ num_to_display = MIN(sentence_num_linkages_post_processed(sent),
297
+ DISPLAY_MAX);
298
+ }
299
+
300
+ for (i=0, num_displayed=0; i<num_to_query; i++)
301
+ {
302
+ if ((sentence_num_violations(sent, i) > 0) &&
303
+ (!parse_options_get_display_bad(opts)))
304
+ {
305
+ continue;
306
+ }
307
+
308
+ linkage = linkage_create(i, sent, opts);
309
+
310
+ /* Currently, sat solver returns NULL when there ain't no more */
311
+ if (!linkage) break;
312
+
313
+ if (verbosity > 0)
314
+ {
315
+ if ((sentence_num_valid_linkages(sent) == 1) &&
316
+ (!parse_options_get_display_bad(opts)))
317
+ {
318
+ fprintf(stdout, " Unique linkage, ");
319
+ }
320
+ else if ((parse_options_get_display_bad(opts)) &&
321
+ (sentence_num_violations(sent, i) > 0))
322
+ {
323
+ fprintf(stdout, " Linkage %d (bad), ", num_displayed+1);
324
+ }
325
+ else
326
+ {
327
+ fprintf(stdout, " Linkage %d, ", num_displayed+1);
328
+ }
329
+
330
+ if (!linkage_is_canonical(linkage))
331
+ {
332
+ fprintf(stdout, "non-canonical, ");
333
+ }
334
+ if (linkage_is_improper(linkage))
335
+ {
336
+ fprintf(stdout, "improper fat linkage, ");
337
+ }
338
+ if (linkage_has_inconsistent_domains(linkage))
339
+ {
340
+ fprintf(stdout, "inconsistent domains, ");
341
+ }
342
+
343
+ corpus_cost = linkage_corpus_cost(linkage);
344
+ if (corpus_cost < 0.0f)
345
+ {
346
+ fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d FAT=%d AND=%d LEN=%d)\n",
347
+ linkage_unused_word_cost(linkage),
348
+ linkage_disjunct_cost(linkage),
349
+ linkage_is_fat(linkage),
350
+ linkage_and_cost(linkage),
351
+ linkage_link_cost(linkage));
352
+ }
353
+ else
354
+ {
355
+ fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%d FAT=%d AND=%d LEN=%d)\n",
356
+ corpus_cost,
357
+ linkage_unused_word_cost(linkage),
358
+ linkage_disjunct_cost(linkage),
359
+ linkage_is_fat(linkage),
360
+ linkage_and_cost(linkage),
361
+ linkage_link_cost(linkage));
362
+ }
363
+ }
364
+
365
+ process_linkage(linkage, opts);
366
+ linkage_delete(linkage);
367
+
368
+ if (++num_displayed < num_to_display)
369
+ {
370
+ if (verbosity > 0)
371
+ {
372
+ fprintf(stdout, "Press RETURN for the next linkage.\n");
373
+ }
374
+ c = fget_input_char(stdin, stdout, opts);
375
+ if (c != '\n') return c;
376
+ }
377
+ else
378
+ {
379
+ break;
380
+ }
381
+ }
382
+ return 'x';
383
+ }
384
+
385
+ static int there_was_an_error(Label label, Sentence sent, Parse_Options opts)
386
+ {
387
+ if (sentence_num_valid_linkages(sent) > 0) {
388
+ if (label == UNGRAMMATICAL) {
389
+ batch_errors++;
390
+ return UNGRAMMATICAL;
391
+ }
392
+ if ((sentence_disjunct_cost(sent, 0) == 0) &&
393
+ (label == PARSE_WITH_DISJUNCT_COST_GT_0)) {
394
+ batch_errors++;
395
+ return PARSE_WITH_DISJUNCT_COST_GT_0;
396
+ }
397
+ } else {
398
+ if (label != UNGRAMMATICAL) {
399
+ batch_errors++;
400
+ return UNGRAMMATICAL;
401
+ }
402
+ }
403
+ return FALSE;
404
+ }
405
+
406
+ static void batch_process_some_linkages(Label label,
407
+ Sentence sent,
408
+ Parse_Options opts)
409
+ {
410
+ Linkage linkage;
411
+
412
+ if (there_was_an_error(label, sent, opts)) {
413
+ /* Note: sentence_num_linkages_found returns total linkages
414
+ * not valid linkages. So the printed linkage might be bad...
415
+ */
416
+ if (sentence_num_linkages_found(sent) > 0) {
417
+ linkage = linkage_create(0, sent, opts);
418
+ process_linkage(linkage, opts);
419
+ linkage_delete(linkage);
420
+ }
421
+ fprintf(stdout, "+++++ error %d\n", batch_errors);
422
+ }
423
+ }
424
+
425
+ static int special_command(char *input_string, Dictionary dict)
426
+ {
427
+ if (input_string[0] == '\n') return TRUE;
428
+ if (input_string[0] == COMMENT_CHAR) return TRUE;
429
+ if (input_string[0] == '!') {
430
+ if (strncmp(input_string, "!panic_", 7) == 0)
431
+ {
432
+ issue_special_command(input_string+7, panic_parse_opts, dict);
433
+ return TRUE;
434
+ }
435
+
436
+ issue_special_command(input_string+1, opts, dict);
437
+ return TRUE;
438
+ }
439
+ return FALSE;
440
+ }
441
+
442
+ static Label strip_off_label(char * input_string)
443
+ {
444
+ Label c;
445
+
446
+ c = (Label) input_string[0];
447
+ switch(c) {
448
+ case UNGRAMMATICAL:
449
+ case PARSE_WITH_DISJUNCT_COST_GT_0:
450
+ input_string[0] = ' ';
451
+ return c;
452
+ case NO_LABEL:
453
+ default:
454
+ return NO_LABEL;
455
+ }
456
+ }
457
+
458
+ static void setup_panic_parse_options(Parse_Options opts)
459
+ {
460
+ parse_options_set_disjunct_costf(opts, 3.0f);
461
+ parse_options_set_min_null_count(opts, 1);
462
+ parse_options_set_max_null_count(opts, MAX_SENTENCE);
463
+ parse_options_set_max_parse_time(opts, 60);
464
+ parse_options_set_use_fat_links(opts, 0);
465
+ parse_options_set_islands_ok(opts, 1);
466
+ parse_options_set_short_length(opts, 6);
467
+ parse_options_set_all_short_connectors(opts, 1);
468
+ parse_options_set_linkage_limit(opts, 100);
469
+ parse_options_set_spell_guess(opts, FALSE);
470
+ }
471
+
472
+ static void print_usage(char *str) {
473
+ fprintf(stderr,
474
+ "Usage: %s [language|dictionary location]\n"
475
+ " [-<special \"!\" command>]\n"
476
+ " [--version]\n", str);
477
+
478
+ fprintf(stderr, "\nSpecial commands are:\n");
479
+ opts = parse_options_create();
480
+ issue_special_command("var", opts, NULL);
481
+ exit(-1);
482
+ }
483
+
484
+ /**
485
+ * On Unix, this checks for the current window size,
486
+ * and sets the output screen width accordingly.
487
+ * Not sure how MS Windows does this.
488
+ */
489
+ static void check_winsize(Parse_Options popts)
490
+ {
491
+ /* Neither windows nor MSYS have the ioctl support needed for this. */
492
+ #ifdef _WIN32
493
+ parse_options_set_screen_width(popts, 79);
494
+ #else
495
+ struct winsize ws;
496
+ int fd = open("/dev/tty", O_RDWR);
497
+
498
+ if (0 != ioctl(fd, TIOCGWINSZ, &ws))
499
+ {
500
+ perror("ioctl(/dev/tty, TIOCGWINSZ)");
501
+ close(fd);
502
+ return;
503
+ }
504
+ close(fd);
505
+
506
+ /* printf("rows %i\n", ws.ws_row); */
507
+ /* printf("cols %i\n", ws.ws_col); */
508
+
509
+ /* Set the screen width only if the returned value seems
510
+ * rational: its positive and not insanely tiny.
511
+ */
512
+ if ((10 < ws.ws_col) && (16123 > ws.ws_col))
513
+ {
514
+ parse_options_set_screen_width(popts, ws.ws_col - 1);
515
+ }
516
+ #endif /* _WIN32 */
517
+ }
518
+
519
+ int main(int argc, char * argv[])
520
+ {
521
+ FILE *input_fh = stdin;
522
+ Dictionary dict;
523
+ const char *language="en"; /* default to english, and not locale */
524
+ int num_linkages, i;
525
+ Label label = NO_LABEL;
526
+ const char *codeset;
527
+
528
+ #if LATER
529
+ /* Try to catch the SIGNWINCH ... except this is not working. */
530
+ struct sigaction winch_act;
531
+ winch_act.sa_handler = winch_handler;
532
+ winch_act.sa_sigaction = NULL;
533
+ sigemptyset (&winch_act.sa_mask);
534
+ winch_act.sa_flags = 0;
535
+ sigaction (SIGWINCH, &winch_act, NULL);
536
+ #endif
537
+
538
+ i = 1;
539
+ if ((argc > 1) && (argv[1][0] != '-')) {
540
+ /* the dictionary is the first argument if it doesn't begin with "-" */
541
+ language = argv[1];
542
+ i++;
543
+ }
544
+
545
+ /* Get the locale from the environment...
546
+ * perhaps we should someday get it from the dictionary ??
547
+ */
548
+ setlocale(LC_ALL, "");
549
+
550
+ /* Check to make sure the current locale is UTF8; if its not,
551
+ * then force-set this to the english utf8 locale
552
+ */
553
+ codeset = nl_langinfo(CODESET);
554
+ if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
555
+ {
556
+ fprintf(stderr,
557
+ "%s: Warning: locale %s was not UTF-8; force-setting to en_US.UTF-8\n",
558
+ argv[0], codeset);
559
+ setlocale(LC_CTYPE, "en_US.UTF-8");
560
+ }
561
+
562
+ for (; i<argc; i++)
563
+ {
564
+ if (argv[i][0] == '-')
565
+ {
566
+ if (strcmp("--version", argv[i])==0)
567
+ {
568
+ printf("Version: %s\n", linkgrammar_get_version());
569
+ exit(0);
570
+ }
571
+ /* TBD remove these in version 5.0 */
572
+ else if ((strcmp("-ppoff", argv[i])==0) ||
573
+ (strcmp("-coff", argv[i])==0) ||
574
+ (strcmp("-aoff", argv[i])==0))
575
+ {
576
+ fprintf(stderr, "%s: Warning: %s flag ignored\n", argv[0], argv[i]);
577
+ }
578
+ }
579
+ else
580
+ {
581
+ print_usage(argv[0]);
582
+ }
583
+ }
584
+
585
+ opts = parse_options_create();
586
+ if (opts == NULL)
587
+ {
588
+ fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]);
589
+ exit(-1);
590
+ }
591
+
592
+ panic_parse_opts = parse_options_create();
593
+ if (panic_parse_opts == NULL)
594
+ {
595
+ fprintf(stderr, "%s: Fatal error: unable to create panic parse options\n", argv[0]);
596
+ exit(-1);
597
+ }
598
+ setup_panic_parse_options(panic_parse_opts);
599
+ parse_options_set_max_sentence_length(opts, 170);
600
+ parse_options_set_panic_mode(opts, TRUE);
601
+ parse_options_set_max_parse_time(opts, 30);
602
+ parse_options_set_linkage_limit(opts, 1000);
603
+ parse_options_set_short_length(opts, 10);
604
+ parse_options_set_disjunct_costf(opts, 2.0f);
605
+ parse_options_set_min_null_count(opts, 0);
606
+ parse_options_set_max_null_count(opts, 0);
607
+
608
+ if (language && *language)
609
+ dict = dictionary_create_lang(language);
610
+ else
611
+ dict = dictionary_create_default_lang();
612
+
613
+ if (dict == NULL)
614
+ {
615
+ fprintf(stderr, "%s: Fatal error: Unable to open dictionary.\n", argv[0]);
616
+ exit(-1);
617
+ }
618
+
619
+ /* Process the command line like commands */
620
+ for (i=1; i<argc; i++)
621
+ {
622
+ /* TBD remove these in version 5.0 */
623
+ if ((strcmp("-pp", argv[i]) == 0) ||
624
+ (strcmp("-c", argv[i]) == 0) ||
625
+ (strcmp("-a", argv[i]) == 0) ||
626
+ (strcmp("-ppoff", argv[i]) == 0) ||
627
+ (strcmp("-coff", argv[i]) == 0) ||
628
+ (strcmp("-aoff", argv[i]) == 0))
629
+ {
630
+ i++;
631
+ }
632
+ else if (argv[i][0] == '-')
633
+ {
634
+ int rc;
635
+ if (argv[i][1] == '!')
636
+ rc = issue_special_command(argv[i]+2, opts, dict);
637
+ else
638
+ rc = issue_special_command(argv[i]+1, opts, dict);
639
+
640
+ if (rc)
641
+ print_usage(argv[0]);
642
+ }
643
+ }
644
+
645
+ verbosity = parse_options_get_verbosity(opts);
646
+ check_winsize(opts);
647
+
648
+ prt_error("Info: Dictionary version %s.\n",
649
+ linkgrammar_get_dict_version(dict));
650
+ prt_error("Info: Library version %s. Enter \"!help\" for help.\n",
651
+ linkgrammar_get_version());
652
+
653
+ /* Main input loop */
654
+ while (1)
655
+ {
656
+ char *input_string;
657
+ Sentence sent = NULL;
658
+
659
+ input_string = fget_input_string(input_fh, stdout, opts);
660
+ check_winsize(opts);
661
+
662
+ if (NULL == input_string)
663
+ {
664
+ if (input_fh == stdin) break;
665
+ fclose (input_fh);
666
+ input_fh = stdin;
667
+ continue;
668
+ }
669
+
670
+ if ((strcmp(input_string, "!quit") == 0) ||
671
+ (strcmp(input_string, "!exit") == 0)) break;
672
+
673
+ /* We have to handle the !file command inline; its too hairy
674
+ * otherwise ... */
675
+ if (strncmp(input_string, "!file", 5) == 0)
676
+ {
677
+ char * filename = &input_string[6];
678
+ input_fh = fopen(filename, "r");
679
+ if (NULL == input_fh)
680
+ {
681
+ int perr = errno;
682
+ fprintf(stderr, "Error: %s (%d) %s\n",
683
+ filename, perr, strerror(perr));
684
+ input_fh = stdin;
685
+ continue;
686
+ }
687
+ continue;
688
+ }
689
+
690
+ if (special_command(input_string, dict)) continue;
691
+ if (parse_options_get_echo_on(opts))
692
+ {
693
+ printf("%s ", input_string);
694
+ }
695
+
696
+ if (parse_options_get_batch_mode(opts))
697
+ {
698
+ label = strip_off_label(input_string);
699
+ }
700
+
701
+ sent = sentence_create(input_string, dict);
702
+
703
+ if (sent == NULL) continue;
704
+
705
+ if (sentence_length(sent) > parse_options_get_max_sentence_length(opts))
706
+ {
707
+ if (verbosity > 0)
708
+ {
709
+ fprintf(stdout,
710
+ "Sentence length (%d words) exceeds maximum allowable (%d words)\n",
711
+ sentence_length(sent), parse_options_get_max_sentence_length(opts));
712
+ }
713
+ sentence_delete(sent);
714
+ sent = NULL;
715
+ continue;
716
+ }
717
+
718
+ /* First parse with cost 0 or 1 and no null links */
719
+ // parse_options_set_disjunct_costf(opts, 2.0f);
720
+ parse_options_set_min_null_count(opts, 0);
721
+ parse_options_set_max_null_count(opts, 0);
722
+ parse_options_reset_resources(opts);
723
+
724
+ num_linkages = sentence_parse(sent, opts);
725
+ if (num_linkages < 0)
726
+ {
727
+ sentence_delete(sent);
728
+ sent = NULL;
729
+ continue;
730
+ }
731
+
732
+ #if 0
733
+ /* Try again, this time ommitting the requirement for
734
+ * definite articles, etc. This should allow for the parsing
735
+ * of newspaper headlines and other clipped speech.
736
+ *
737
+ * XXX Unfortunately, this also allows for the parsing of
738
+ * all sorts of ungrammatical sentences which should not
739
+ * parse, and leads to bad parses of many other unparsable
740
+ * but otherwise grammatical sentences. Thus, this trick
741
+ * pretty much fails; we leave it here to document the
742
+ * experiment.
743
+ */
744
+ if (num_linkages == 0)
745
+ {
746
+ parse_options_set_disjunct_costf(opts, 3.5f);
747
+ num_linkages = sentence_parse(sent, opts);
748
+ if (num_linkages < 0) continue;
749
+ }
750
+ #endif
751
+
752
+ /* Try using a larger list of disjuncts */
753
+ if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts))
754
+ {
755
+ int expanded;
756
+ if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n");
757
+ parse_options_set_disjunct_costf(opts, 2.9f);
758
+ expanded = lg_expand_disjunct_list(sent);
759
+ if (expanded)
760
+ {
761
+ num_linkages = sentence_parse(sent, opts);
762
+ }
763
+ if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n");
764
+ }
765
+
766
+ /* If asked to show bad linkages, then show them. */
767
+ if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts)))
768
+ {
769
+ if (parse_options_get_display_bad(opts))
770
+ {
771
+ num_linkages = sentence_num_linkages_found(sent);
772
+ }
773
+ }
774
+
775
+ /* Now parse with null links */
776
+ if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts)))
777
+ {
778
+ if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");
779
+
780
+ if (parse_options_get_allow_null(opts))
781
+ {
782
+ /* XXX should use expanded disjunct list here too */
783
+ parse_options_set_min_null_count(opts, 1);
784
+ parse_options_set_max_null_count(opts, sentence_length(sent));
785
+ num_linkages = sentence_parse(sent, opts);
786
+ }
787
+ }
788
+
789
+ if (parse_options_timer_expired(opts))
790
+ {
791
+ if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
792
+ }
793
+ if (parse_options_memory_exhausted(opts))
794
+ {
795
+ if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n");
796
+ }
797
+
798
+ if ((num_linkages == 0) &&
799
+ parse_options_resources_exhausted(opts) &&
800
+ parse_options_get_panic_mode(opts))
801
+ {
802
+ /* print_total_time(opts); */
803
+ batch_errors++;
804
+ if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
805
+ parse_options_reset_resources(panic_parse_opts);
806
+ parse_options_set_verbosity(panic_parse_opts, verbosity);
807
+ num_linkages = sentence_parse(sent, panic_parse_opts);
808
+ if (parse_options_timer_expired(panic_parse_opts)) {
809
+ if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
810
+ }
811
+ }
812
+
813
+ /* print_total_time(opts); */
814
+
815
+ if (parse_options_get_batch_mode(opts))
816
+ {
817
+ batch_process_some_linkages(label, sent, opts);
818
+ }
819
+ else
820
+ {
821
+ int c = process_some_linkages(sent, opts);
822
+ if (c == EOF)
823
+ {
824
+ sentence_delete(sent);
825
+ sent = NULL;
826
+ break;
827
+ }
828
+ }
829
+ fflush(stdout);
830
+
831
+ sentence_delete(sent);
832
+ sent = NULL;
833
+ }
834
+
835
+ if (parse_options_get_batch_mode(opts))
836
+ {
837
+ /* print_time(opts, "Total"); */
838
+ fprintf(stderr,
839
+ "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
840
+ }
841
+
842
+ /* Free stuff, so that mem-leak detectors don't commplain. */
843
+ parse_options_delete(panic_parse_opts);
844
+ parse_options_delete(opts);
845
+ dictionary_delete(dict);
846
+
847
+ printf ("Bye.\n");
848
+ return 0;
849
+ }