grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,281 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ #ifndef _LINK_GRAMMAR_UTILITIES_H_
14
+ #define _LINK_GRAMMAR_UTILITIES_H_
15
+
16
+ #ifdef __CYGWIN__
17
+ #define _WIN32 1
18
+ #endif /* __CYGWIN__ */
19
+
20
+ #ifndef _WIN32
21
+ #include <langinfo.h>
22
+ #endif
23
+
24
+ #include <ctype.h>
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <string.h>
28
+
29
+ #ifndef __CYGWIN__
30
+ /* I was told that cygwin does not have these files. */
31
+ #include <wchar.h>
32
+ #include <wctype.h>
33
+ #endif
34
+
35
+ #if defined(__CYGWIN__) && defined(__MINGW32__)
36
+ /* Some users have CygWin and MinGW installed!
37
+ * In this case, use the MinGW versions of UTF-8 support. */
38
+ #include <wchar.h>
39
+ #include <wctype.h>
40
+ #endif
41
+
42
+ #include "error.h"
43
+
44
+
45
+ #ifdef _WIN32
46
+ #include <windows.h>
47
+
48
+ #ifdef _MSC_VER
49
+ /* The Microsoft Visual C compiler doesn't support the "inline" keyword. */
50
+ #define inline
51
+
52
+ /* MS Visual C does not have any function normally found in strings.h */
53
+ /* In particular, be careful to avoid including strings.h */
54
+
55
+ /* MS Visual C uses non-standard string function names */
56
+ #define snprintf _snprintf
57
+ #define vsnprintf _vsnprintf
58
+ #define strcasecmp _stricmp
59
+ #define strdup _strdup
60
+ #define strncasecmp(a,b,s) strnicmp((a),(b),(s))
61
+
62
+ /* MS Visual C does not support some C99 standard floating-point functions */
63
+ #define fmaxf(a,b) ((a) > (b) ? (a) : (b))
64
+
65
+ #endif /* _MSC_VER */
66
+
67
+ /* Apparently, MinGW is also missing a variety of standard functions.
68
+ * Not surprising, since MinGW is intended for compiling Windows
69
+ * programs on Windows.
70
+ * MINGW is also known as MSYS */
71
+ #if defined(_MSC_VER) || defined(__MINGW32__)
72
+
73
+ /* No langinfo in Windows or MinGW */
74
+ #define nl_langinfo(X) ""
75
+
76
+ /* strtok_r is missing in Windows */
77
+ char * strtok_r (char *s, const char *delim, char **saveptr);
78
+
79
+ /* Windows doesn't have a thread-safe rand (???) */
80
+ /* Surely not, there must be something */
81
+ /* XXX FIXME -- this breaks thread safety on windows */
82
+ #define rand_r(seedp) rand()
83
+ #endif /* _MSC_VER || __MINGW32__ */
84
+
85
+ /*
86
+ * CYGWIN on Windows doesn't have UTF8 support, or wide chars ...
87
+ * However, MS Visual C apparently does, as does MinGW. Since
88
+ * some users have both cygwin and MinGW installed, crap out the
89
+ * UTF8 code only when MinGW is missing.
90
+ */
91
+ #if defined (__CYGWIN__) && !defined(__MINGW32__)
92
+ #define mbstate_t char
93
+ #define mbrtowc(w,s,n,x) ({*((char *)(w)) = *(s); 1;})
94
+ #define wcrtomb(s,w,x) ({*((char *)(s)) = ((char)(w)); 1;})
95
+ #define iswupper isupper
96
+ #define iswalpha isalpha
97
+ #define iswdigit isdigit
98
+ #define iswspace isspace
99
+ #define wchar_t char
100
+ #define wint_t int
101
+ #define fgetwc fgetc
102
+ #define WEOF EOF
103
+ #define towlower tolower
104
+ #define towupper toupper
105
+ #endif /* __CYGWIN__ and not __MINGW32__ */
106
+
107
+ #endif /* _WIN32 */
108
+
109
+ #if defined(__sun__)
110
+ int strncasecmp(const char *s1, const char *s2, size_t n);
111
+ /* This does not appear to be in string.h header file in sunos
112
+ (Or in linux when I compile with -ansi) */
113
+ #endif
114
+
115
+ #ifndef FALSE
116
+ #define FALSE 0
117
+ #endif
118
+
119
+ #ifndef TRUE
120
+ #define TRUE 1
121
+ #endif
122
+
123
/**
 * Two-argument assertion used throughout the parser.
 *
 * If `ex` evaluates to false, `string` is reported through prt_error()
 * and the process exits with status 1.
 *
 * The expansion is wrapped in do { ... } while (0) so that
 * `assert(cond, "msg");` behaves as exactly one statement.  With a bare
 * `{ ... }` block the caller's trailing semicolon becomes an extra empty
 * statement, which breaks `if (x) assert(...); else ...`.
 */
#define assert(ex,string) do {                               \
	if (!(ex)) {                                              \
		prt_error("Assertion failed: %s\n", string);           \
		exit(1);                                               \
	}                                                         \
} while (0)
129
+
130
+ #if !defined(MIN)
131
+ #define MIN(X,Y) ( ((X) < (Y)) ? (X) : (Y))
132
+ #endif
133
+ #if !defined(MAX)
134
+ #define MAX(X,Y) ( ((X) > (Y)) ? (X) : (Y))
135
+ #endif
136
+
137
+
138
/**
 * Convert the wide character wc to its multibyte form, storing it at s.
 *
 * Thin wrapper around wcrtomb() that treats a conversion failure as
 * fatal: the current codeset is reported through prt_error() and the
 * process exits with status 1.
 *
 * Returns the number of bytes written to s.
 */
static inline int wctomb_check(char *s, wchar_t wc, mbstate_t *ps)
{
	int written = wcrtomb(s, wc, ps);

	/* Common case first: the character was converted. */
	if (!(written < 0))
	{
		return written;
	}

	prt_error("Fatal Error: unknown character set %s\n", nl_langinfo(CODESET));
	exit(1);
}
147
+
148
/**
 * Decode the first multibyte character of s and test whether it is an
 * upper-case letter in the current locale.
 *
 * Returns the number of bytes occupied by that character if it is
 * upper-case, and 0 otherwise.  0 is also returned when s starts with an
 * invalid or incomplete multibyte sequence.
 */
static inline int is_utf8_upper(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On (size_t)-1 / (size_t)-2 nothing is stored in c, so it must not
	 * be classified, and the error code must not escape as a length. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswupper(c)) return (int) nbytes;
	return 0;
}
159
+
160
/**
 * Decode the first multibyte character of s and test whether it is an
 * alphabetic character in the current locale.
 *
 * Returns the number of bytes occupied by that character if it is
 * alphabetic, and 0 otherwise.  0 is also returned when s starts with an
 * invalid or incomplete multibyte sequence.
 */
static inline int is_utf8_alpha(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On (size_t)-1 / (size_t)-2 nothing is stored in c, so it must not
	 * be classified, and the error code must not escape as a length. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswalpha(c)) return (int) nbytes;
	return 0;
}
171
+
172
/**
 * Decode the first multibyte character of s and test whether it is a
 * decimal digit.
 *
 * Returns the number of bytes occupied by that character if it is a
 * digit, and 0 otherwise.  0 is also returned when s starts with an
 * invalid or incomplete multibyte sequence.
 */
static inline int is_utf8_digit(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On (size_t)-1 / (size_t)-2 nothing is stored in c, so it must not
	 * be classified, and the error code must not escape as a length. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswdigit(c)) return (int) nbytes;
	return 0;
}
183
+
184
/**
 * Decode the first multibyte character of s and test whether it is a
 * white-space character in the current locale.
 *
 * Returns the number of bytes occupied by that character if it is
 * white-space, and 0 otherwise.  0 is also returned when s starts with
 * an invalid or incomplete multibyte sequence.
 */
static inline int is_utf8_space(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On (size_t)-1 / (size_t)-2 nothing is stored in c, so it must not
	 * be classified, and the error code must not escape as a length. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswspace(c)) return (int) nbytes;
	return 0;
}
195
+
196
/**
 * Advance past the run of upper-case characters at the start of s.
 *
 * Returns a pointer to the first character for which is_utf8_upper()
 * reports 0; this is s itself when s does not start with an upper-case
 * character.
 */
static inline const char * skip_utf8_upper(const char * s)
{
	int width;

	/* is_utf8_upper() yields the byte width of an upper-case character,
	 * so it serves as both the loop test and the step size. */
	while ((width = is_utf8_upper(s)) != 0)
	{
		s += width;
	}
	return s;
}
206
+
207
+ /**
208
+ * Return true if the intial upper-case letters of the
209
+ * two input strings match. Comparison stops when
210
+ * both srings descend to lowercase.
211
+ */
212
+ static inline int utf8_upper_match(const char * s, const char * t)
213
+ {
214
+ mbstate_t mbs, mbt;
215
+ wchar_t ws, wt;
216
+ int ns, nt;
217
+
218
+ memset(&mbs, 0, sizeof(mbs));
219
+ memset(&mbt, 0, sizeof(mbt));
220
+
221
+ ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
222
+ nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
223
+ while (iswupper(ws) || iswupper(wt))
224
+ {
225
+ if (ws != wt) return FALSE;
226
+ s += ns;
227
+ t += nt;
228
+ ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
229
+ nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
230
+ }
231
+ return TRUE;
232
+ }
233
+
234
+ void downcase_utf8_str(char *to, const char * from, size_t usize);
235
+ void upcase_utf8_str(char *to, const char * from, size_t usize);
236
+
237
+ size_t lg_strlcpy(char * dest, const char *src, size_t size);
238
+ void safe_strcpy(char *u, const char * v, size_t usize);
239
+ void safe_strcat(char *u, const char *v, size_t usize);
240
+ char *safe_strdup(const char *u);
241
+
242
+ void left_print_string(FILE* fp, const char *, const char *);
243
+
244
+ /* routines for allocating basic objects */
245
+ void init_memusage(void);
246
+ void * xalloc(size_t);
247
+ void * xrealloc(void *, size_t oldsize, size_t newsize);
248
+ void * exalloc(size_t);
249
+
250
+ #define TRACK_SPACE_USAGE
251
+ #ifdef TRACK_SPACE_USAGE
252
+ void xfree(void *, size_t);
253
+ void exfree(void *, size_t);
254
+ #else /* TRACK_SPACE_USAGE */
255
+ static inline void xfree(void *p, size_t sz) { free(p); }
256
+ static inline void exfree(void *p, size_t sz) { free(p); };
257
+ #endif /* TRACK_SPACE_USAGE */
258
+
259
+ size_t get_space_in_use(void);
260
+ size_t get_max_space_used(void);
261
+
262
+
263
+ char * get_default_locale(void);
264
+ char * join_path(const char * prefix, const char * suffix);
265
+
266
+ FILE * dictopen(const char *filename, const char *how);
267
+ void * object_open(const char *filename,
268
+ void * (*opencb)(const char *, void *),
269
+ void * user_data);
270
+
271
/**
 * Returns the smallest power of two that is at least i and at least 1.
 *
 * If that power of two is not representable as a positive int (i.e. i
 * is larger than the largest power of two an int can hold), the largest
 * representable power of two is returned instead.  The previous
 * implementation shifted a signed int past its range in that case, which
 * is undefined behaviour and in practice never terminated.
 */
static inline int next_power_of_two_up(int i)
{
	/* Assumes int has no padding bits, so UINT_MAX >> 1 equals INT_MAX. */
	const unsigned int int_max = ~0u >> 1;
	unsigned int target;
	unsigned int pow2 = 1;

	if (i <= 1) return 1;

	target = (unsigned int) i;
	while (pow2 < target)
	{
		unsigned int doubled = pow2 << 1;
		if (doubled > int_max) break;   /* next step would not fit in int */
		pow2 = doubled;
	}
	return (int) pow2;
}
280
+
281
+ #endif
@@ -0,0 +1,124 @@
1
+ /***************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /***************************************************************************/
13
+
14
+ #include <wchar.h>
15
+ #include <wctype.h>
16
+ #include "api.h"
17
+ #include "error.h"
18
+
19
+ /**
20
+ * Reads in one word from the file, allocates space for it,
21
+ * and returns it.
22
+ */
23
+ static const char * get_a_word(Dictionary dict, FILE * fp)
24
+ {
25
+ char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
26
+ const char * s;
27
+ wint_t c;
28
+ mbstate_t mbss;
29
+ int j;
30
+
31
+ do {
32
+ c = fgetwc(fp);
33
+ } while ((c != WEOF) && iswspace(c));
34
+ if (c == WEOF) return NULL;
35
+
36
+ memset(&mbss, 0, sizeof(mbss));
37
+ for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
38
+ {
39
+ j += wctomb_check(&word[j], c, &mbss);
40
+ c = fgetwc(fp);
41
+ }
42
+
43
+ if (j >= MAX_WORD) {
44
+ word[MAX_WORD] = 0x0;
45
+ prt_error("Fatal Error: The dictionary contains a word that "
46
+ "is too long. The word was: %s", word);
47
+ exit(1);
48
+ }
49
+ word[j] = '\0';
50
+ s = string_set_add(word, dict->string_set);
51
+ return s;
52
+ }
53
+
54
+ /**
55
+ *
56
+ * (1) opens the word file and adds it to the word file list
57
+ * (2) reads in the words
58
+ * (3) puts each word in a Dict_node
59
+ * (4) links these together by their left pointers at the
60
+ * front of the list pointed to by dn
61
+ * (5) returns a pointer to the first of this list
62
+ */
63
+ Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
64
+ {
65
+ Dict_node * dn_new;
66
+ Word_file * wf;
67
+ FILE * fp;
68
+ const char * s;
69
+ char file_name_copy[MAX_PATH_NAME+1];
70
+
71
+ safe_strcpy(file_name_copy, filename+1, sizeof(file_name_copy)); /* get rid of leading '/' */
72
+
73
+ if ((fp = dictopen(file_name_copy, "r")) == NULL) {
74
+ prt_error("Error opening word file %s\n", file_name_copy);
75
+ return NULL;
76
+ }
77
+
78
+ /*printf(" Reading \"%s\"\n", file_name_copy);*/
79
+ /*printf("*"); fflush(stdout);*/
80
+
81
+ wf = (Word_file *) xalloc(sizeof (Word_file));
82
+ safe_strcpy(wf->file, file_name_copy, sizeof(wf->file));
83
+ wf->changed = FALSE;
84
+ wf->next = dict->word_file_header;
85
+ dict->word_file_header = wf;
86
+
87
+ while ((s = get_a_word(dict, fp)) != NULL) {
88
+ dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
89
+ dn_new->left = dn;
90
+ dn = dn_new;
91
+ dn->string = s;
92
+ dn->file = wf;
93
+ }
94
+ fclose(fp);
95
+ return dn;
96
+ }
97
+
98
+ void save_files(Dictionary dict)
99
+ {
100
+ Word_file *wf;
101
+ FILE *fp;
102
+ for (wf = dict->word_file_header; wf != NULL; wf = wf->next) {
103
+ if (wf->changed) {
104
+ if ((fp = fopen(wf->file, "w")) == NULL) {
105
+ printf("\nCannot open %s. Gee, this shouldn't happen.\n", wf->file);
106
+ printf("file not saved\n");
107
+ return;
108
+ }
109
+ printf(" saving file \"%s\"\n", wf->file);
110
+ /*output_dictionary(dict_root, fp, wf);*/
111
+ fclose(fp);
112
+ wf->changed = FALSE;
113
+ }
114
+ }
115
+ }
116
+
117
+ int files_need_saving(Dictionary dict)
118
+ {
119
+ Word_file *wf;
120
+ for (wf = dict->word_file_header; wf != NULL; wf = wf->next) {
121
+ if (wf->changed) return TRUE;
122
+ }
123
+ return FALSE;
124
+ }
@@ -0,0 +1,15 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ int files_need_saving(Dictionary dict);
14
+ void save_files(Dictionary dict);
15
+ Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename);
@@ -0,0 +1,526 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ /*
14
+ * Miscellaneous utilities for dealing with word types.
15
+ */
16
+
17
+ #include <math.h>
18
+ #include <stdio.h>
19
+
20
+ #include "api.h"
21
+ #include "disjunct-utils.h"
22
+ #include "word-utils.h"
23
+
24
+ /* ======================================================== */
25
+ /* Exp utilities ... */
26
+
27
+ void free_E_list(E_list *);
28
+ void free_Exp(Exp * e)
29
+ {
30
+ if (e->type != CONNECTOR_type) {
31
+ free_E_list(e->u.l);
32
+ }
33
+ xfree((char *)e, sizeof(Exp));
34
+ }
35
+
36
+ void free_E_list(E_list * l)
37
+ {
38
+ if (l == NULL) return;
39
+ free_E_list(l->next);
40
+ free_Exp(l->e);
41
+ xfree((char *)l, sizeof(E_list));
42
+ }
43
+
44
+ /* Returns the number of connectors in the expression e */
45
+ int size_of_expression(Exp * e)
46
+ {
47
+ int size;
48
+ E_list * l;
49
+ if (e->type == CONNECTOR_type) return 1;
50
+ size = 0;
51
+ for (l=e->u.l; l!=NULL; l=l->next) {
52
+ size += size_of_expression(l->e);
53
+ }
54
+ return size;
55
+ }
56
+
57
+ /**
58
+ * Build a copy of the given expression (don't copy strings, of course)
59
+ */
60
+ static E_list * copy_E_list(E_list * l);
61
+ Exp * copy_Exp(Exp * e)
62
+ {
63
+ Exp * n;
64
+ if (e == NULL) return NULL;
65
+ n = (Exp *) xalloc(sizeof(Exp));
66
+ *n = *e;
67
+ if (e->type != CONNECTOR_type) {
68
+ n->u.l = copy_E_list(e->u.l);
69
+ }
70
+ return n;
71
+ }
72
+
73
+ static E_list * copy_E_list(E_list * l)
74
+ {
75
+ E_list * nl;
76
+ if (l == NULL) return NULL;
77
+ nl = (E_list *) xalloc(sizeof(E_list));
78
+ nl->next = copy_E_list(l->next);
79
+ nl->e = copy_Exp(l->e);
80
+ return nl;
81
+ }
82
+
83
+ /**
84
+ * Compare two expressions, return 1 for equal, 0 for unequal
85
+ */
86
+ static int exp_compare(Exp * e1, Exp * e2)
87
+ {
88
+ E_list *el1, *el2;
89
+
90
+ if ((e1 == NULL) && (e2 == NULL))
91
+ return 1; /* they are equal */
92
+ if ((e1 == NULL) || (e2 == NULL))
93
+ return 0; /* they are not equal */
94
+ if (e1->type != e2->type)
95
+ return 0;
96
+ if (fabs (e1->cost - e2->cost) > 0.001)
97
+ return 0;
98
+ if (e1->type == CONNECTOR_type)
99
+ {
100
+ if (e1->dir != e2->dir)
101
+ return 0;
102
+ /* printf("%s %s\n",e1->u.string,e2->u.string); */
103
+ if (strcmp(e1->u.string,e2->u.string)!=0)
104
+ return 0;
105
+ }
106
+ else
107
+ {
108
+ el1 = e1->u.l;
109
+ el2 = e2->u.l;
110
+ /* while at least 1 is non-null */
111
+ for (;(el1!=NULL)||(el2!=NULL);) {
112
+ /*fail if 1 is null */
113
+ if ((el1==NULL)||(el2==NULL))
114
+ return 0;
115
+ /* fail if they are not compared */
116
+ if (exp_compare(el1->e, el2->e) == 0)
117
+ return 0;
118
+ if (el1!=NULL)
119
+ el1 = el1->next;
120
+ if (el2!=NULL)
121
+ el2 = el2->next;
122
+ }
123
+ }
124
+ return 1; /* if never returned 0, return 1 */
125
+ }
126
+
127
+ /**
128
+ * Sub-expression matcher -- return 1 if sub is non-NULL and
129
+ * contained in super, 0 otherwise.
130
+ */
131
+ static int exp_contains(Exp * super, Exp * sub)
132
+ {
133
+ E_list * el;
134
+
135
+ #ifdef DEBUG
136
+ printf("SUP: ");
137
+ if (super) print_expression(super);
138
+ printf("\n");
139
+ #endif
140
+
141
+ if (sub==NULL || super==NULL)
142
+ return 0;
143
+ if (exp_compare(sub,super)==1)
144
+ return 1;
145
+ if (super->type==CONNECTOR_type)
146
+ return 0; /* super is a leaf */
147
+
148
+ /* proceed through supers children and return 1 if sub
149
+ is contained in any of them */
150
+ for(el = super->u.l; el!=NULL; el=el->next) {
151
+ if (exp_contains(el->e, sub)==1)
152
+ return 1;
153
+ }
154
+ return 0;
155
+ }
156
+
157
+ /* ======================================================== */
158
+ /* X_node utilities ... */
159
+ /**
160
+ * frees the list of X_nodes pointed to by x, and all of the expressions
161
+ */
162
+ void free_X_nodes(X_node * x)
163
+ {
164
+ X_node * y;
165
+ for (; x!= NULL; x = y) {
166
+ y = x->next;
167
+ free_Exp(x->exp);
168
+ xfree((char *)x, sizeof(X_node));
169
+ }
170
+ }
171
+
172
+ /**
173
+ * Destructively catenates the two disjunct lists d1 followed by d2.
174
+ * Doesn't change the contents of the disjuncts.
175
+ * Traverses the first list, but not the second.
176
+ */
177
+ X_node * catenate_X_nodes(X_node *d1, X_node *d2)
178
+ {
179
+ X_node * dis = d1;
180
+
181
+ if (d1 == NULL) return d2;
182
+ if (d2 == NULL) return d1;
183
+ while (dis->next != NULL) dis = dis->next;
184
+ dis->next = d2;
185
+ return d1;
186
+ }
187
+
188
+ /* ======================================================== */
189
+ /* Connector utilities ... */
190
+
191
+ /**
192
+ * free_connectors() -- free the list of connectors pointed to by e
193
+ * (does not free any strings)
194
+ */
195
+ void free_connectors(Connector *e)
196
+ {
197
+ Connector * n;
198
+ for (; e != NULL; e = n)
199
+ {
200
+ n = e->next;
201
+ xfree((char *)e, sizeof(Connector));
202
+ }
203
+ }
204
+
205
+ void exfree_connectors(Connector *e)
206
+ {
207
+ Connector * n;
208
+ for(;e != NULL; e = n) {
209
+ n = e->next;
210
+ exfree((void *) e->string, sizeof(char)*(strlen(e->string)+1));
211
+ exfree(e, sizeof(Connector));
212
+ }
213
+ }
214
+
215
+ Connector * connector_new(void)
216
+ {
217
+ Connector *c = (Connector *) xalloc(sizeof(Connector));
218
+ c->length_limit = UNLIMITED_LEN;
219
+ c->string = "";
220
+ c->label = NORMAL_LABEL;
221
+ c->hash = -1;
222
+ c->priority = THIN_priority;
223
+ c->multi = FALSE;
224
+ c->next = NULL;
225
+ c->tableNext = NULL;
226
+ return c;
227
+ }
228
+
229
+ Connector * init_connector(Connector *c)
230
+ {
231
+ c->hash = -1;
232
+ c->length_limit = UNLIMITED_LEN;
233
+ return c;
234
+ }
235
+
236
+ /**
237
+ * This builds a new copy of the connector list pointed to by c.
238
+ * Strings, as usual, are not copied.
239
+ */
240
+ Connector * copy_connectors(Connector * c)
241
+ {
242
+ Connector *c1;
243
+ if (c == NULL) return NULL;
244
+ c1 = connector_new();
245
+ *c1 = *c;
246
+ c1->next = copy_connectors(c->next);
247
+ return c1;
248
+ }
249
+
250
+ Connector * excopy_connectors(Connector * c)
251
+ {
252
+ char * s;
253
+ Connector *c1;
254
+
255
+ if (c == NULL) return NULL;
256
+
257
+ c1 = connector_new();
258
+ *c1 = *c;
259
+ s = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));
260
+ strcpy(s, c->string);
261
+ c1->string = s;
262
+ c1->next = excopy_connectors(c->next);
263
+
264
+ return c1;
265
+ }
266
+
267
+ /* ======================================================== */
268
+ /* Link utilities ... */
269
+
270
+ Link * excopy_link(Link * l)
271
+ {
272
+ char * s;
273
+ Link * newl;
274
+
275
+ if (l == NULL) return NULL;
276
+
277
+ newl = (Link *) exalloc(sizeof(Link));
278
+ s = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
279
+ strcpy(s, l->name);
280
+ newl->name = s;
281
+ newl->l = l->l;
282
+ newl->r = l->r;
283
+ newl->lc = excopy_connectors(l->lc);
284
+ newl->rc = excopy_connectors(l->rc);
285
+
286
+ return newl;
287
+ }
288
+
289
+ void exfree_link(Link * l)
290
+ {
291
+ exfree_connectors(l->rc);
292
+ exfree_connectors(l->lc);
293
+ exfree((void *)l->name, sizeof(char)*(strlen(l->name)+1));
294
+ exfree(l, sizeof(Link));
295
+ }
296
+
297
+ /* ======================================================== */
298
+ /* Connector-set utilities ... */
299
+ /**
300
+ * This hash function only looks at the leading upper case letters of
301
+ * the string, and the direction, '+' or '-'.
302
+ */
303
+ static int connector_set_hash(Connector_set *conset, const char * s, int d)
304
+ {
305
+ unsigned int i;
306
+ /* djb2 hash */
307
+ i = 5381;
308
+ i = ((i << 5) + i) + d;
309
+ while (isupper((int) *s)) /* connector tables cannot contain UTF8, yet */
310
+ {
311
+ i = ((i << 5) + i) + *s;
312
+ s++;
313
+ }
314
+ return (i & (conset->table_size-1));
315
+ }
316
+
317
+ static void build_connector_set_from_expression(Connector_set * conset, Exp * e)
318
+ {
319
+ E_list * l;
320
+ Connector * c;
321
+ int h;
322
+ if (e->type == CONNECTOR_type)
323
+ {
324
+ c = connector_new();
325
+ c->string = e->u.string;
326
+ c->word = e->dir; /* just use the word field to give the dir */
327
+ h = connector_set_hash(conset, c->string, c->word);
328
+ c->next = conset->hash_table[h];
329
+ conset->hash_table[h] = c;
330
+ } else {
331
+ for (l=e->u.l; l!=NULL; l=l->next) {
332
+ build_connector_set_from_expression(conset, l->e);
333
+ }
334
+ }
335
+ }
336
+
337
+ Connector_set * connector_set_create(Exp *e)
338
+ {
339
+ int i;
340
+ Connector_set *conset;
341
+
342
+ conset = (Connector_set *) xalloc(sizeof(Connector_set));
343
+ conset->table_size = next_power_of_two_up(size_of_expression(e));
344
+ conset->hash_table =
345
+ (Connector **) xalloc(conset->table_size * sizeof(Connector *));
346
+ for (i=0; i<conset->table_size; i++) conset->hash_table[i] = NULL;
347
+ build_connector_set_from_expression(conset, e);
348
+ return conset;
349
+ }
350
+
351
+ void connector_set_delete(Connector_set * conset)
352
+ {
353
+ int i;
354
+ if (conset == NULL) return;
355
+ for (i=0; i<conset->table_size; i++) free_connectors(conset->hash_table[i]);
356
+ xfree(conset->hash_table, conset->table_size * sizeof(Connector *));
357
+ xfree(conset, sizeof(Connector_set));
358
+ }
359
+
360
+ /**
361
+ * Returns TRUE the given connector is in this conset. FALSE otherwise.
362
+ * d='+' means this connector is on the right side of the disjunct.
363
+ * d='-' means this connector is on the left side of the disjunct.
364
+ */
365
+ int match_in_connector_set(Sentence sent, Connector_set *conset, Connector * c, int d)
366
+ {
367
+ int h;
368
+ Connector * c1;
369
+ if (conset == NULL) return FALSE;
370
+ h = connector_set_hash(conset, c->string, d);
371
+ for (c1 = conset->hash_table[h]; c1 != NULL; c1 = c1->next)
372
+ {
373
+ if (x_match(sent, c1, c) && (d == c1->word)) return TRUE;
374
+ }
375
+ return FALSE;
376
+ }
377
+
378
+ /* ======================================================== */
379
+ /* More connector utilities ... */
380
+
381
+ /**
382
+ * This is like the basic "match" function in count.c - the basic
383
+ * connector-matching function used in parsing - except it ignores
384
+ * "priority" (used to handle fat links)
385
+ */
386
+ static int easy_match(const char * s, const char * t)
387
+ {
388
+ while(isupper((int)*s) || isupper((int)*t)) {
389
+ if (*s != *t) return FALSE;
390
+ s++;
391
+ t++;
392
+ }
393
+
394
+ while ((*s!='\0') && (*t!='\0')) {
395
+ if ((*s == '*') || (*t == '*') ||
396
+ ((*s == *t) && (*s != '^'))) {
397
+ s++;
398
+ t++;
399
+ } else return FALSE;
400
+ }
401
+ return TRUE;
402
+ }
403
+
404
+ /**
405
+ * word_has_connector() -- return TRUE if dictionary expression has connector
406
+ * This function takes a dict_node (corresponding to an entry in a
407
+ * given dictionary), a string (representing a connector), and a
408
+ * direction (0 = right-pointing, 1 = left-pointing); it returns 1
409
+ * if the dictionary expression for the word includes the connector,
410
+ * 0 otherwise. This can be used to see if a word is in a certain
411
+ * category (checking for a category connector in a table), or to see
412
+ * if a word has a connector in a normal dictionary. The connector
413
+ * check uses a "smart-match", the same kind used by the parser.
414
+ */
415
+ int word_has_connector(Dict_node * dn, const char * cs, int direction)
416
+ {
417
+ Connector * c2=NULL;
418
+ Disjunct * d, *d0;
419
+ if(dn == NULL) return -1;
420
+ d0 = d = build_disjuncts_for_dict_node(dn);
421
+ if(d == NULL) return 0;
422
+ for(; d!=NULL; d=d->next) {
423
+ if(direction==0) c2 = d->right;
424
+ if(direction==1) c2 = d->left;
425
+ for(; c2!=NULL; c2=c2->next) {
426
+ if(easy_match(c2->string, cs)==1) {
427
+ free_disjuncts(d0);
428
+ return 1;
429
+ }
430
+ }
431
+ }
432
+ free_disjuncts(d0);
433
+ return 0;
434
+ }
435
+
436
+ /* ======================================================== */
437
+ /* Dictionary utilities ... */
438
+
439
+ static int dn_word_contains(Dictionary dict,
440
+ Dict_node * w_dn, const char * macro)
441
+ {
442
+ Exp * m_exp;
443
+ Dict_node *m_dn;
444
+
445
+ if (w_dn == NULL) return 0;
446
+
447
+ m_dn = dictionary_lookup_list(dict, macro);
448
+ if (m_dn == NULL) return 0;
449
+
450
+ m_exp = m_dn->exp;
451
+ free_lookup_list(m_dn);
452
+
453
+ #ifdef DEBUG
454
+ printf("\nWORD: ");
455
+ print_expression(w_dn->exp);
456
+ printf("\nMACR: ");
457
+ print_expression(m_exp);
458
+ printf("\n");
459
+ #endif
460
+
461
+ for (;w_dn != NULL; w_dn = w_dn->right)
462
+ {
463
+ if (1 == exp_contains(w_dn->exp, m_exp))
464
+ return 1;
465
+ }
466
+ return 0;
467
+ }
468
+
469
+ /**
470
+ * word_contains: return true if the word may involve application of
471
+ * a rule.
472
+ *
473
+ * @return: true if word's expression contains macro's expression,
474
+ * false otherwise.
475
+ */
476
+ int word_contains(Dictionary dict, const char * word, const char * macro)
477
+ {
478
+ Dict_node *w_dn;
479
+ int ret;
480
+ w_dn = abridged_lookup_list(dict, word);
481
+ ret = dn_word_contains(dict, w_dn, macro);
482
+ free_lookup_list(w_dn);
483
+ return ret;
484
+ }
485
+
486
+ Dict_node * list_whole_dictionary(Dict_node *root, Dict_node *dn)
487
+ {
488
+ Dict_node *c, *d;
489
+ if (root == NULL) return dn;
490
+ c = (Dict_node *) xalloc(sizeof(Dict_node));
491
+ *c = *root;
492
+ d = list_whole_dictionary(root->left, dn);
493
+ c->right = list_whole_dictionary(root->right, d);
494
+ return c;
495
+ }
496
+
497
+ #define PAST_TENSE_FORM_MARKER "<marker-past>"
498
+ #define ENTITY_MARKER "<marker-entity>"
499
+ #define COMMON_ENTITY_MARKER "<marker-common-entity>"
500
+
501
+ /* This is exported to public API (for Java)
502
+ * @deprecated -- past-tense verbs are tagged with .v-d or .w-d or .q-d
503
+ * subscripts. use those instead to figure out if a verb is past tense.
504
+ */
505
+ int dictionary_is_past_tense_form(Dictionary dict, const char * str)
506
+ {
507
+ if (word_contains(dict, str, PAST_TENSE_FORM_MARKER) == 1)
508
+ return 1;
509
+ return 0;
510
+ }
511
+
512
+ /**
513
+ * dictionary_is_entity - Return true if word is entity.
514
+ * Entities are proper names (geographical names,
515
+ * names of people), street addresses, phone numbers,
516
+ * etc.
517
+ */
518
+ /* This is exported to public API (for Java) */
519
+ int dictionary_is_entity(Dictionary dict, const char * str)
520
+ {
521
+ if (word_contains(dict, str, ENTITY_MARKER) == 1)
522
+ return 1;
523
+ return 0;
524
+ }
525
+
526
+ /* ========================= END OF FILE ============================== */