grammar_cop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,281 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ #ifndef _LINK_GRAMMAR_UTILITIES_H_
14
+ #define _LINK_GRAMMAR_UTILITIES_H_
15
+
16
+ #ifdef __CYGWIN__
17
+ #define _WIN32 1
18
+ #endif /* __CYGWIN__ */
19
+
20
+ #ifndef _WIN32
21
+ #include <langinfo.h>
22
+ #endif
23
+
24
+ #include <ctype.h>
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <string.h>
28
+
29
+ #ifndef __CYGWIN__
30
+ /* I was told that cygwin does not have these files. */
31
+ #include <wchar.h>
32
+ #include <wctype.h>
33
+ #endif
34
+
35
+ #if defined(__CYGWIN__) && defined(__MINGW32__)
36
+ /* Some users have CygWin and MinGW installed!
37
+ * In this case, use the MinGW versions of UTF-8 support. */
38
+ #include <wchar.h>
39
+ #include <wctype.h>
40
+ #endif
41
+
42
+ #include "error.h"
43
+
44
+
45
+ #ifdef _WIN32
46
+ #include <windows.h>
47
+
48
+ #ifdef _MSC_VER
49
+ /* The Microsoft Visual C compiler doesn't support the "inline" keyword. */
50
+ #define inline
51
+
52
+ /* MS Visual C does not have any function normally found in strings.h */
53
+ /* In particular, be careful to avoid including strings.h */
54
+
55
+ /* MS Visual C uses non-standard string function names */
56
+ #define snprintf _snprintf
57
+ #define vsnprintf _vsnprintf
58
+ #define strcasecmp _stricmp
59
+ #define strdup _strdup
60
+ #define strncasecmp(a,b,s) strnicmp((a),(b),(s))
61
+
62
+ /* MS Visual C does not support some C99 standard floating-point functions */
63
+ #define fmaxf(a,b) ((a) > (b) ? (a) : (b))
64
+
65
+ #endif /* _MSC_VER */
66
+
67
+ /* Apparently, MinGW is also missing a variety of standard functions.
68
+ * Not surprising, since MinGW is intended for compiling Windows
69
+ * programs on Windows.
70
+ * MINGW is also known as MSYS */
71
+ #if defined(_MSC_VER) || defined(__MINGW32__)
72
+
73
+ /* No langinfo in Windows or MinGW */
74
+ #define nl_langinfo(X) ""
75
+
76
+ /* strtok_r is missing in Windows */
77
+ char * strtok_r (char *s, const char *delim, char **saveptr);
78
+
79
+ /* Windows doesn't have a thread-safe rand (???) */
80
+ /* Surely not, there must be something */
81
+ /* XXX FIXME -- this breaks thread safety on windows */
82
+ #define rand_r(seedp) rand()
83
+ #endif /* _MSC_VER || __MINGW32__ */
84
+
85
+ /*
86
+ * CYGWIN on Windows doesn't have UTF8 support, or wide chars ...
87
+ * However, MS Visual C apparently does, as does MinGW. Since
88
+ * some users have both cygwin and MinGW installed, crap out the
89
+ * UTF8 code only when MinGW is missing.
90
+ */
91
+ #if defined (__CYGWIN__) && !defined(__MINGW32__)
92
+ #define mbstate_t char
93
+ #define mbrtowc(w,s,n,x) ({*((char *)(w)) = *(s); 1;})
94
+ #define wcrtomb(s,w,x) ({*((char *)(s)) = ((char)(w)); 1;})
95
+ #define iswupper isupper
96
+ #define iswalpha isalpha
97
+ #define iswdigit isdigit
98
+ #define iswspace isspace
99
+ #define wchar_t char
100
+ #define wint_t int
101
+ #define fgetwc fgetc
102
+ #define WEOF EOF
103
+ #define towlower tolower
104
+ #define towupper toupper
105
+ #endif /* __CYGWIN__ and not __MINGW32__ */
106
+
107
+ #endif /* _WIN32 */
108
+
109
+ #if defined(__sun__)
110
+ int strncasecmp(const char *s1, const char *s2, size_t n);
111
+ /* This does not appear to be in string.h header file in sunos
112
+ (Or in linux when I compile with -ansi) */
113
+ #endif
114
+
115
+ #ifndef FALSE
116
+ #define FALSE 0
117
+ #endif
118
+
119
+ #ifndef TRUE
120
+ #define TRUE 1
121
+ #endif
122
+
123
+ #define assert(ex,string) { \
124
+ if (!(ex)) { \
125
+ prt_error("Assertion failed: %s\n", string); \
126
+ exit(1); \
127
+ } \
128
+ }
129
+
130
+ #if !defined(MIN)
131
+ #define MIN(X,Y) ( ((X) < (Y)) ? (X) : (Y))
132
+ #endif
133
+ #if !defined(MAX)
134
+ #define MAX(X,Y) ( ((X) > (Y)) ? (X) : (Y))
135
+ #endif
136
+
137
+
138
/**
 * Convert the wide character wc to its multibyte form and store it at s.
 * ps is the conversion state handed straight through to wcrtomb().
 *
 * Returns the number of bytes written. If the character cannot be
 * converted, the failure is reported through prt_error() and the
 * process exits with status 1.
 */
static inline int wctomb_check(char *s, wchar_t wc, mbstate_t *ps)
{
	size_t nr = wcrtomb(s, wc, ps);

	/* wcrtomb() reports failure as (size_t)-1. Test for that value
	 * directly instead of narrowing to int and checking the sign. */
	if (nr == (size_t)-1) {
		prt_error("Fatal Error: unknown character set %s\n", nl_langinfo(CODESET));
		exit(1);
	}
	return (int)nr;
}
147
+
148
/**
 * Decode the first multibyte character of s and test whether it is an
 * upper-case letter.
 *
 * Returns the number of bytes the character occupies if it is upper-case,
 * otherwise 0. An invalid or incomplete multibyte sequence also yields 0.
 */
static inline int is_utf8_upper(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* (size_t)-1 is an invalid sequence, (size_t)-2 an incomplete one.
	 * In both cases c was never stored, so it must not be examined. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswupper(c)) return (int)nbytes;
	return 0;
}
159
+
160
/**
 * Decode the first multibyte character of s and test whether it is
 * alphabetic.
 *
 * Returns the number of bytes the character occupies if it is alphabetic,
 * otherwise 0. An invalid or incomplete multibyte sequence also yields 0.
 */
static inline int is_utf8_alpha(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* (size_t)-1 is an invalid sequence, (size_t)-2 an incomplete one.
	 * In both cases c was never stored, so it must not be examined. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswalpha(c)) return (int)nbytes;
	return 0;
}
171
+
172
/**
 * Decode the first multibyte character of s and test whether it is a
 * decimal digit.
 *
 * Returns the number of bytes the character occupies if it is a digit,
 * otherwise 0. An invalid or incomplete multibyte sequence also yields 0.
 */
static inline int is_utf8_digit(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* (size_t)-1 is an invalid sequence, (size_t)-2 an incomplete one.
	 * In both cases c was never stored, so it must not be examined. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswdigit(c)) return (int)nbytes;
	return 0;
}
183
+
184
/**
 * Decode the first multibyte character of s and test whether it is
 * white space.
 *
 * Returns the number of bytes the character occupies if it is white space,
 * otherwise 0. An invalid or incomplete multibyte sequence also yields 0.
 */
static inline int is_utf8_space(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* (size_t)-1 is an invalid sequence, (size_t)-2 an incomplete one.
	 * In both cases c was never stored, so it must not be examined. */
	if (nbytes == (size_t)-1 || nbytes == (size_t)-2) return 0;

	if (iswspace(c)) return (int)nbytes;
	return 0;
}
195
+
196
/**
 * Advance past the run of upper-case characters at the start of s.
 * Returns a pointer to the first character for which is_utf8_upper()
 * reports 0 (which may be s itself).
 */
static inline const char * skip_utf8_upper(const char * s)
{
	int width;

	/* Each pass steps over one upper-case character of `width` bytes. */
	for (width = is_utf8_upper(s); width != 0; width = is_utf8_upper(s))
	{
		s += width;
	}
	return s;
}
206
+
207
/**
 * Return TRUE if the initial upper-case letters of the
 * two input strings match. Comparison stops when
 * both strings descend to lowercase (or end).
 *
 * If either string contains an invalid or incomplete multibyte
 * sequence before that point, FALSE is returned.
 */
static inline int utf8_upper_match(const char * s, const char * t)
{
	mbstate_t mbs, mbt;
	wchar_t ws, wt;
	size_t ns, nt;

	memset(&mbs, 0, sizeof(mbs));
	memset(&mbt, 0, sizeof(mbt));

	for (;;)
	{
		ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
		nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);

		/* (size_t)-1 and (size_t)-2 are the error returns of mbrtowc();
		 * the wide character is not stored in that case, so neither it
		 * nor the "length" may be used. */
		if (ns == (size_t)-1 || ns == (size_t)-2) return FALSE;
		if (nt == (size_t)-1 || nt == (size_t)-2) return FALSE;

		/* Both strings have left their upper-case prefix: they match. */
		if (!iswupper(ws) && !iswupper(wt)) return TRUE;

		if (ws != wt) return FALSE;

		/* ws == wt and at least one is upper-case, so neither is the
		 * terminating NUL; stepping forward stays inside both strings. */
		s += ns;
		t += nt;
	}
}
233
+
234
+ void downcase_utf8_str(char *to, const char * from, size_t usize);
235
+ void upcase_utf8_str(char *to, const char * from, size_t usize);
236
+
237
+ size_t lg_strlcpy(char * dest, const char *src, size_t size);
238
+ void safe_strcpy(char *u, const char * v, size_t usize);
239
+ void safe_strcat(char *u, const char *v, size_t usize);
240
+ char *safe_strdup(const char *u);
241
+
242
+ void left_print_string(FILE* fp, const char *, const char *);
243
+
244
+ /* routines for allocating basic objects */
245
+ void init_memusage(void);
246
+ void * xalloc(size_t);
247
+ void * xrealloc(void *, size_t oldsize, size_t newsize);
248
+ void * exalloc(size_t);
249
+
250
+ #define TRACK_SPACE_USAGE
251
+ #ifdef TRACK_SPACE_USAGE
252
+ void xfree(void *, size_t);
253
+ void exfree(void *, size_t);
254
+ #else /* TRACK_SPACE_USAGE */
255
+ static inline void xfree(void *p, size_t sz) { free(p); }
256
+ static inline void exfree(void *p, size_t sz) { free(p); };
257
+ #endif /* TRACK_SPACE_USAGE */
258
+
259
+ size_t get_space_in_use(void);
260
+ size_t get_max_space_used(void);
261
+
262
+
263
+ char * get_default_locale(void);
264
+ char * join_path(const char * prefix, const char * suffix);
265
+
266
+ FILE * dictopen(const char *filename, const char *how);
267
+ void * object_open(const char *filename,
268
+ void * (*opencb)(const char *, void *),
269
+ void * user_data);
270
+
271
/**
 * Returns the smallest power of two that is at least i and at least 1.
 *
 * If no such power of two fits in an int (i is larger than the biggest
 * representable power of two), that biggest power of two is returned
 * instead; the result is then smaller than i.
 */
static inline int next_power_of_two_up(int i)
{
	/* Largest power of two an int can hold: one quarter of the unsigned
	 * range, plus one (2^30 for a 32-bit int). */
	const int top = (int)((~0u >> 2) + 1u);
	int j = 1;

	/* Stop doubling at `top`; shifting further would overflow a signed
	 * int, which is undefined behaviour. */
	while (j < i && j < top) j <<= 1;
	return j;
}
280
+
281
+ #endif
@@ -0,0 +1,124 @@
1
+ /***************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /***************************************************************************/
13
+
14
+ #include <wchar.h>
15
+ #include <wctype.h>
16
+ #include "api.h"
17
+ #include "error.h"
18
+
19
+ /**
20
+ * Reads in one word from the file, allocates space for it,
21
+ * and returns it.
22
+ */
23
+ static const char * get_a_word(Dictionary dict, FILE * fp)
24
+ {
25
+ char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
26
+ const char * s;
27
+ wint_t c;
28
+ mbstate_t mbss;
29
+ int j;
30
+
31
+ do {
32
+ c = fgetwc(fp);
33
+ } while ((c != WEOF) && iswspace(c));
34
+ if (c == WEOF) return NULL;
35
+
36
+ memset(&mbss, 0, sizeof(mbss));
37
+ for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
38
+ {
39
+ j += wctomb_check(&word[j], c, &mbss);
40
+ c = fgetwc(fp);
41
+ }
42
+
43
+ if (j >= MAX_WORD) {
44
+ word[MAX_WORD] = 0x0;
45
+ prt_error("Fatal Error: The dictionary contains a word that "
46
+ "is too long. The word was: %s", word);
47
+ exit(1);
48
+ }
49
+ word[j] = '\0';
50
+ s = string_set_add(word, dict->string_set);
51
+ return s;
52
+ }
53
+
54
+ /**
55
+ *
56
+ * (1) opens the word file and adds it to the word file list
57
+ * (2) reads in the words
58
+ * (3) puts each word in a Dict_node
59
+ * (4) links these together by their left pointers at the
60
+ * front of the list pointed to by dn
61
+ * (5) returns a pointer to the first of this list
62
+ */
63
+ Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
64
+ {
65
+ Dict_node * dn_new;
66
+ Word_file * wf;
67
+ FILE * fp;
68
+ const char * s;
69
+ char file_name_copy[MAX_PATH_NAME+1];
70
+
71
+ safe_strcpy(file_name_copy, filename+1, sizeof(file_name_copy)); /* get rid of leading '/' */
72
+
73
+ if ((fp = dictopen(file_name_copy, "r")) == NULL) {
74
+ prt_error("Error opening word file %s\n", file_name_copy);
75
+ return NULL;
76
+ }
77
+
78
+ /*printf(" Reading \"%s\"\n", file_name_copy);*/
79
+ /*printf("*"); fflush(stdout);*/
80
+
81
+ wf = (Word_file *) xalloc(sizeof (Word_file));
82
+ safe_strcpy(wf->file, file_name_copy, sizeof(wf->file));
83
+ wf->changed = FALSE;
84
+ wf->next = dict->word_file_header;
85
+ dict->word_file_header = wf;
86
+
87
+ while ((s = get_a_word(dict, fp)) != NULL) {
88
+ dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
89
+ dn_new->left = dn;
90
+ dn = dn_new;
91
+ dn->string = s;
92
+ dn->file = wf;
93
+ }
94
+ fclose(fp);
95
+ return dn;
96
+ }
97
+
98
+ void save_files(Dictionary dict)
99
+ {
100
+ Word_file *wf;
101
+ FILE *fp;
102
+ for (wf = dict->word_file_header; wf != NULL; wf = wf->next) {
103
+ if (wf->changed) {
104
+ if ((fp = fopen(wf->file, "w")) == NULL) {
105
+ printf("\nCannot open %s. Gee, this shouldn't happen.\n", wf->file);
106
+ printf("file not saved\n");
107
+ return;
108
+ }
109
+ printf(" saving file \"%s\"\n", wf->file);
110
+ /*output_dictionary(dict_root, fp, wf);*/
111
+ fclose(fp);
112
+ wf->changed = FALSE;
113
+ }
114
+ }
115
+ }
116
+
117
+ int files_need_saving(Dictionary dict)
118
+ {
119
+ Word_file *wf;
120
+ for (wf = dict->word_file_header; wf != NULL; wf = wf->next) {
121
+ if (wf->changed) return TRUE;
122
+ }
123
+ return FALSE;
124
+ }
@@ -0,0 +1,15 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ int files_need_saving(Dictionary dict);
14
+ void save_files(Dictionary dict);
15
+ Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename);
@@ -0,0 +1,526 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ /*
14
+ * Miscellaneous utilities for dealing with word types.
15
+ */
16
+
17
+ #include <math.h>
18
+ #include <stdio.h>
19
+
20
+ #include "api.h"
21
+ #include "disjunct-utils.h"
22
+ #include "word-utils.h"
23
+
24
+ /* ======================================================== */
25
+ /* Exp utilities ... */
26
+
27
+ void free_E_list(E_list *);
28
+ void free_Exp(Exp * e)
29
+ {
30
+ if (e->type != CONNECTOR_type) {
31
+ free_E_list(e->u.l);
32
+ }
33
+ xfree((char *)e, sizeof(Exp));
34
+ }
35
+
36
+ void free_E_list(E_list * l)
37
+ {
38
+ if (l == NULL) return;
39
+ free_E_list(l->next);
40
+ free_Exp(l->e);
41
+ xfree((char *)l, sizeof(E_list));
42
+ }
43
+
44
+ /* Returns the number of connectors in the expression e */
45
+ int size_of_expression(Exp * e)
46
+ {
47
+ int size;
48
+ E_list * l;
49
+ if (e->type == CONNECTOR_type) return 1;
50
+ size = 0;
51
+ for (l=e->u.l; l!=NULL; l=l->next) {
52
+ size += size_of_expression(l->e);
53
+ }
54
+ return size;
55
+ }
56
+
57
+ /**
58
+ * Build a copy of the given expression (don't copy strings, of course)
59
+ */
60
+ static E_list * copy_E_list(E_list * l);
61
+ Exp * copy_Exp(Exp * e)
62
+ {
63
+ Exp * n;
64
+ if (e == NULL) return NULL;
65
+ n = (Exp *) xalloc(sizeof(Exp));
66
+ *n = *e;
67
+ if (e->type != CONNECTOR_type) {
68
+ n->u.l = copy_E_list(e->u.l);
69
+ }
70
+ return n;
71
+ }
72
+
73
+ static E_list * copy_E_list(E_list * l)
74
+ {
75
+ E_list * nl;
76
+ if (l == NULL) return NULL;
77
+ nl = (E_list *) xalloc(sizeof(E_list));
78
+ nl->next = copy_E_list(l->next);
79
+ nl->e = copy_Exp(l->e);
80
+ return nl;
81
+ }
82
+
83
+ /**
84
+ * Compare two expressions, return 1 for equal, 0 for unequal
85
+ */
86
+ static int exp_compare(Exp * e1, Exp * e2)
87
+ {
88
+ E_list *el1, *el2;
89
+
90
+ if ((e1 == NULL) && (e2 == NULL))
91
+ return 1; /* they are equal */
92
+ if ((e1 == NULL) || (e2 == NULL))
93
+ return 0; /* they are not equal */
94
+ if (e1->type != e2->type)
95
+ return 0;
96
+ if (fabs (e1->cost - e2->cost) > 0.001)
97
+ return 0;
98
+ if (e1->type == CONNECTOR_type)
99
+ {
100
+ if (e1->dir != e2->dir)
101
+ return 0;
102
+ /* printf("%s %s\n",e1->u.string,e2->u.string); */
103
+ if (strcmp(e1->u.string,e2->u.string)!=0)
104
+ return 0;
105
+ }
106
+ else
107
+ {
108
+ el1 = e1->u.l;
109
+ el2 = e2->u.l;
110
+ /* while at least 1 is non-null */
111
+ for (;(el1!=NULL)||(el2!=NULL);) {
112
+ /*fail if 1 is null */
113
+ if ((el1==NULL)||(el2==NULL))
114
+ return 0;
115
+ /* fail if they are not compared */
116
+ if (exp_compare(el1->e, el2->e) == 0)
117
+ return 0;
118
+ if (el1!=NULL)
119
+ el1 = el1->next;
120
+ if (el2!=NULL)
121
+ el2 = el2->next;
122
+ }
123
+ }
124
+ return 1; /* if never returned 0, return 1 */
125
+ }
126
+
127
+ /**
128
+ * Sub-expression matcher -- return 1 if sub is non-NULL and
129
+ * contained in super, 0 otherwise.
130
+ */
131
+ static int exp_contains(Exp * super, Exp * sub)
132
+ {
133
+ E_list * el;
134
+
135
+ #ifdef DEBUG
136
+ printf("SUP: ");
137
+ if (super) print_expression(super);
138
+ printf("\n");
139
+ #endif
140
+
141
+ if (sub==NULL || super==NULL)
142
+ return 0;
143
+ if (exp_compare(sub,super)==1)
144
+ return 1;
145
+ if (super->type==CONNECTOR_type)
146
+ return 0; /* super is a leaf */
147
+
148
+ /* proceed through supers children and return 1 if sub
149
+ is contained in any of them */
150
+ for(el = super->u.l; el!=NULL; el=el->next) {
151
+ if (exp_contains(el->e, sub)==1)
152
+ return 1;
153
+ }
154
+ return 0;
155
+ }
156
+
157
+ /* ======================================================== */
158
+ /* X_node utilities ... */
159
+ /**
160
+ * frees the list of X_nodes pointed to by x, and all of the expressions
161
+ */
162
+ void free_X_nodes(X_node * x)
163
+ {
164
+ X_node * y;
165
+ for (; x!= NULL; x = y) {
166
+ y = x->next;
167
+ free_Exp(x->exp);
168
+ xfree((char *)x, sizeof(X_node));
169
+ }
170
+ }
171
+
172
+ /**
173
+ * Destructively catenates the two disjunct lists d1 followed by d2.
174
+ * Doesn't change the contents of the disjuncts.
175
+ * Traverses the first list, but not the second.
176
+ */
177
+ X_node * catenate_X_nodes(X_node *d1, X_node *d2)
178
+ {
179
+ X_node * dis = d1;
180
+
181
+ if (d1 == NULL) return d2;
182
+ if (d2 == NULL) return d1;
183
+ while (dis->next != NULL) dis = dis->next;
184
+ dis->next = d2;
185
+ return d1;
186
+ }
187
+
188
+ /* ======================================================== */
189
+ /* Connector utilities ... */
190
+
191
+ /**
192
+ * free_connectors() -- free the list of connectors pointed to by e
193
+ * (does not free any strings)
194
+ */
195
+ void free_connectors(Connector *e)
196
+ {
197
+ Connector * n;
198
+ for (; e != NULL; e = n)
199
+ {
200
+ n = e->next;
201
+ xfree((char *)e, sizeof(Connector));
202
+ }
203
+ }
204
+
205
+ void exfree_connectors(Connector *e)
206
+ {
207
+ Connector * n;
208
+ for(;e != NULL; e = n) {
209
+ n = e->next;
210
+ exfree((void *) e->string, sizeof(char)*(strlen(e->string)+1));
211
+ exfree(e, sizeof(Connector));
212
+ }
213
+ }
214
+
215
+ Connector * connector_new(void)
216
+ {
217
+ Connector *c = (Connector *) xalloc(sizeof(Connector));
218
+ c->length_limit = UNLIMITED_LEN;
219
+ c->string = "";
220
+ c->label = NORMAL_LABEL;
221
+ c->hash = -1;
222
+ c->priority = THIN_priority;
223
+ c->multi = FALSE;
224
+ c->next = NULL;
225
+ c->tableNext = NULL;
226
+ return c;
227
+ }
228
+
229
+ Connector * init_connector(Connector *c)
230
+ {
231
+ c->hash = -1;
232
+ c->length_limit = UNLIMITED_LEN;
233
+ return c;
234
+ }
235
+
236
+ /**
237
+ * This builds a new copy of the connector list pointed to by c.
238
+ * Strings, as usual, are not copied.
239
+ */
240
+ Connector * copy_connectors(Connector * c)
241
+ {
242
+ Connector *c1;
243
+ if (c == NULL) return NULL;
244
+ c1 = connector_new();
245
+ *c1 = *c;
246
+ c1->next = copy_connectors(c->next);
247
+ return c1;
248
+ }
249
+
250
+ Connector * excopy_connectors(Connector * c)
251
+ {
252
+ char * s;
253
+ Connector *c1;
254
+
255
+ if (c == NULL) return NULL;
256
+
257
+ c1 = connector_new();
258
+ *c1 = *c;
259
+ s = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));
260
+ strcpy(s, c->string);
261
+ c1->string = s;
262
+ c1->next = excopy_connectors(c->next);
263
+
264
+ return c1;
265
+ }
266
+
267
+ /* ======================================================== */
268
+ /* Link utilities ... */
269
+
270
+ Link * excopy_link(Link * l)
271
+ {
272
+ char * s;
273
+ Link * newl;
274
+
275
+ if (l == NULL) return NULL;
276
+
277
+ newl = (Link *) exalloc(sizeof(Link));
278
+ s = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
279
+ strcpy(s, l->name);
280
+ newl->name = s;
281
+ newl->l = l->l;
282
+ newl->r = l->r;
283
+ newl->lc = excopy_connectors(l->lc);
284
+ newl->rc = excopy_connectors(l->rc);
285
+
286
+ return newl;
287
+ }
288
+
289
+ void exfree_link(Link * l)
290
+ {
291
+ exfree_connectors(l->rc);
292
+ exfree_connectors(l->lc);
293
+ exfree((void *)l->name, sizeof(char)*(strlen(l->name)+1));
294
+ exfree(l, sizeof(Link));
295
+ }
296
+
297
+ /* ======================================================== */
298
+ /* Connector-set utilities ... */
299
+ /**
300
+ * This hash function only looks at the leading upper case letters of
301
+ * the string, and the direction, '+' or '-'.
302
+ */
303
+ static int connector_set_hash(Connector_set *conset, const char * s, int d)
304
+ {
305
+ unsigned int i;
306
+ /* djb2 hash */
307
+ i = 5381;
308
+ i = ((i << 5) + i) + d;
309
+ while (isupper((int) *s)) /* connector tables cannot contain UTF8, yet */
310
+ {
311
+ i = ((i << 5) + i) + *s;
312
+ s++;
313
+ }
314
+ return (i & (conset->table_size-1));
315
+ }
316
+
317
+ static void build_connector_set_from_expression(Connector_set * conset, Exp * e)
318
+ {
319
+ E_list * l;
320
+ Connector * c;
321
+ int h;
322
+ if (e->type == CONNECTOR_type)
323
+ {
324
+ c = connector_new();
325
+ c->string = e->u.string;
326
+ c->word = e->dir; /* just use the word field to give the dir */
327
+ h = connector_set_hash(conset, c->string, c->word);
328
+ c->next = conset->hash_table[h];
329
+ conset->hash_table[h] = c;
330
+ } else {
331
+ for (l=e->u.l; l!=NULL; l=l->next) {
332
+ build_connector_set_from_expression(conset, l->e);
333
+ }
334
+ }
335
+ }
336
+
337
+ Connector_set * connector_set_create(Exp *e)
338
+ {
339
+ int i;
340
+ Connector_set *conset;
341
+
342
+ conset = (Connector_set *) xalloc(sizeof(Connector_set));
343
+ conset->table_size = next_power_of_two_up(size_of_expression(e));
344
+ conset->hash_table =
345
+ (Connector **) xalloc(conset->table_size * sizeof(Connector *));
346
+ for (i=0; i<conset->table_size; i++) conset->hash_table[i] = NULL;
347
+ build_connector_set_from_expression(conset, e);
348
+ return conset;
349
+ }
350
+
351
+ void connector_set_delete(Connector_set * conset)
352
+ {
353
+ int i;
354
+ if (conset == NULL) return;
355
+ for (i=0; i<conset->table_size; i++) free_connectors(conset->hash_table[i]);
356
+ xfree(conset->hash_table, conset->table_size * sizeof(Connector *));
357
+ xfree(conset, sizeof(Connector_set));
358
+ }
359
+
360
+ /**
361
+ * Returns TRUE the given connector is in this conset. FALSE otherwise.
362
+ * d='+' means this connector is on the right side of the disjunct.
363
+ * d='-' means this connector is on the left side of the disjunct.
364
+ */
365
+ int match_in_connector_set(Sentence sent, Connector_set *conset, Connector * c, int d)
366
+ {
367
+ int h;
368
+ Connector * c1;
369
+ if (conset == NULL) return FALSE;
370
+ h = connector_set_hash(conset, c->string, d);
371
+ for (c1 = conset->hash_table[h]; c1 != NULL; c1 = c1->next)
372
+ {
373
+ if (x_match(sent, c1, c) && (d == c1->word)) return TRUE;
374
+ }
375
+ return FALSE;
376
+ }
377
+
378
+ /* ======================================================== */
379
+ /* More connector utilities ... */
380
+
381
+ /**
382
+ * This is like the basic "match" function in count.c - the basic
383
+ * connector-matching function used in parsing - except it ignores
384
+ * "priority" (used to handle fat links)
385
+ */
386
+ static int easy_match(const char * s, const char * t)
387
+ {
388
+ while(isupper((int)*s) || isupper((int)*t)) {
389
+ if (*s != *t) return FALSE;
390
+ s++;
391
+ t++;
392
+ }
393
+
394
+ while ((*s!='\0') && (*t!='\0')) {
395
+ if ((*s == '*') || (*t == '*') ||
396
+ ((*s == *t) && (*s != '^'))) {
397
+ s++;
398
+ t++;
399
+ } else return FALSE;
400
+ }
401
+ return TRUE;
402
+ }
403
+
404
+ /**
405
+ * word_has_connector() -- return TRUE if dictionary expression has connector
406
+ * This function takes a dict_node (corresponding to an entry in a
407
+ * given dictionary), a string (representing a connector), and a
408
+ * direction (0 = right-pointing, 1 = left-pointing); it returns 1
409
+ * if the dictionary expression for the word includes the connector,
410
+ * 0 otherwise. This can be used to see if a word is in a certain
411
+ * category (checking for a category connector in a table), or to see
412
+ * if a word has a connector in a normal dictionary. The connector
413
+ * check uses a "smart-match", the same kind used by the parser.
414
+ */
415
+ int word_has_connector(Dict_node * dn, const char * cs, int direction)
416
+ {
417
+ Connector * c2=NULL;
418
+ Disjunct * d, *d0;
419
+ if(dn == NULL) return -1;
420
+ d0 = d = build_disjuncts_for_dict_node(dn);
421
+ if(d == NULL) return 0;
422
+ for(; d!=NULL; d=d->next) {
423
+ if(direction==0) c2 = d->right;
424
+ if(direction==1) c2 = d->left;
425
+ for(; c2!=NULL; c2=c2->next) {
426
+ if(easy_match(c2->string, cs)==1) {
427
+ free_disjuncts(d0);
428
+ return 1;
429
+ }
430
+ }
431
+ }
432
+ free_disjuncts(d0);
433
+ return 0;
434
+ }
435
+
436
+ /* ======================================================== */
437
+ /* Dictionary utilities ... */
438
+
439
+ static int dn_word_contains(Dictionary dict,
440
+ Dict_node * w_dn, const char * macro)
441
+ {
442
+ Exp * m_exp;
443
+ Dict_node *m_dn;
444
+
445
+ if (w_dn == NULL) return 0;
446
+
447
+ m_dn = dictionary_lookup_list(dict, macro);
448
+ if (m_dn == NULL) return 0;
449
+
450
+ m_exp = m_dn->exp;
451
+ free_lookup_list(m_dn);
452
+
453
+ #ifdef DEBUG
454
+ printf("\nWORD: ");
455
+ print_expression(w_dn->exp);
456
+ printf("\nMACR: ");
457
+ print_expression(m_exp);
458
+ printf("\n");
459
+ #endif
460
+
461
+ for (;w_dn != NULL; w_dn = w_dn->right)
462
+ {
463
+ if (1 == exp_contains(w_dn->exp, m_exp))
464
+ return 1;
465
+ }
466
+ return 0;
467
+ }
468
+
469
+ /**
470
+ * word_contains: return true if the word may involve application of
471
+ * a rule.
472
+ *
473
+ * @return: true if word's expression contains macro's expression,
474
+ * false otherwise.
475
+ */
476
+ int word_contains(Dictionary dict, const char * word, const char * macro)
477
+ {
478
+ Dict_node *w_dn;
479
+ int ret;
480
+ w_dn = abridged_lookup_list(dict, word);
481
+ ret = dn_word_contains(dict, w_dn, macro);
482
+ free_lookup_list(w_dn);
483
+ return ret;
484
+ }
485
+
486
+ Dict_node * list_whole_dictionary(Dict_node *root, Dict_node *dn)
487
+ {
488
+ Dict_node *c, *d;
489
+ if (root == NULL) return dn;
490
+ c = (Dict_node *) xalloc(sizeof(Dict_node));
491
+ *c = *root;
492
+ d = list_whole_dictionary(root->left, dn);
493
+ c->right = list_whole_dictionary(root->right, d);
494
+ return c;
495
+ }
496
+
497
+ #define PAST_TENSE_FORM_MARKER "<marker-past>"
498
+ #define ENTITY_MARKER "<marker-entity>"
499
+ #define COMMON_ENTITY_MARKER "<marker-common-entity>"
500
+
501
+ /* This is exported to public API (for Java)
502
+ * @deprecated -- past-tense verbs are tagged with .v-d or .w-d or .q-d
503
+ * subscripts. use those instead to figure out if a verb is past tense.
504
+ */
505
+ int dictionary_is_past_tense_form(Dictionary dict, const char * str)
506
+ {
507
+ if (word_contains(dict, str, PAST_TENSE_FORM_MARKER) == 1)
508
+ return 1;
509
+ return 0;
510
+ }
511
+
512
+ /**
513
+ * dictionary_is_entity - Return true if word is entity.
514
+ * Entities are proper names (geographical names,
515
+ * names of people), street addresses, phone numbers,
516
+ * etc.
517
+ */
518
+ /* This is exported to public API (for Java) */
519
+ int dictionary_is_entity(Dictionary dict, const char * str)
520
+ {
521
+ if (word_contains(dict, str, ENTITY_MARKER) == 1)
522
+ return 1;
523
+ return 0;
524
+ }
525
+
526
+ /* ========================= END OF FILE ============================== */