grammar_cop 0.1.0

Files changed (344)
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
data/ext/link_grammar/link-grammar/prune.h
@@ -0,0 +1,17 @@
+ /*************************************************************************/
+ /* Copyright (c) 2004 */
+ /* Daniel Sleator, David Temperley, and John Lafferty */
+ /* All rights reserved */
+ /* */
+ /* Use of the link grammar parsing system is subject to the terms of the */
+ /* license set forth in the LICENSE file included with this software, */
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
+ /* This license allows free redistribution and use in source and binary */
+ /* forms, with or without modification, subject to certain conditions. */
+ /* */
+ /*************************************************************************/
+ void prune(Sentence sent);
+ int power_prune(Sentence sent, int mode, Parse_Options opts);
+ void pp_and_power_prune(Sentence sent, int mode, Parse_Options opts);
+ int prune_match(int dist, Connector * left, Connector * right);
+ void expression_prune(Sentence sent);
data/ext/link_grammar/link-grammar/read-dict.c
@@ -0,0 +1,1785 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include <limits.h>
15
+ #include <string.h>
16
+ #include <wchar.h>
17
+ #include <wctype.h>
18
+ #include "api.h"
19
+ #include "disjunct-utils.h"
20
+ #include "error.h"
21
+
22
+ const char * linkgrammar_get_version(void)
23
+ {
24
+ const char *s = "link-grammar-" LINK_VERSION_STRING;
25
+ return s;
26
+ }
27
+
28
+ const char * linkgrammar_get_dict_version(Dictionary dict)
29
+ {
30
+ static char * ver = NULL;
31
+ char * p;
32
+ Dict_node *dn;
33
+ Exp *e;
34
+
35
+ if (ver) return ver;
36
+
37
+ /* The newer dictionaries should contain a macro of the form:
38
+ * <dictionary-version-number>: V4v6v6+;
39
+ * which would indicate dictionary version 4.6.6
40
+ * Older dictionaries contain no version info.
41
+ */
42
+ dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
43
+ if (NULL == dn) return "[unknown]";
44
+
45
+ e = dn->exp;
46
+ ver = strdup(&e->u.string[1]);
47
+ p = strchr(ver, 'v');
48
+ while (p)
49
+ {
50
+ *p = '.';
51
+ p = strchr(p+1, 'v');
52
+ }
53
+
54
+ free_lookup_list(dn);
55
+ return ver;
56
+ }
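The rewrite loop above only drops the leading character of the stored connector string and then turns every 'v' into a dot, so the macro decodes as in this small worked example (using the V4v6v6 value from the comment above):

    /* Worked decoding by linkgrammar_get_dict_version():
     *   connector stored for <dictionary-version-number> : "V4v6v6"
     *   after strdup(&e->u.string[1])                     : "4v6v6"
     *   after the 'v' -> '.' loop                         : "4.6.6"
     */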
57
+
58
+
59
+ /*
60
+ The dictionary format:
61
+
62
+ In what follows:
63
+ Every "%" symbol and everything after it is ignored on every line.
64
+ Every newline or tab is replaced by a space.
65
+
66
+ The dictionary file is a sequence of ENTRIES. Each ENTRY is one or
67
+ more WORDS (a sequence of upper or lower case letters) separated by
68
+ spaces, followed by a ":", followed by an EXPRESSION followed by a
69
+ ";". An EXPRESSION is an expression where the operators are "&"
70
+ or "and" or "|" or "or", and there are three types of parentheses:
71
+ "()", "{}", and "[]". The terminal symbols of this grammar are the
72
+ connectors, which are strings of letters or numbers or *s.
73
+ Expressions may be written in prefix or infix form. In prefix-form,
74
+ the expressions are lisp-like, with the operators &, | preceding
75
+ the operands. In infix-form, the operators are in the middle. The
76
+ current dictionaries are in infix form. If the C preprocessor
77
+ constant INFIX_NOTATION is defined, then the dictionary is assumed
78
+ to be in infix form.
79
+
80
+ The connector begins with an optional @, which is followed by an upper
81
+ case sequence of letters. Each subsequent *, lower case letter or
82
+ number is a subscript. At the end is a + or - sign. The "@" allows
83
+ this connector to attach to one or more other connectors.
84
+
85
+ Here is a sample dictionary entry (in infix form):
86
+
87
+ gone: T- & {@EV+};
88
+
89
+ (See our paper for more about how to interpret the meaning of the
90
+ dictionary expressions.)
91
+
92
+ A previously defined word (such as "gone" above) may be used instead
93
+ of a connector to specify the expression it was defined to be. Of
94
+ course, in this case, it must uniquely specify a word in the
95
+ dictionary, and have been previously defined.
96
+
97
+ If a word is of the form "/foo", then the file current-dir/foo
98
+ is a so-called word file, and is read in as a list of words.
99
+ A word file is just a list of words separated by blanks or newlines.
100
+
101
+ A word that contains the character "_" defines an idiomatic use of
102
+ the words separated by the "_". For example "kind of" is an idiomatic
103
+ expression, so a word "kind_of" is defined in the dictionary.
104
+ Idiomatic expressions of any number of words can be defined in this way.
105
+ When the word "kind" is encountered, all the idiomatic uses of the word
106
+ are considered.
107
+
108
+ An expression enclosed in "[..]" is given a cost of 1. This means
109
+ that if any of the connectors inside the square braces are used,
110
+ a cost of 1 is incurred. (This cost is the first element of the cost
111
+ vector printed when a sentence is parsed.) Of course if something is
112
+ inside of 10 levels of "[..]" then using it incurs a cost of 10.
113
+ These costs are called "disjunct costs". The linkages are printed out
114
+ in order of non-increasing disjunct cost.
115
+
116
+ The expression "(A+ or ())" means that you can choose either "A+" or
117
+ the empty expression "()", that is, that the connector "A+" is
118
+ optional. This is more compactly expressed as "{A+}". In other words,
119
+ curly braces indicate an optional expression.
120
+
121
+ The expression "(A+ or [])" is the same as that above, but there is a
122
+ cost of 1 incurred for choosing not to use "A+". The expression
123
+ "(EXP1 & [EXP2])" is exactly the same as "[EXP1 & EXP2]". The difference
124
+ between "({[A+]} & B+)" and "([{A+}] & B+)" is that the latter always
125
+ incurs a cost of 1, while the former only gets a cost of 1 if "A+" is
126
+ used.
127
+
128
+ The dictionary writer is not allowed to use connectors that begin in
129
+ "ID". This is reserved for the connectors automatically
130
+ generated for idioms.
131
+
132
+ Dictionary words may be followed by a dot (period, "."), and a "subscript"
133
+ identifying the word type. The subscript may be one or more letters or
134
+ numbers, but must begin with a letter. Currently, the dictionary contains
135
+ (mostly?) subscripts consisting of a single letter, and these serve mostly
136
+ to identify the part-of-speech. In general, subscripts can also be used
137
+ to distinguish different word senses.
138
+ */
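To make the notation above concrete, here is a small hypothetical fragment in the infix form just described. The words and connector names are invented for illustration (they are not taken from the shipped 4.0.dict), and the fragment is shown as a C string literal only so that the example stays in the language of this file:

    /* Invented entries illustrating the infix dictionary notation. */
    static const char *example_dict_fragment =
        "ran.v: S- & {@EV+};\n"   /* needs a subject link; adverb links optional */
        "quickly: EV-;\n"         /* plain connector, attaches leftward          */
        "perhaps: [EV-];\n"       /* same connector, but costs 1 to use          */
        "kind_of: EV+;\n";        /* idiomatic entry for the two-word "kind of"  */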
139
+
140
+ static int link_advance(Dictionary dict);
141
+
142
+ static void dict_error2(Dictionary dict, const char * s, const char *s2)
143
+ {
144
+ int i;
145
+ char tokens[1024], t[128];
146
+
147
+ if (dict->recursive_error) return;
148
+ dict->recursive_error = TRUE;
149
+
150
+ tokens[0] = '\0';
151
+ for (i=0; i<5 && dict->token[0] != '\0' ; i++)
152
+ {
153
+ sprintf(t, "\"%s\" ", dict->token);
154
+ strcat(tokens, t);
155
+ link_advance(dict);
156
+ }
157
+ if (s2)
158
+ {
159
+ err_ctxt ec;
160
+ ec.sent = NULL;
161
+ err_msg(&ec, Error, "Error parsing dictionary %s.\n"
162
+ "%s %s\n\t line %d, tokens = %s\n",
163
+ dict->name,
164
+ s, s2, dict->line_number, tokens);
165
+ }
166
+ else
167
+ {
168
+ err_ctxt ec;
169
+ ec.sent = NULL;
170
+ err_msg(&ec, Error, "Error parsing dictionary %s.\n"
171
+ "%s\n\t line %d, tokens = %s\n",
172
+ dict->name,
173
+ s, dict->line_number, tokens);
174
+ }
175
+ dict->recursive_error = FALSE;
176
+ }
177
+
178
+ static void dict_error(Dictionary dict, const char * s)
179
+ {
180
+ dict_error2(dict, s, NULL);
181
+ }
182
+
183
+ static void warning(Dictionary dict, const char * s)
184
+ {
185
+ err_ctxt ec;
186
+ ec.sent = NULL;
187
+ err_msg(&ec, Warn, "Warning: %s\n"
188
+ "\tline %d, current token = \"%s\"\n",
189
+ s, dict->line_number, dict->token);
190
+ }
191
+
192
+ /**
193
+ * This gets the next character from the input, eliminating comments.
194
+ * If we're in quote mode, it does not consider the % character for
195
+ * comments.
196
+ */
197
+ static wint_t get_character(Dictionary dict, int quote_mode)
198
+ {
199
+ wint_t c;
200
+
201
+ c = fgetwc(dict->fp);
202
+ if ((c == '%') && (!quote_mode)) {
203
+ while((c != WEOF) && (c != '\n')) c = fgetwc(dict->fp);
204
+ }
205
+ if (c == '\n') dict->line_number++;
206
+ return c;
207
+ }
208
+
209
+
210
+ /*
211
+ * These 10 characters are the ones that define the syntax of the
212
+ * dictionary.
213
+ */
214
+ #define SPECIAL "(){};[]&|:"
215
+
216
+ /**
217
+ * Return true if the input wide-character is one of the special
218
+ * characters used to define the syntax of the dictionary.
219
+ */
220
+ static int is_special(wint_t wc, mbstate_t *ps)
221
+ {
222
+ char buff[MB_LEN_MAX];
223
+ int nr = wcrtomb(buff, wc, ps);
224
+ if (1 != nr) return FALSE;
225
+ return (NULL != strchr(SPECIAL, buff[0]));
226
+ }
227
+
228
+ /**
229
+ * This reads the next token from the input into token.
230
+ * Return 1 if a token was read, else return 0 (and print a warning).
231
+ */
232
+ static int link_advance(Dictionary dict)
233
+ {
234
+ wint_t c;
235
+ int nr, i;
236
+ int quote_mode;
237
+
238
+ dict->is_special = FALSE;
239
+
240
+ if (dict->already_got_it != '\0')
241
+ {
242
+ dict->is_special = is_special(dict->already_got_it, &dict->mbss);
243
+ if (dict->already_got_it == WEOF) {
244
+ dict->token[0] = '\0';
245
+ } else {
246
+ dict->token[0] = dict->already_got_it; /* specials are one byte */
247
+ dict->token[1] = '\0';
248
+ }
249
+ dict->already_got_it = '\0';
250
+ return 1;
251
+ }
252
+
253
+ do { c = get_character(dict, FALSE); } while (iswspace(c));
254
+
255
+ quote_mode = FALSE;
256
+
257
+ i = 0;
258
+ for (;;)
259
+ {
260
+ if (i > MAX_TOKEN_LENGTH-3) { /* 3 for multi-byte tokens */
261
+ dict_error(dict, "Token too long");
262
+ return 0;
263
+ }
264
+ if (quote_mode) {
265
+ if (c == '\"') {
266
+ quote_mode = FALSE;
267
+ dict->token[i] = '\0';
268
+ return 1;
269
+ }
270
+ if (iswspace(c)) {
271
+ dict_error(dict, "White space inside of token");
272
+ return 0;
273
+ }
274
+
275
+ /* Although we read wide chars, we store UTF8 internally, always. */
276
+ nr = wcrtomb(&dict->token[i], c, &dict->mbss);
277
+ if (nr < 0) {
278
+ #ifndef _WIN32
279
+ dict_error2(dict, "Unable to read UTF8 string in current locale",
280
+ nl_langinfo(CODESET));
281
+ fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
282
+ #else
283
+ dict_error(dict, "Unable to read UTF8 string in current locale");
284
+ #endif
285
+ return 0;
286
+ }
287
+ i += nr;
288
+ } else {
289
+ if (is_special(c, &dict->mbss))
290
+ {
291
+ if (i == 0)
292
+ {
293
+ dict->token[0] = c; /* special toks are one char always */
294
+ dict->token[1] = '\0';
295
+ dict->is_special = TRUE;
296
+ return 1;
297
+ }
298
+ dict->token[i] = '\0';
299
+ dict->already_got_it = c;
300
+ return 1;
301
+ }
302
+ if (c == WEOF) {
303
+ if (i == 0) {
304
+ dict->token[0] = '\0';
305
+ return 1;
306
+ }
307
+ dict->token[i] = '\0';
308
+ dict->already_got_it = c;
309
+ return 1;
310
+ }
311
+ if (iswspace(c)) {
312
+ dict->token[i] = '\0';
313
+ return 1;
314
+ }
315
+ if (c == '\"') {
316
+ quote_mode = TRUE;
317
+ } else {
318
+ /* store UTF8 internally, always. */
319
+ nr = wctomb_check(&dict->token[i], c, &dict->mbss);
320
+ if (nr < 0) {
321
+ #ifndef _WIN32
322
+ dict_error2(dict, "Unable to read UTF8 string in current locale",
323
+ nl_langinfo(CODESET));
324
+ fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
325
+ #else
326
+ dict_error(dict, "Unable to read UTF8 string in current locale");
327
+ #endif
328
+ return 0;
329
+ }
330
+ i += nr;
331
+ }
332
+ }
333
+ c = get_character(dict, quote_mode);
334
+ }
335
+ return 1;
336
+ }
337
+
338
+ /**
339
+ * Returns TRUE if this token is a special token and it is equal to c
340
+ */
341
+ static int is_equal(Dictionary dict, wint_t c)
342
+ {
343
+ return (dict->is_special &&
344
+ wctob(c) == dict->token[0] &&
345
+ dict->token[1] == '\0');
346
+ }
347
+
348
+ /**
349
+ * Make sure the string s is a valid connector.
350
+ * Return 1 if the connector is valid, else return 0,
351
+ * and print an appropriate warning message.
352
+ */
353
+ static int check_connector(Dictionary dict, const char * s)
354
+ {
355
+ int i;
356
+ i = strlen(s);
357
+ if (i < 1) {
358
+ dict_error(dict, "Expecting a connector.");
359
+ return 0;
360
+ }
361
+ i = s[i-1]; /* the last character of the token */
362
+ if ((i!='+') && (i!='-')) {
363
+ dict_error(dict, "A connector must end in a \"+\" or \"-\".");
364
+ return 0;
365
+ }
366
+ if (*s == '@') s++;
367
+ if (!isupper((int)*s)) {
368
+ dict_error(dict, "The first letter of a connector must be in [A--Z].");
369
+ return 0;
370
+ }
371
+ if ((*s == 'I') && (*(s+1) == 'D')) {
372
+ dict_error(dict, "Connectors beginning with \"ID\" are forbidden");
373
+ return 0;
374
+ }
375
+ while (*(s+1)) {
376
+ if ((!isalnum((int)*s)) && (*s != '*') && (*s != '^')) {
377
+ dict_error(dict, "All letters of a connector must be ASCII alpha-numeric.");
378
+ return 0;
379
+ }
380
+ s++;
381
+ }
382
+ return 1;
383
+ }
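Taken together, the checks above accept or reject candidate connectors as in the following illustrative cases (the connector names are made up):

    /* Illustrative outcomes of check_connector(), per the rules above:
     *   "S+", "MVa-", "@EV+", "Xca*b+"   accepted
     *   "S"      rejected: no trailing + or -
     *   "foo+"   rejected: must start with an upper-case letter
     *   "ID3+"   rejected: the "ID" prefix is reserved for idioms
     */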
384
+
385
+ /* ======================================================================== */
386
+ /**
387
+ * Dictionary entry comparison and ordering functions.
388
+ *
389
+ * The data structure storing the dictionary is simply a binary tree.
390
+ * The entries in the binary tree are sorted by alphabetical order.
391
+ * There is one catch, however: words may have suffixes (a dot, followed
392
+ * by the suffix), and these suffixes are to be handled appropriately
393
+ * during sorting and comparison.
394
+ *
395
+ * The use of suffixes means that the ordering of the words is not
396
+ * exactly the order given by strcmp. The order must be such that, for
397
+ * example, "make" < "make.n" < "make-up" -- suffixed words come after
398
+ * the bare words, but before any other words with non-ascii-alpha
399
+ * characters (such as the hyphen in "make-up", or possibly UTF8
400
+ * characters). Thus, straight "strcmp" can't be used to determine
401
+ * dictionary order.
402
+ *
403
+ * Thus, a set of specialized string comparison and ordering functions
404
+ * are provided. These "do the right thing" when matching strings with
405
+ * and without suffixes.
406
+ */
407
+ /**
408
+ * dict_order - order two dictionary words in proper sort order.
409
+ * Return zero if the strings match, else return standard
410
+ * (locale-dependent) UTF8 sort order.
411
+ */
412
+ /* verbose version */
413
+ /*
414
+ int dict_order(char *s, char *t)
415
+ {
416
+ int ss, tt;
417
+ while (*s != '\0' && *s == *t) {
418
+ s++;
419
+ t++;
420
+ }
421
+ if (*s == '.') {
422
+ ss = 1;
423
+ } else {
424
+ ss = (*s)<<1;
425
+ }
426
+ if (*t == '.') {
427
+ tt = 1;
428
+ } else {
429
+ tt = (*t)<<1;
430
+ }
431
+ return (ss - tt);
432
+ }
433
+ */
434
+
435
+ /* terse version */
436
+ static inline int dict_order(const char *s, const char *t)
437
+ {
438
+ while (*s != '\0' && *s == *t) {s++; t++;}
439
+ return (((*s == '.')?(1):((*s)<<1)) - ((*t == '.')?(1):((*t)<<1)));
440
+ }
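A quick way to check the ordering this produces is to exercise the comparator standalone. The sketch below copies the terse version above into a tiny test program and asserts the "make" < "make.n" < "make-up" example from the comment; the test values are only illustrative:

    /* Standalone sanity check of the suffix-aware ordering. */
    #include <assert.h>
    #include <stdio.h>

    static int dict_order_copy(const char *s, const char *t)
    {
        while (*s != '\0' && *s == *t) { s++; t++; }
        return (((*s == '.') ? 1 : ((*s) << 1)) - ((*t == '.') ? 1 : ((*t) << 1)));
    }

    int main(void)
    {
        assert(dict_order_copy("make", "make.n") < 0);    /* bare word first         */
        assert(dict_order_copy("make.n", "make-up") < 0); /* subscript before hyphen */
        assert(dict_order_copy("make", "make") == 0);     /* identical strings match */
        puts("dict_order examples hold");
        return 0;
    }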
441
+
442
+ /**
443
+ * dict_order_wild() -- order dictionary strings, with wildcard.
444
+ * Assuming that s is a pointer to a dictionary string, and that
445
+ * t is a pointer to a search string, this returns 0 if they
446
+ * match, >0 if s>t, and <0 if s<t.
447
+ *
448
+ * The matching is done as follows. Walk down the strings until
449
+ * you come to the end of one of them, or until you find unequal
450
+ * characters. A "*" matches anything. Otherwise, replace "."
451
+ * by "\0", and take the difference. This behavior matches that
452
+ * of the function dict_order().
453
+ */
454
+ static inline int dict_order_wild(const char * s, const char * t)
455
+ {
456
+ while((*s != '\0') && (*s == *t)) {s++; t++;}
457
+ if ((*s == '*') || (*t == '*')) return 0;
458
+ return (((*s == '.')?('\0'):(*s)) - ((*t == '.')?('\0'):(*t)));
459
+ }
460
+
461
+ /**
462
+ * dict_match -- return true if strings match, else false.
463
+ * A "bare" string (one without a suffix) will match any corresponding
464
+ * string with a suffix; so, for example, "make" and "make.n" are
465
+ * a match. If both strings have suffixes, then the suffixes must match.
466
+ *
467
+ * A subscript is the part that follows the last "." in the word, and
468
+ * that does not begin with a digit.
469
+ */
470
+ static int dict_match(const char * s, const char * t)
471
+ {
472
+ char *ds, *dt;
473
+ ds = strrchr(s, '.');
474
+ dt = strrchr(t, '.');
475
+
476
+ /* a dot at the end or a dot followed by a number is NOT
477
+ * considered a subscript */
478
+ if ((dt != NULL) && ((*(dt+1) == '\0') ||
479
+ (isdigit((int)*(dt+1))))) dt = NULL;
480
+ if ((ds != NULL) && ((*(ds+1) == '\0') ||
481
+ (isdigit((int)*(ds+1))))) ds = NULL;
482
+
483
+ /* dt is NULL when there's no subscript ... */
484
+ if (dt == NULL && ds != NULL) {
485
+ if (((int)strlen(t)) > (ds-s)) return FALSE; /* we need to do this to ensure that */
486
+ return (strncmp(s, t, ds-s) == 0); /* "i.e." does not match "i.e" */
487
+ } else if (dt != NULL && ds == NULL) {
488
+ if (((int)strlen(s)) > (dt-t)) return FALSE;
489
+ return (strncmp(s, t, dt-t) == 0);
490
+ } else {
491
+ return (strcmp(s, t) == 0);
492
+ }
493
+ }
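A few concrete calls make the suffix rules easier to see. Assuming the definitions in this file (dict_match itself, plus an assert.h include), the following expectations hold, including the "i.e." case mentioned in the code:

    /* Illustrative expectations for dict_match(). */
    #include <assert.h>

    static void dict_match_examples(void)
    {
        assert( dict_match("make",   "make.n"));  /* bare word matches a suffixed entry */
        assert( dict_match("make.n", "make"));    /* and the other way around           */
        assert(!dict_match("make.n", "make.v"));  /* different subscripts do not match  */
        assert(!dict_match("i.e",    "i.e."));    /* a trailing dot is not a subscript  */
    }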
494
+
495
+ /* ======================================================================== */
496
+
497
+ static inline Dict_node * dict_node_new(void)
498
+ {
499
+ return (Dict_node*) xalloc(sizeof(Dict_node));
500
+ }
501
+
502
+ static inline void free_dict_node(Dict_node *dn)
503
+ {
504
+ xfree((char *)dn, sizeof(Dict_node));
505
+ }
506
+
507
+ /**
508
+ * prune_lookup_list -- discard all list entries that don't match string
509
+ * Walk the lookup list (of right links), discarding all nodes that do
510
+ * not match the dictionary string s. The matching is dictionary matching:
511
+ * suffixed entries will match "bare" entries.
512
+ */
513
+ static Dict_node * prune_lookup_list(Dict_node *llist, const char * s)
514
+ {
515
+ Dict_node *dn, *dnx, *list_new;
516
+
517
+ list_new = NULL;
518
+ for (dn = llist; dn != NULL; dn = dnx)
519
+ {
520
+ dnx = dn->right;
521
+ /* now put dn onto the answer list, or free it */
522
+ if (dict_match(dn->string, s))
523
+ {
524
+ dn->right = list_new;
525
+ list_new = dn;
526
+ }
527
+ else
528
+ {
529
+ free_dict_node(dn);
530
+ }
531
+ }
532
+
533
+ /* now reverse the list back */
534
+ llist = NULL;
535
+ for (dn = list_new; dn != NULL; dn = dnx)
536
+ {
537
+ dnx = dn->right;
538
+ dn->right = llist;
539
+ llist = dn;
540
+ }
541
+ return llist;
542
+ }
543
+
544
+ void free_lookup_list(Dict_node *llist)
545
+ {
546
+ Dict_node * n;
547
+ while(llist != NULL)
548
+ {
549
+ n = llist->right;
550
+ free_dict_node(llist);
551
+ llist = n;
552
+ }
553
+ }
554
+
555
+ static void free_dict_node_recursive(Dict_node * dn)
556
+ {
557
+ if (dn == NULL) return;
558
+ free_dict_node_recursive(dn->left);
559
+ free_dict_node_recursive(dn->right);
560
+ free_dict_node(dn);
561
+ }
562
+
563
+ /* ======================================================================== */
564
+ /**
565
+ * rdictionary_lookup() -- recursive dictionary lookup
566
+ * Walk binary tree, given by 'dn', looking for the string 's'.
567
+ * For every node in the tree where 's' matches (including wildcards)
568
+ * make a copy of that node, and append it to llist.
569
+ */
570
+ static Dict_node * rdictionary_lookup(Dict_node *llist,
571
+ Dict_node * dn, const char * s, int match_idiom)
572
+ {
573
+ /* see comment in dictionary_lookup below */
574
+ int m;
575
+ Dict_node * dn_new;
576
+ if (dn == NULL) return llist;
577
+ m = dict_order_wild(s, dn->string);
578
+ if (m >= 0)
579
+ {
580
+ llist = rdictionary_lookup(llist, dn->right, s, match_idiom);
581
+ }
582
+ if ((m == 0) && (match_idiom || !is_idiom_word(dn->string)))
583
+ {
584
+ dn_new = dict_node_new();
585
+ *dn_new = *dn;
586
+ dn_new->right = llist;
587
+ llist = dn_new;
588
+ }
589
+ if (m <= 0)
590
+ {
591
+ llist = rdictionary_lookup(llist, dn->left, s, match_idiom);
592
+ }
593
+ return llist;
594
+ }
595
+
596
+ /**
597
+ * dictionary_lookup_list() - return lookup list of words in the dictionary
598
+ *
599
+ * Returns a pointer to a lookup list of the words in the dictionary.
600
+ * Matches include word that appear in idioms. Use
601
+ * abridged_lookup_list() to obtain matches, excluding idioms.
602
+ *
603
+ * This list is made up of Dict_nodes, linked by their right pointers.
604
+ * The node, file and string fields are copied from the dictionary.
605
+ *
606
+ * The returned list must be freed with free_lookup_list().
607
+ */
608
+ Dict_node * dictionary_lookup_list(Dictionary dict, const char *s)
609
+ {
610
+ Dict_node * llist = rdictionary_lookup(NULL, dict->root, s, TRUE);
611
+ llist = prune_lookup_list(llist, s);
612
+ return llist;
613
+ }
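A caller therefore receives a private chain of copied Dict_nodes linked through their right pointers and must release it. A minimal usage sketch, assuming a Dictionary that was already created by the library's normal loading path, might look like:

    /* Sketch: print every dictionary entry matching a word.  Per dict_match()
     * above, a bare "make" also matches suffixed entries such as "make.n". */
    #include <stdio.h>

    static void print_matches(Dictionary dict, const char *word)
    {
        Dict_node *dn;
        Dict_node *head = dictionary_lookup_list(dict, word);
        for (dn = head; dn != NULL; dn = dn->right)
            printf("  %s\n", dn->string);
        free_lookup_list(head);   /* the copied nodes must always be freed */
    }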
614
+
615
+ /**
616
+ * abridged_lookup_list() - return lookup list of words in the dictionary
617
+ *
618
+ * Returns a pointer to a lookup list of the words in the dictionary.
619
+ * Excludes any idioms that contain the word; use
620
+ * dictionary_lookup_list() to obtain the complete list.
621
+ *
622
+ * This list is made up of Dict_nodes, linked by their right pointers.
623
+ * The node, file and string fields are copied from the dictionary.
624
+ *
625
+ * The returned list must be freed with free_lookup_list().
626
+ */
627
+ Dict_node * abridged_lookup_list(Dictionary dict, const char *s)
628
+ {
629
+ Dict_node *llist;
630
+ llist = rdictionary_lookup(NULL, dict->root, s, FALSE);
631
+ llist = prune_lookup_list(llist, s);
632
+ return llist;
633
+ }
634
+
635
+ int boolean_dictionary_lookup(Dictionary dict, const char *s)
636
+ {
637
+ Dict_node *llist = dictionary_lookup_list(dict, s);
638
+ int boool = (llist != NULL);
639
+ free_lookup_list(llist);
640
+ return boool;
641
+ }
642
+
643
+ /* ======================================================================== */
644
+ /**
645
+ * Allocate a new Exp node and link it into the exp_list for freeing later.
646
+ */
647
+ Exp * Exp_create(Dictionary dict)
648
+ {
649
+ Exp * e;
650
+ e = (Exp *) xalloc(sizeof(Exp));
651
+ e->next = dict->exp_list;
652
+ dict->exp_list = e;
653
+ return e;
654
+ }
655
+
656
+ static inline void exp_free(Exp * e)
657
+ {
658
+ xfree((char *)e, sizeof(Exp));
659
+ }
660
+
661
+ /* ======================================================================== */
662
+ /**
663
+ * This creates a node with one child (namely e). Initializes
664
+ * the cost to zero.
665
+ */
666
+ static Exp * make_unary_node(Dictionary dict, Exp * e)
667
+ {
668
+ Exp * n;
669
+ n = Exp_create(dict);
670
+ n->type = AND_type; /* these must be AND types */
671
+ n->cost = 0.0f;
672
+ n->u.l = (E_list *) xalloc(sizeof(E_list));
673
+ n->u.l->next = NULL;
674
+ n->u.l->e = e;
675
+ return n;
676
+ }
677
+
678
+ /**
679
+ * connector() -- make a node for a connector or dictionary word.
680
+ *
681
+ * Assumes the current token is a connector or dictionary word.
682
+ */
683
+ static Exp * connector(Dictionary dict)
684
+ {
685
+ Exp * n;
686
+ Dict_node *dn, *dn_head;
687
+ int i;
688
+
689
+ i = strlen(dict->token) - 1; /* this must be + or - if a connector */
690
+ if ((dict->token[i] != '+') && (dict->token[i] != '-'))
691
+ {
692
+ /* If we are here, token is a word */
693
+ dn_head = abridged_lookup_list(dict, dict->token);
694
+ dn = dn_head;
695
+ while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
696
+ {
697
+ dn = dn->right;
698
+ }
699
+ if (dn == NULL)
700
+ {
701
+ free_lookup_list(dn_head);
702
+ dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
703
+ "Or perhaps you forgot the suffix on a word.\n"
704
+ "Or perhaps a word is used before it is defined.\n");
705
+ return NULL;
706
+ }
707
+ n = make_unary_node(dict, dn->exp);
708
+ free_lookup_list(dn_head);
709
+ }
710
+ else
711
+ {
712
+ /* If we are here, token is a connector */
713
+ if (!check_connector(dict, dict->token))
714
+ {
715
+ return NULL;
716
+ }
717
+ n = Exp_create(dict);
718
+ n->dir = dict->token[i];
719
+ dict->token[i] = '\0'; /* get rid of the + or - */
720
+ if (dict->token[0] == '@')
721
+ {
722
+ n->u.string = string_set_add(dict->token+1, dict->string_set);
723
+ n->multi = TRUE;
724
+ }
725
+ else
726
+ {
727
+ n->u.string = string_set_add(dict->token, dict->string_set);
728
+ n->multi = FALSE;
729
+ }
730
+ n->type = CONNECTOR_type;
731
+ n->cost = 0.0f;
732
+ }
733
+
734
+ if (!link_advance(dict))
735
+ {
736
+ exp_free(n);
737
+ return NULL;
738
+ }
739
+ return n;
740
+ }
741
+
742
+ /**
743
+ * This creates a node with zero children. Initializes
744
+ * the cost to zero.
745
+ */
746
+ static Exp * make_zeroary_node(Dictionary dict)
747
+ {
748
+ Exp * n;
749
+ n = Exp_create(dict);
750
+ n->type = AND_type; /* these must be AND types */
751
+ n->cost = 0.0f;
752
+ n->u.l = NULL;
753
+ return n;
754
+ }
755
+
756
+ /**
757
+ * This creates an OR node with two children, one the given node,
758
+ * and the other a zeroary node. This has the effect of creating
759
+ * what used to be called an optional node.
760
+ */
761
+ static Exp * make_optional_node(Dictionary dict, Exp * e)
762
+ {
763
+ Exp * n;
764
+ E_list *el, *elx;
765
+ n = Exp_create(dict);
766
+ n->type = OR_type;
767
+ n->cost = 0.0f;
768
+ n->u.l = el = (E_list *) xalloc(sizeof(E_list));
769
+ el->e = make_zeroary_node(dict);
770
+ el->next = elx = (E_list *) xalloc(sizeof(E_list));
771
+ elx->next = NULL;
772
+ elx->e = e;
773
+ return n;
774
+ }
775
+
776
+ /* ======================================================================== */
777
+
778
+ #if ! defined INFIX_NOTATION
779
+
780
+ Exp * expression(Dictionary dict);
781
+ /**
782
+ * We're looking at the first of the stuff after an "and" or "or".
783
+ * Build an Exp node for this expression. Set the cost and optional
784
+ * fields to the default values. Set the type field according to type
785
+ */
786
+ Exp * operator_exp(Dictionary dict, int type)
787
+ {
788
+ Exp * n;
789
+ E_list first;
790
+ E_list * elist;
791
+ n = Exp_create(dict);
792
+ n->type = type;
793
+ n->cost = 0.0f;
794
+ elist = &first;
795
+ while((!is_equal(dict, ')')) && (!is_equal(dict, ']')) && (!is_equal(dict, '}'))) {
796
+ elist->next = (E_list *) xalloc(sizeof(E_list));
797
+ elist = elist->next;
798
+ elist->next = NULL;
799
+ elist->e = expression(dict);
800
+ if (elist->e == NULL) {
801
+ return NULL;
802
+ }
803
+ }
804
+ if (elist == &first) {
805
+ dict_error(dict, "An \"or\" or \"and\" of nothing");
806
+ return NULL;
807
+ }
808
+ n->u.l = first.next;
809
+ return n;
810
+ }
811
+
812
+ /**
813
+ * Looks for the stuff that is allowed to be inside of parentheses:
814
+ * either & or | followed by a list, or a terminal symbol.
815
+ */
816
+ Exp * in_parens(Dictionary dict)
817
+ {
818
+ Exp * e;
819
+
820
+ if (is_equal(dict, '&') || (strcmp(dict->token, "and")==0)) {
821
+ if (!link_advance(dict)) {
822
+ return NULL;
823
+ }
824
+ return operator_exp(dict, AND_type);
825
+ } else if (is_equal(dict, '|') || (strcmp(dict->token, "or")==0)) {
826
+ if (!link_advance(dict)) {
827
+ return NULL;
828
+ }
829
+ return operator_exp(dict, OR_type);
830
+ } else {
831
+ return expression(dict);
832
+ }
833
+ }
834
+
835
+ /**
836
+ * Build (and return the root of) the tree for the expression beginning
837
+ * with the current token. At the end, the token is the first one not
838
+ * part of this expression.
839
+ */
840
+ Exp * expression(Dictionary dict)
841
+ {
842
+ Exp * n;
843
+ if (is_equal(dict, '(')) {
844
+ if (!link_advance(dict)) {
845
+ return NULL;
846
+ }
847
+ n = in_parens(dict);
848
+ if (!is_equal(dict, ')')) {
849
+ dict_error(dict, "Expecting a \")\".");
850
+ return NULL;
851
+ }
852
+ if (!link_advance(dict)) {
853
+ return NULL;
854
+ }
855
+ } else if (is_equal(dict, '{')) {
856
+ if (!link_advance(dict)) {
857
+ return NULL;
858
+ }
859
+ n = in_parens(dict);
860
+ if (!is_equal(dict, '}')) {
861
+ dict_error(dict, "Expecting a \"}\".");
862
+ return NULL;
863
+ }
864
+ if (!link_advance(dict)) {
865
+ return NULL;
866
+ }
867
+ n = make_optional_node(dict, n);
868
+ } else if (is_equal(dict, '[')) {
869
+ if (!link_advance(dict)) {
870
+ return NULL;
871
+ }
872
+ n = in_parens(dict);
873
+ if (!is_equal(dict, ']')) {
874
+ dict_error(dict, "Expecting a \"]\".");
875
+ return NULL;
876
+ }
877
+ if (!link_advance(dict)) {
878
+ return NULL;
879
+ }
880
+ n->cost += 1.0f;
881
+ } else if (!dict->is_special) {
882
+ n = connector(dict);
883
+ if (n == NULL) {
884
+ return NULL;
885
+ }
886
+ } else if (is_equal(dict, ')') || is_equal(dict, ']')) {
887
+ /* allows "()" or "[]" */
888
+ n = make_zeroary_node(dict);
889
+ } else {
890
+ dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
891
+ return NULL;
892
+ }
893
+ return n;
894
+ }
895
+
896
+ /* ======================================================================== */
897
+ #else /* This is for infix notation */
898
+
899
+ static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok);
900
+
901
+ /**
902
+ * Build (and return the root of) the tree for the expression beginning
903
+ * with the current token. At the end, the token is the first one not
904
+ * part of this expression.
905
+ */
906
+ static Exp * expression(Dictionary dict)
907
+ {
908
+ return restricted_expression(dict, TRUE, TRUE);
909
+ }
910
+
911
+ static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok)
912
+ {
913
+ Exp *nl = NULL, *nr;
914
+ E_list *ell, *elr;
915
+
916
+ if (is_equal(dict, '('))
917
+ {
918
+ if (!link_advance(dict)) {
919
+ return NULL;
920
+ }
921
+ nl = expression(dict);
922
+ if (nl == NULL) {
923
+ return NULL;
924
+ }
925
+ if (!is_equal(dict, ')')) {
926
+ dict_error(dict, "Expecting a \")\".");
927
+ return NULL;
928
+ }
929
+ if (!link_advance(dict)) {
930
+ return NULL;
931
+ }
932
+ }
933
+ else if (is_equal(dict, '{'))
934
+ {
935
+ if (!link_advance(dict)) {
936
+ return NULL;
937
+ }
938
+ nl = expression(dict);
939
+ if (nl == NULL) {
940
+ return NULL;
941
+ }
942
+ if (!is_equal(dict, '}')) {
943
+ dict_error(dict, "Expecting a \"}\".");
944
+ return NULL;
945
+ }
946
+ if (!link_advance(dict)) {
947
+ return NULL;
948
+ }
949
+ nl = make_optional_node(dict, nl);
950
+ }
951
+ else if (is_equal(dict, '['))
952
+ {
953
+ if (!link_advance(dict)) {
954
+ return NULL;
955
+ }
956
+ nl = expression(dict);
957
+ if (nl == NULL) {
958
+ return NULL;
959
+ }
960
+ if (!is_equal(dict, ']')) {
961
+ dict_error(dict, "Expecting a \"]\".");
962
+ return NULL;
963
+ }
964
+ if (!link_advance(dict)) {
965
+ return NULL;
966
+ }
967
+ nl->cost += 1.0f;
968
+ }
969
+ else if (!dict->is_special)
970
+ {
971
+ nl = connector(dict);
972
+ if (nl == NULL) {
973
+ return NULL;
974
+ }
975
+ }
976
+ else if (is_equal(dict, ')') || is_equal(dict, ']'))
977
+ {
978
+ /* allows "()" or "[]" */
979
+ nl = make_zeroary_node(dict);
980
+ }
981
+ else
982
+ {
983
+ dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
984
+ return NULL;
985
+ }
986
+
987
+ if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0))
988
+ {
989
+ Exp *n;
990
+
991
+ if (!and_ok) {
992
+ warning(dict, "\"and\" and \"or\" at the same level in an expression");
993
+ }
994
+ if (!link_advance(dict)) {
995
+ return NULL;
996
+ }
997
+ nr = restricted_expression(dict, TRUE, FALSE);
998
+ if (nr == NULL) {
999
+ return NULL;
1000
+ }
1001
+ n = Exp_create(dict);
1002
+ n->u.l = ell = (E_list *) xalloc(sizeof(E_list));
1003
+ ell->next = elr = (E_list *) xalloc(sizeof(E_list));
1004
+ elr->next = NULL;
1005
+
1006
+ ell->e = nl;
1007
+ elr->e = nr;
1008
+ n->type = AND_type;
1009
+ n->cost = 0.0f;
1010
+ return n;
1011
+ }
1012
+ else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0))
1013
+ {
1014
+ Exp *n;
1015
+
1016
+ if (!or_ok) {
1017
+ warning(dict, "\"and\" and \"or\" at the same level in an expression");
1018
+ }
1019
+ if (!link_advance(dict)) {
1020
+ return NULL;
1021
+ }
1022
+ nr = restricted_expression(dict, FALSE,TRUE);
1023
+ if (nr == NULL) {
1024
+ return NULL;
1025
+ }
1026
+ n = Exp_create(dict);
1027
+ n->u.l = ell = (E_list *) xalloc(sizeof(E_list));
1028
+ ell->next = elr = (E_list *) xalloc(sizeof(E_list));
1029
+ elr->next = NULL;
1030
+
1031
+ ell->e = nl;
1032
+ elr->e = nr;
1033
+ n->type = OR_type;
1034
+ n->cost = 0.0f;
1035
+ return n;
1036
+ }
1037
+
1038
+ return nl;
1039
+ }
1040
+
1041
+ #endif
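For orientation, here is a hand-worked picture of the Exp tree that the infix parser above builds for a simple entry; this is traced by hand from the code, not generated output, and the connector names are arbitrary:

    /* Hand-worked shape of the Exp tree for the expression  A+ & {B+}  :
     *
     *   AND_type
     *    |-- CONNECTOR_type  "A", dir '+', multi FALSE
     *    |-- OR_type                          (from make_optional_node)
     *         |-- AND_type with no children   (the zeroary node, i.e. "()")
     *         |-- CONNECTOR_type  "B", dir '+'
     */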
1042
+
1043
+ /* ======================================================================== */
1044
+ /* Tree balancing utilities, used to implement an AVL tree.
1045
+ * Unfortunately, AVL tree insertion is very slowww, unusably
1046
+ * slow for creating the dictionary. The code is thus ifdef'ed out
1047
+ * but is left here for debugging and other sundry purposes.
1048
+ * A better way to rebalance the tree is the DSW algo, implemented
1049
+ * further below.
1050
+ */
1051
+
1052
+ static Dict_node *rotate_right(Dict_node *root)
1053
+ {
1054
+ Dict_node *pivot = root->left;
1055
+ root->left = pivot->right;
1056
+ pivot->right = root;
1057
+ return pivot;
1058
+ }
1059
+
1060
+ #ifdef USE_AVL_TREE_FOR_INSERTION
1061
+
1062
+ static Dict_node *rotate_left(Dict_node *root)
1063
+ {
1064
+ Dict_node *pivot = root->right;
1065
+ root->right = pivot->left;
1066
+ pivot->left = root;
1067
+ return pivot;
1068
+ }
1069
+
1070
+ /* Return tree height. XXX this is not tail-recursive! */
1071
+ static int tree_depth (Dict_node *n)
1072
+ {
1073
+ int l, r;
1074
+ if (NULL == n) return 0;
1075
+ if (NULL == n->left) return 1+tree_depth(n->right);
1076
+ if (NULL == n->right) return 1+tree_depth(n->left);
1077
+ l = tree_depth(n->left);
1078
+ r = tree_depth(n->right);
1079
+ if (l < r) return r+1;
1080
+ return l+1;
1081
+ }
1082
+
1083
+ static int tree_balance(Dict_node *n)
1084
+ {
1085
+ int l = tree_depth(n->left);
1086
+ int r = tree_depth(n->right);
1087
+ return r-l;
1088
+ }
1089
+
1090
+ /**
1091
+ * Rebalance the dictionary tree.
1092
+ * This recomputes the tree depth wayy too often, but so what.. this
1093
+ * only wastes cpu time during the initial dictionary read.
1094
+ */
1095
+ static Dict_node *rebalance(Dict_node *root)
1096
+ {
1097
+ int bal = tree_balance(root);
1098
+ if (2 == bal)
1099
+ {
1100
+ bal = tree_balance(root->right);
1101
+ if (-1 == bal)
1102
+ {
1103
+ root->right = rotate_right (root->right);
1104
+ }
1105
+ return rotate_left(root);
1106
+ }
1107
+ else if (-2 == bal)
1108
+ {
1109
+ bal = tree_balance(root->left);
1110
+ if (1 == bal)
1111
+ {
1112
+ root->left = rotate_left (root->left);
1113
+ }
1114
+ return rotate_right(root);
1115
+ }
1116
+ return root;
1117
+ }
1118
+
1119
+ #endif /* USE_AVL_TREE_FOR_INSERTION */
1120
+
1121
+ /* ======================================================================== */
1122
+ /* Implementation of the DSW algo for rebalancing a binary tree.
1123
+ * The point is -- after building the dictionary tree, we rebalance it
1124
+ * once at the end. This is a **LOT LOT** quicker than maintaining an
1125
+ * AVL tree along the way (less than quarter-of-a-second vs. about
1126
+ * a minute or more!) FWIW, the DSW tree is even more balanced than
1127
+ * the AVL tree is (it's less deep, more full).
1128
+ *
1129
+ * The DSW algo, with C++ code, is described in
1130
+ *
1131
+ * Timothy J. Rolfe, "One-Time Binary Search Tree Balancing:
1132
+ * The Day/Stout/Warren (DSW) Algorithm", inroads, Vol. 34, No. 4
1133
+ * (December 2002), pp. 85-88
1134
+ * http://penguin.ewu.edu/~trolfe/DSWpaper/
1135
+ */
1136
+
1137
+ static Dict_node * dsw_tree_to_vine (Dict_node *root)
1138
+ {
1139
+ Dict_node *vine_tail, *vine_head, *rest;
1140
+ Dict_node vh;
1141
+
1142
+ vine_head = &vh;
1143
+ vine_head->left = NULL;
1144
+ vine_head->right = root;
1145
+ vine_tail = vine_head;
1146
+ rest = root;
1147
+
1148
+ while (NULL != rest)
1149
+ {
1150
+ /* If no left, we are done, do the right */
1151
+ if (NULL == rest->left)
1152
+ {
1153
+ vine_tail = rest;
1154
+ rest = rest->right;
1155
+ }
1156
+ /* eliminate the left subtree */
1157
+ else
1158
+ {
1159
+ rest = rotate_right(rest);
1160
+ vine_tail->right = rest;
1161
+ }
1162
+ }
1163
+
1164
+ return vh.right;
1165
+ }
1166
+
1167
+ static void dsw_compression (Dict_node *root, unsigned int count)
1168
+ {
1169
+ unsigned int j;
1170
+ for (j = 0; j < count; j++)
1171
+ {
1172
+ /* Compound left rotation */
1173
+ Dict_node * pivot = root->right;
1174
+ root->right = pivot->right;
1175
+ root = pivot->right;
1176
+ pivot->right = root->left;
1177
+ root->left = pivot;
1178
+ }
1179
+ }
1180
+
1181
+ /* Return the size of the full (perfect) portion of the tree.
1182
+ * Finds the smallest pow(2,k)-1 that is >= size, then returns its integer half.
1183
+ */
1184
+ static inline unsigned int full_tree_size (unsigned int size)
1185
+ {
1186
+ unsigned int pk = 1;
1187
+ while (pk < size) pk = 2*pk + 1;
1188
+ return pk/2;
1189
+ }
1190
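/* Worked values (illustrative): full_tree_size(1) == 0,
 * full_tree_size(2) == 1, full_tree_size(7) == 3, full_tree_size(11) == 7.
 * In each case the loop stops at the smallest pow(2,k)-1 that is >= size
 * (1, 3, 7 and 15 respectively) and returns its integer half.
 */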
+
1191
+ static Dict_node * dsw_vine_to_tree (Dict_node *root, int size)
1192
+ {
1193
+ Dict_node vine_head;
1194
+ unsigned int full_count = full_tree_size(size +1);
1195
+
1196
+ vine_head.left = NULL;
1197
+ vine_head.right = root;
1198
+
1199
+ dsw_compression(&vine_head, size - full_count);
1200
+ for (size = full_count ; size > 1 ; size /= 2)
1201
+ {
1202
+ dsw_compression(&vine_head, size / 2);
1203
+ }
1204
+ return vine_head.right;
1205
+ }
1206
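/* Worked example (illustrative): for a vine of 10 nodes, full_count is
 * full_tree_size(11) == 7.  The first dsw_compression() call does
 * 10 - 7 = 3 compound rotations to absorb the "extra" nodes, and the loop
 * then compresses with counts 3 and 1 (size going 7 -> 3 -> 1).  The result
 * is a tree of depth 4 whose bottom level is only partially filled.
 */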
+
1207
+ /* ======================================================================== */
1208
+ /**
1209
+ * Insert the new node into the dictionary below node n.
1210
+ * Gives an error message if the new element's string is already there.
1211
+ * Assumes that the string field of newnode is already set, and that its left
1212
+ * and right fields are NULL.
1213
+ *
1214
+ * The resulting tree is highly unbalanced. It needs to be rebalanced
1215
+ * before it is used.
1216
+ */
1217
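/* Illustrative note: dictionary files are largely in alphabetical order,
 * so naive insertion here tends to build long chains; inserting "apple",
 * "banana", "cherry" in that order, for example, produces a right-leaning
 * chain of depth 3.  That is why read_dictionary() runs the DSW rebalancing
 * pass once after all entries have been read.
 */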
+ Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode)
1218
+ {
1219
+ int comp;
1220
+
1221
+ if (NULL == n) return newnode;
1222
+
1223
+ comp = dict_order(newnode->string, n->string);
1224
+ if (comp < 0)
1225
+ {
1226
+ if (NULL == n->left)
1227
+ {
1228
+ n->left = newnode;
1229
+ return n;
1230
+ }
1231
+ n->left = insert_dict(dict, n->left, newnode);
1232
+ return n;
1233
+ /* return rebalance(n); Uncomment to get an AVL tree */
1234
+ }
1235
+ else if (comp > 0)
1236
+ {
1237
+ if (NULL == n->right)
1238
+ {
1239
+ n->right = newnode;
1240
+ return n;
1241
+ }
1242
+ n->right = insert_dict(dict, n->right, newnode);
1243
+ return n;
1244
+ /* return rebalance(n); Uncomment to get an AVL tree */
1245
+ }
1246
+ else
1247
+ {
1248
+ char t[256];
1249
+ snprintf(t, 256, "The word \"%s\" has been multiply defined\n", newnode->string);
1250
+ dict_error(dict, t);
1251
+ return NULL;
1252
+ }
1253
+ }
1254
+
1255
+ /**
1256
+ * insert_list() -
1257
+ * p points to a list of dict_nodes connected by their left pointers.
1258
+ * l is the length of this list (the left pointer of the last node need not be NULL).
1259
+ * It inserts the list into the dictionary.
1260
+ * It does the middle one first, then the left half, then the right.
1261
+ *
1262
+ * Note: I think this middle-then-left-then-right insertion order has
1263
+ * its origins as a lame attempt to hack around the fact that the
1264
+ * resulting binary tree is rather badly unbalanced. This has been
1265
+ * fixed by using the DSW rebalancing algo. Now, that would seem
1266
+ * to render this crazy bisected-insertion algo obsolete, but...
1267
+ * oddly enough, it seems to make the DSW balancing go really fast!
1268
+ * Faster than a simple insertion. Go figure. I think this has
1269
+ * something to do with the fact that the dictionaries are in
1270
+ * alphabetical order! This subdivision helps randomize a bit.
1271
+ */
1272
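/* Worked example (illustrative): for a list of 7 nodes, insert_list()
 * below inserts the node at position 4 (counting from the head p) first,
 * and the overall insertion order works out to 4, 2, 1, 3, 6, 5, 7.
 * If those 7 nodes happen to be in sorted order, this already produces a
 * perfectly balanced subtree even before the DSW pass.
 */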
+ static void insert_list(Dictionary dict, Dict_node * p, int l)
1273
+ {
1274
+ Dict_node * dn, *dn_head, *dn_second_half;
1275
+ int k, i; /* k is the length of the first half */
1276
+
1277
+ if (l == 0) return;
1278
+
1279
+ k = (l-1)/2;
1280
+ dn = p;
1281
+ for (i = 0; i < k; i++)
1282
+ {
1283
+ dn = dn->left;
1284
+ }
1285
+
1286
+ /* dn now points to the middle element */
1287
+ dn_second_half = dn->left;
1288
+ dn->left = dn->right = NULL;
1289
+
1290
+ if (contains_underbar(dn->string))
1291
+ {
1292
+ insert_idiom(dict, dn);
1293
+ }
1294
+ else if (is_idiom_word(dn->string))
1295
+ {
1296
+ err_ctxt ec;
1297
+ ec.sent = NULL;
1298
+ err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
1299
+ "\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
1300
+ "\tThis word will be ignored.\n",
1301
+ dn->string, dict->line_number);
1302
+ free_dict_node(dn);
1303
+ }
1304
+ else if ((dn_head = abridged_lookup_list(dict, dn->string)) != NULL)
1305
+ {
1306
+ Dict_node *dnx;
1307
+ err_ctxt ec;
1308
+ ec.sent = NULL;
1309
+ err_msg(&ec, Warn, "Warning: The word \"%s\" "
1310
+ "found near line %d of %s matches the following words:\n",
1311
+ dn->string, dict->line_number, dict->name);
1312
+ for (dnx = dn_head; dnx != NULL; dnx = dnx->right) {
1313
+ fprintf(stderr, "\t%s", dnx->string);
1314
+ }
1315
+ fprintf(stderr, "\n\tThis word will be ignored.\n");
1316
+ free_lookup_list(dn_head);
1317
+ free_dict_node(dn);
1318
+ }
1319
+ else
1320
+ {
1321
+ dict->root = insert_dict(dict, dict->root, dn);
1322
+ dict->num_entries++;
1323
+ }
1324
+
1325
+ insert_list(dict, p, k);
1326
+ insert_list(dict, dn_second_half, l-k-1);
1327
+ }
1328
+
1329
+ /**
1330
+ * read_entry() -- read one dictionary entry
1331
+ * Starting with the current token, parse one dictionary entry.
1332
+ * Add these words to the dictionary.
1333
+ */
1334
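/* Shape of an entry as parsed below (illustrative example, not a line from
 * any real dictionary file): one or more words (or a /path naming a word
 * file), a colon, a connector expression, and a terminating semicolon:
 *
 *     word1 word2.n: (A+ & B-) or C+;
 */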
+ static int read_entry(Dictionary dict)
1335
+ {
1336
+ Exp *n;
1337
+ int i;
1338
+
1339
+ Dict_node *dn_new, *dnx, *dn = NULL;
1340
+
1341
+ /* Reset multi-byte shift state every line. */
1342
+ memset(&dict->mbss, 0, sizeof(dict->mbss));
1343
+
1344
+ while (!is_equal(dict, ':'))
1345
+ {
1346
+ if (dict->is_special)
1347
+ {
1348
+ dict_error(dict, "I expected a word but didn\'t get it.");
1349
+ return 0;
1350
+ }
1351
+
1352
+ /* if it's a word-file name */
1353
+ /* However, be careful to reject "/.v" which is the division symbol
1354
+ * used in equations (.v means verb-like) */
1355
+ if ((dict->token[0] == '/') && (dict->token[1] != '.'))
1356
+ {
1357
+ dn = read_word_file(dict, dn, dict->token);
1358
+ if (dn == NULL)
1359
+ {
1360
+ err_ctxt ec;
1361
+ ec.sent = NULL;
1362
+ err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
1363
+ return 0;
1364
+ }
1365
+ }
1366
+ else
1367
+ {
1368
+ dn_new = dict_node_new();
1369
+ dn_new->left = dn;
1370
+ dn = dn_new;
1371
+ dn->file = NULL;
1372
+ dn->string = string_set_add(dict->token, dict->string_set);
1373
+ }
1374
+
1375
+ /* Advance to next entry, unless error */
1376
+ if (0 == link_advance(dict)) goto syntax_error;
1377
+ }
1378
+
1379
+ /* pass the : */
1380
+ if (!link_advance(dict))
1381
+ {
1382
+ goto syntax_error;
1383
+ }
1384
+
1385
+ n = expression(dict);
1386
+ if (n == NULL)
1387
+ {
1388
+ goto syntax_error;
1389
+ }
1390
+
1391
+ if (!is_equal(dict, ';'))
1392
+ {
1393
+ dict_error(dict, "Expecting \";\" at the end of an entry.");
1394
+ goto syntax_error;
1395
+ }
1396
+
1397
+ /* pass the ; */
1398
+ if (!link_advance(dict))
1399
+ {
1400
+ goto syntax_error;
1401
+ }
1402
+
1403
+ /* At this point, dn points to a list of Dict_nodes connected by
1404
+ * their left pointers. These are to be inserted into the dictionary */
1405
+ i = 0;
1406
+ for (dnx = dn; dnx != NULL; dnx = dnx->left)
1407
+ {
1408
+ dnx->exp = n;
1409
+ i++;
1410
+ }
1411
+ insert_list(dict, dn, i);
1412
+ return 1;
1413
+
1414
+ syntax_error:
1415
+ free_lookup_list(dn);
1416
+ return 0;
1417
+ }
1418
+
1419
+ #if ! defined INFIX_NOTATION
1420
+ /**
1421
+ * print the expression, in prefix-style
1422
+ */
1423
+ void print_expression(Exp * n)
1424
+ {
1425
+ E_list * el;
1426
+ int i, icost;
1427
+
1428
+ if (n == NULL)
1429
+ {
1430
+ printf("NULL expression");
1431
+ return;
1432
+ }
1433
+
1434
+ icost = (int) (n->cost);
1435
+ if (n->type == CONNECTOR_type)
1436
+ {
1437
+ for (i=0; i<icost; i++) printf("[");
1438
+ if (n->multi) printf("@");
1439
+ printf("%s%c",n->u.string, n->dir);
1440
+ for (i=0; i<icost; i++) printf("]");
1441
+ if (icost > 0) printf(" ");
1442
+ }
1443
+ else
1444
+ {
1445
+ for (i=0; i<icost; i++) printf("[");
1446
+ if (icost == 0) printf("(");
1447
+ if (n->type == AND_type) printf("& ");
1448
+ if (n->type == OR_type) printf("or ");
1449
+ for (el = n->u.l; el != NULL; el = el->next)
1450
+ {
1451
+ print_expression(el->e);
1452
+ }
1453
+ for (i=0; i<icost; i++) printf("]");
1454
+ if (icost > 0) printf(" ");
1455
+ if (icost == 0) printf(") ");
1456
+ }
1457
+ }
1458
+
1459
+ #else /* INFIX_NOTATION */
1460
+
1461
+ /**
1462
+ * print the expression, in infix-style
1463
+ */
1464
+ static void print_expression_parens(Exp * n, int need_parens)
1465
+ {
1466
+ E_list * el;
1467
+ int i, icost;
1468
+
1469
+ if (n == NULL)
1470
+ {
1471
+ printf("NULL expression");
1472
+ return;
1473
+ }
1474
+
1475
+ icost = (int) (n->cost);
1476
+ /* print the connector only */
1477
+ if (n->type == CONNECTOR_type)
1478
+ {
1479
+ for (i=0; i<icost; i++) printf("[");
1480
+ if (n->multi) printf("@");
1481
+ printf("%s%c",n->u.string, n->dir);
1482
+ for (i=0; i<icost; i++) printf("]");
1483
+ return;
1484
+ }
1485
+
1486
+ /* Look for optional, and print only that */
1487
+ el = n->u.l;
1488
+ if (el == NULL)
1489
+ {
1490
+ for (i=0; i<icost; i++) printf("[");
1491
+ printf ("()");
1492
+ for (i=0; i<icost; i++) printf("]");
1493
+ return;
1494
+ }
1495
+
1496
+ for (i=0; i<icost; i++) printf("[");
1497
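/* An "or" whose first alternative is the empty expression () denotes an
 * optional term; it is printed in braces, e.g. (() or A+) comes out
 * as {A+}. */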
+ if ((n->type == OR_type) &&
1498
+ el && el->e && (NULL == el->e->u.l))
1499
+ {
1500
+ printf ("{");
1501
+ print_expression_parens(el->next->e, FALSE);
1502
+ printf ("}");
1503
+ return;
1504
+ }
1505
+
1506
+ if ((icost == 0) && need_parens) printf("(");
1507
+
1508
+ /* print left side of binary expr */
1509
+ print_expression_parens(el->e, TRUE);
1510
+
1511
+ /* We get a funny "and optional" when it's a named expression thing. */
1512
+ if ((n->type == AND_type) && (el->next == NULL))
1513
+ {
1514
+ return;
1515
+ }
1516
+
1517
+ if (n->type == AND_type) printf(" & ");
1518
+ if (n->type == OR_type) printf(" or ");
1519
+
1520
+ /* print right side of binary expr */
1521
+ el = el->next;
1522
+ if (el == NULL)
1523
+ {
1524
+ printf ("()");
1525
+ }
1526
+ else
1527
+ {
1528
+ if (el->e->type == n->type)
1529
+ {
1530
+ print_expression_parens(el->e, FALSE);
1531
+ }
1532
+ else
1533
+ {
1534
+ print_expression_parens(el->e, TRUE);
1535
+ }
1536
+ if (el->next != NULL)
1537
+ printf ("\nERROR! Unexpected list!\n");
1538
+ }
1539
+
1540
+ for (i=0; i<icost; i++) printf("]");
1541
+ if ((icost == 0) && need_parens) printf(")");
1542
+ }
1543
+
1544
+ void print_expression(Exp * n)
1545
+ {
1546
+ print_expression_parens(n, FALSE);
1547
+ printf("\n");
1548
+ }
1549
+ #endif /* INFIX_NOTATION */
1550
+
1551
+ static void rprint_dictionary_data(Dict_node * n)
1552
+ {
1553
+ if (n == NULL) return;
1554
+ rprint_dictionary_data(n->left);
1555
+ printf("%s: ", n->string);
1556
+ print_expression(n->exp);
1557
+ printf("\n");
1558
+ rprint_dictionary_data(n->right);
1559
+ }
1560
+
1561
+ /**
1562
+ * Dump the entire contents of the dictionary
1563
+ * XXX This is not currently called by anything, but is a "good thing
1564
+ * to keep around".
1565
+ */
1566
+ void print_dictionary_data(Dictionary dict)
1567
+ {
1568
+ rprint_dictionary_data(dict->root);
1569
+ }
1570
+
1571
+ int read_dictionary(Dictionary dict)
1572
+ {
1573
+ if (!link_advance(dict))
1574
+ {
1575
+ return 0;
1576
+ }
1577
+ while (dict->token[0] != '\0')
1578
+ {
1579
+ if (!read_entry(dict))
1580
+ {
1581
+ return 0;
1582
+ }
1583
+ }
1584
+ dict->root = dsw_tree_to_vine(dict->root);
1585
+ dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);
1586
+ return 1;
1587
+ }
1588
+
1589
+ /* ======================================================================= */
1590
+ /* the following functions are for handling deletion */
1591
+ /**
1592
+ * Returns true if it finds a non-idiom dict_node in a file that matches
1593
+ * the string s.
1594
+ *
1595
+ * Note: this now DOES include non-file words in its search.
1596
+ *
1597
+ * Also sets parent and to_be_deleted appropriately.
1598
+ */
1599
+ static int find_one_non_idiom_node(Dict_node * p, Dict_node * dn,
1600
+ const char * s,
1601
+ Dict_node **parent, Dict_node **to_be_deleted)
1602
+ {
1603
+ int m;
1604
+ if (dn == NULL) return FALSE;
1605
+ m = dict_order_wild(s, dn->string);
1606
+ if (m <= 0) {
1607
+ if (find_one_non_idiom_node(dn,dn->left, s, parent, to_be_deleted)) return TRUE;
1608
+ }
1609
+ /* if ((m == 0) && (!is_idiom_word(dn->string)) && (dn->file != NULL)) { */
1610
+ if ((m == 0) && (!is_idiom_word(dn->string))) {
1611
+ *to_be_deleted = dn;
1612
+ *parent = p;
1613
+ return TRUE;
1614
+ }
1615
+ if (m >= 0) {
1616
+ if (find_one_non_idiom_node(dn,dn->right, s, parent, to_be_deleted)) return TRUE;
1617
+ }
1618
+ return FALSE;
1619
+ }
1620
+
1621
+ static void set_parent_of_node(Dictionary dict,
1622
+ Dict_node *p,
1623
+ Dict_node * del,
1624
+ Dict_node * newnode)
1625
+ {
1626
+ if (p == NULL) {
1627
+ dict->root = newnode;
1628
+ } else {
1629
+ if (p->left == del) {
1630
+ p->left = newnode;
1631
+ } else if (p->right == del) {
1632
+ p->right = newnode;
1633
+ } else {
1634
+ assert(FALSE, "Dictionary broken?");
1635
+ }
1636
+ }
1637
+ }
1638
+
1639
+ /**
1640
+ * This deletes all the non-idiom words of the dictionary that match
1641
+ * the given string. Returns TRUE if some deleted, FALSE otherwise.
1642
+ *
1643
+ * XXX Note: this function is not currently used anywhere in the code,
1644
+ * but it could be useful for general dictionary editing.
1645
+ */
1646
+ int delete_dictionary_words(Dictionary dict, const char * s)
1647
+ {
1648
+ Dict_node *pred, *pred_parent;
1649
+ Dict_node *parent, *to_be_deleted;
1650
+
1651
+ if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return FALSE;
1652
+ for(;;) {
1653
+ /* now parent and to_be_deleted are set */
1654
+ if (to_be_deleted->file != NULL) {
1655
+ to_be_deleted->file->changed = TRUE;
1656
+ }
1657
+ if (to_be_deleted->left == NULL) {
1658
+ set_parent_of_node(dict, parent, to_be_deleted, to_be_deleted->right);
1659
+ free_dict_node(to_be_deleted);
1660
+ } else {
1661
+ pred_parent = to_be_deleted;
1662
+ pred = to_be_deleted->left;
1663
+ while(pred->right != NULL) {
1664
+ pred_parent = pred;
1665
+ pred = pred->right;
1666
+ }
1667
+ to_be_deleted->string = pred->string;
1668
+ to_be_deleted->file = pred->file;
1669
+ to_be_deleted->exp = pred->exp;
1670
+ set_parent_of_node(dict, pred_parent, pred, pred->left);
1671
+ free_dict_node(pred);
1672
+ }
1673
+ if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return TRUE;
1674
+ }
1675
+ }
1676
+
1677
+ static void free_Word_file(Word_file * wf)
1678
+ {
1679
+ Word_file *wf1;
1680
+
1681
+ for (;wf != NULL; wf = wf1) {
1682
+ wf1 = wf->next;
1683
+ xfree((char *) wf, sizeof(Word_file));
1684
+ }
1685
+ }
1686
+
1687
+ /**
1688
+ * The following two functions free the Exp s and the
1689
+ * E_lists of the dictionary. Not to be confused with
1690
+ * free_E_list in utilities.c
1691
+ */
1692
+ static void free_Elist(E_list * l)
1693
+ {
1694
+ E_list * l1;
1695
+
1696
+ for (; l != NULL; l = l1) {
1697
+ l1 = l->next;
1698
+ xfree(l, sizeof(E_list));
1699
+ }
1700
+ }
1701
+
1702
+ static void free_Exp_list(Exp * e)
1703
+ {
1704
+ Exp * e1;
1705
+ for (; e != NULL; e = e1)
1706
+ {
1707
+ e1 = e->next;
1708
+ if (e->type != CONNECTOR_type)
1709
+ {
1710
+ free_Elist(e->u.l);
1711
+ }
1712
+ exp_free(e);
1713
+ }
1714
+ }
1715
+
1716
+ void free_dictionary(Dictionary dict)
1717
+ {
1718
+ free_dict_node_recursive(dict->root);
1719
+ free_Word_file(dict->word_file_header);
1720
+ free_Exp_list(dict->exp_list);
1721
+ }
1722
+
1723
+ /**
1724
+ * dict_display_word_info() - display the information about the given word.
1725
+ */
1726
+ void dict_display_word_info(Dictionary dict, const char * s)
1727
+ {
1728
+ Dict_node *dn, *dn_head;
1729
+ Disjunct * d1, * d2;
1730
+ int len;
1731
+ dn_head = dictionary_lookup_list(dict, s);
1732
+ if (dn_head == NULL)
1733
+ {
1734
+ printf(" \"%s\" matches nothing in the dictionary.\n", s);
1735
+ return;
1736
+ }
1737
+ printf("Matches:\n");
1738
+ for (dn = dn_head; dn != NULL; dn = dn->right)
1739
+ {
1740
+ len = 0;
1741
+ d1 = build_disjuncts_for_dict_node(dn);
1742
+ for(d2 = d1 ; d2 != NULL; d2 = d2->next)
1743
+ {
1744
+ len++;
1745
+ }
1746
+ free_disjuncts(d1);
1747
+ printf(" ");
1748
+ left_print_string(stdout, dn->string,
1749
+ " ");
1750
+ printf(" %5d disjuncts ", len);
1751
+ if (dn->file != NULL)
1752
+ {
1753
+ printf("<%s>", dn->file->file);
1754
+ }
1755
+ printf("\n");
1756
+ }
1757
+ free_lookup_list(dn_head);
1758
+ return;
1759
+ }
1760
+
1761
+ /**
1762
+ * dict_display_word_expr() - display the connector info for a given word.
1763
+ */
1764
+ void dict_display_word_expr(Dictionary dict, const char * s)
1765
+ {
1766
+ Dict_node *dn, *dn_head;
1767
+
1768
+ dn_head = dictionary_lookup_list(dict, s);
1769
+ if (dn_head == NULL)
1770
+ {
1771
+ printf(" \"%s\" matches nothing in the dictionary.\n", s);
1772
+ return;
1773
+ }
1774
+ printf("\nExpressions:\n");
1775
+ for (dn = dn_head; dn != NULL; dn = dn->right)
1776
+ {
1777
+ printf(" ");
1778
+ left_print_string(stdout, dn->string,
1779
+ " ");
1780
+ print_expression(dn->exp);
1781
+ printf("\n\n");
1782
+ }
1783
+ free_lookup_list(dn_head);
1784
+ return;
1785
+ }