grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,17 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ void prune(Sentence sent);
14
+ int power_prune(Sentence sent, int mode, Parse_Options opts);
15
+ void pp_and_power_prune(Sentence sent, int mode, Parse_Options opts);
16
+ int prune_match(int dist, Connector * left, Connector * right);
17
+ void expression_prune(Sentence sent);
@@ -0,0 +1,1785 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include <limits.h>
15
+ #include <string.h>
16
+ #include <wchar.h>
17
+ #include <wctype.h>
18
+ #include "api.h"
19
+ #include "disjunct-utils.h"
20
+ #include "error.h"
21
+
22
+ const char * linkgrammar_get_version(void)
23
+ {
24
+ const char *s = "link-grammar-" LINK_VERSION_STRING;
25
+ return s;
26
+ }
27
+
28
+ const char * linkgrammar_get_dict_version(Dictionary dict)
29
+ {
30
+ static char * ver = NULL;
31
+ char * p;
32
+ Dict_node *dn;
33
+ Exp *e;
34
+
35
+ if (ver) return ver;
36
+
37
+ /* The newer dictionaries should contain a macro of the form:
38
+ * <dictionary-version-number>: V4v6v6+;
39
+ * which would indicate dictionary version 4.6.6
40
+ * Older dictionaries contain no version info.
41
+ */
42
+ dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
43
+ if (NULL == dn) return "[unknown]";
44
+
45
+ e = dn->exp;
46
+ ver = strdup(&e->u.string[1]);
47
+ p = strchr(ver, 'v');
48
+ while (p)
49
+ {
50
+ *p = '.';
51
+ p = strchr(p+1, 'v');
52
+ }
53
+
54
+ free_lookup_list(dn);
55
+ return ver;
56
+ }
57
+
58
+
59
+ /*
60
+ The dictionary format:
61
+
62
+ In what follows:
63
+ Every "%" symbol and everything after it is ignored on every line.
64
+ Every newline or tab is replaced by a space.
65
+
66
+ The dictionary file is a sequence of ENTRIES. Each ENTRY is one or
67
+ more WORDS (a sequence of upper or lower case letters) separated by
68
+ spaces, followed by a ":", followed by an EXPRESSION followed by a
69
+ ";". An EXPRESSION is an expression where the operators are "&"
70
+ or "and" or "|" or "or", and there are three types of parentheses:
71
+ "()", "{}", and "[]". The terminal symbols of this grammar are the
72
+ connectors, which are strings of letters or numbers or *s.
73
+ Expressions may be written in prefix or infix form. In prefix-form,
74
+ the expressions are lisp-like, with the operators &, | preceding
75
+ the operands. In infix-form, the operators are in the middle. The
76
+ current dictionaries are in infix form. If the C preprocessor
77
+ constant INFIX_NOTATION is defined, then the dictionary is assumed
78
+ to be in infix form.
79
+
80
+ The connector begins with an optional @, which is followed by an upper
81
+ case sequence of letters. Each subsequent *, lower case letter or
82
+ number is a subscript. At the end is a + or - sign. The "@" allows
83
+ this connector to attach to one or more other connectors.
84
+
85
+ Here is a sample dictionary entry (in infix form):
86
+
87
+ gone: T- & {@EV+};
88
+
89
+ (See our paper for more about how to interpret the meaning of the
90
+ dictionary expressions.)
91
+
92
+ A previously defined word (such as "gone" above) may be used instead
93
+ of a connector to specify the expression it was defined to be. Of
94
+ course, in this case, it must uniquely specify a word in the
95
+ dictionary, and have been previously defined.
96
+
97
+ If a word is of the form "/foo", then the file current-dir/foo
98
+ is a so-called word file, and is read in as a list of words.
99
+ A word file is just a list of words separated by blanks or newlines.
100
+
101
+ A word that contains the character "_" defines an idiomatic use of
102
+ the words separated by the "_". For example "kind of" is an idiomatic
103
+ expression, so a word "kind_of" is defined in the dictionary.
104
+ Idiomatic expressions of any number of words can be defined in this way.
105
+ When the word "kind" is encountered, all the idiomatic uses of the word
106
+ are considered.
107
+
108
+ An expression enclosed in "[..]" is given a cost of 1. This means
109
+ that if any of the connectors inside the square braces are used,
110
+ a cost of 1 is incurred. (This cost is the first element of the cost
111
+ vector printed when a sentence is parsed.) Of course if something is
112
+ inside of 10 levels of "[..]" then using it incurs a cost of 10.
113
+ These costs are called "disjunct costs". The linkages are printed out
114
+ in order of non-increasing disjunct cost.
115
+
116
+ The expression "(A+ or ())" means that you can choose either "A+" or
117
+ the empty expression "()", that is, that the connector "A+" is
118
+ optional. This is more compactly expressed as "{A+}". In other words,
119
+ curly braces indicate an optional expression.
120
+
121
+ The expression "(A+ or [])" is the same as that above, but there is a
122
+ cost of 1 incurred for choosing not to use "A+". The expression
123
+ "(EXP1 & [EXP2])" is exactly the same as "[EXP1 & EXP2]". The difference
124
+ between "({[A+]} & B+)" and "([{A+}] & B+)" is that the latter always
125
+ incurs a cost of 1, while the former only gets a cost of 1 if "A+" is
126
+ used.
127
+
128
+ The dictionary writer is not allowed to use connectors that begin in
129
+ "ID". This is reserved for the connectors automatically
130
+ generated for idioms.
131
+
132
+ Dictionary words may be followed by a dot (period, "."), and a "subscript"
133
+ identifying the word type. The subscript may be one or more letters or
134
+ numbers, but must begin with a letter. Currently, the dictionary contains
135
+ (mostly?) subscripts consisting of a single letter, and these serve mostly
136
+ to identify the part-of-speech. In general, subscripts can also be used
137
+ to distinguish different word senses.
138
+ */
139
+
140
+ static int link_advance(Dictionary dict);
141
+
142
+ static void dict_error2(Dictionary dict, const char * s, const char *s2)
143
+ {
144
+ int i;
145
+ char tokens[1024], t[128];
146
+
147
+ if (dict->recursive_error) return;
148
+ dict->recursive_error = TRUE;
149
+
150
+ tokens[0] = '\0';
151
+ for (i=0; i<5 && dict->token[0] != '\0' ; i++)
152
+ {
153
+ sprintf(t, "\"%s\" ", dict->token);
154
+ strcat(tokens, t);
155
+ link_advance(dict);
156
+ }
157
+ if (s2)
158
+ {
159
+ err_ctxt ec;
160
+ ec.sent = NULL;
161
+ err_msg(&ec, Error, "Error parsing dictionary %s.\n"
162
+ "%s %s\n\t line %d, tokens = %s\n",
163
+ dict->name,
164
+ s, s2, dict->line_number, tokens);
165
+ }
166
+ else
167
+ {
168
+ err_ctxt ec;
169
+ ec.sent = NULL;
170
+ err_msg(&ec, Error, "Error parsing dictionary %s.\n"
171
+ "%s\n\t line %d, tokens = %s\n",
172
+ dict->name,
173
+ s, dict->line_number, tokens);
174
+ }
175
+ dict->recursive_error = FALSE;
176
+ }
177
+
178
+ static void dict_error(Dictionary dict, const char * s)
179
+ {
180
+ dict_error2(dict, s, NULL);
181
+ }
182
+
183
+ static void warning(Dictionary dict, const char * s)
184
+ {
185
+ err_ctxt ec;
186
+ ec.sent = NULL;
187
+ err_msg(&ec, Warn, "Warning: %s\n"
188
+ "\tline %d, current token = \"%s\"\n",
189
+ s, dict->line_number, dict->token);
190
+ }
191
+
192
+ /**
193
+ * This gets the next character from the input, eliminating comments.
194
+ * If we're in quote mode, it does not consider the % character for
195
+ * comments.
196
+ */
197
+ static wint_t get_character(Dictionary dict, int quote_mode)
198
+ {
199
+ wint_t c;
200
+
201
+ c = fgetwc(dict->fp);
202
+ if ((c == '%') && (!quote_mode)) {
203
+ while((c != WEOF) && (c != '\n')) c = fgetwc(dict->fp);
204
+ }
205
+ if (c == '\n') dict->line_number++;
206
+ return c;
207
+ }
208
+
209
+
210
+ /*
211
+ * These 10 characters are the ones defining the syntax of the
212
+ * dictionary.
213
+ */
214
+ #define SPECIAL "(){};[]&|:"
215
+
216
+ /**
217
+ * Return true if the input wide-character is one of the special
218
+ * characters used to define the syntax of the dictionary.
219
+ */
220
+ static int is_special(wint_t wc, mbstate_t *ps)
221
+ {
222
+ char buff[MB_LEN_MAX];
223
+ int nr = wcrtomb(buff, wc, ps);
224
+ if (1 != nr) return FALSE;
225
+ return (NULL != strchr(SPECIAL, buff[0]));
226
+ }
227
+
228
+ /**
229
+ * This reads the next token from the input into token.
230
+ * Return 1 if a character was read, else return 0 (and print a warning).
231
+ */
232
+ static int link_advance(Dictionary dict)
233
+ {
234
+ wint_t c;
235
+ int nr, i;
236
+ int quote_mode;
237
+
238
+ dict->is_special = FALSE;
239
+
240
+ if (dict->already_got_it != '\0')
241
+ {
242
+ dict->is_special = is_special(dict->already_got_it, &dict->mbss);
243
+ if (dict->already_got_it == WEOF) {
244
+ dict->token[0] = '\0';
245
+ } else {
246
+ dict->token[0] = dict->already_got_it; /* specials are one byte */
247
+ dict->token[1] = '\0';
248
+ }
249
+ dict->already_got_it = '\0';
250
+ return 1;
251
+ }
252
+
253
+ do { c = get_character(dict, FALSE); } while (iswspace(c));
254
+
255
+ quote_mode = FALSE;
256
+
257
+ i = 0;
258
+ for (;;)
259
+ {
260
+ if (i > MAX_TOKEN_LENGTH-3) { /* 3 for multi-byte tokens */
261
+ dict_error(dict, "Token too long");
262
+ return 0;
263
+ }
264
+ if (quote_mode) {
265
+ if (c == '\"') {
266
+ quote_mode = FALSE;
267
+ dict->token[i] = '\0';
268
+ return 1;
269
+ }
270
+ if (iswspace(c)) {
271
+ dict_error(dict, "White space inside of token");
272
+ return 0;
273
+ }
274
+
275
+ /* Although we read wide chars, we store UTF8 internally, always. */
276
+ nr = wcrtomb(&dict->token[i], c, &dict->mbss);
277
+ if (nr < 0) {
278
+ #ifndef _WIN32
279
+ dict_error2(dict, "Unable to read UTF8 string in current locale",
280
+ nl_langinfo(CODESET));
281
+ fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
282
+ #else
283
+ dict_error(dict, "Unable to read UTF8 string in current locale");
284
+ #endif
285
+ return 0;
286
+ }
287
+ i += nr;
288
+ } else {
289
+ if (is_special(c, &dict->mbss))
290
+ {
291
+ if (i == 0)
292
+ {
293
+ dict->token[0] = c; /* special toks are one char always */
294
+ dict->token[1] = '\0';
295
+ dict->is_special = TRUE;
296
+ return 1;
297
+ }
298
+ dict->token[i] = '\0';
299
+ dict->already_got_it = c;
300
+ return 1;
301
+ }
302
+ if (c == WEOF) {
303
+ if (i == 0) {
304
+ dict->token[0] = '\0';
305
+ return 1;
306
+ }
307
+ dict->token[i] = '\0';
308
+ dict->already_got_it = c;
309
+ return 1;
310
+ }
311
+ if (iswspace(c)) {
312
+ dict->token[i] = '\0';
313
+ return 1;
314
+ }
315
+ if (c == '\"') {
316
+ quote_mode = TRUE;
317
+ } else {
318
+ /* store UTF8 internally, always. */
319
+ nr = wctomb_check(&dict->token[i], c, &dict->mbss);
320
+ if (nr < 0) {
321
+ #ifndef _WIN32
322
+ dict_error2(dict, "Unable to read UTF8 string in current locale",
323
+ nl_langinfo(CODESET));
324
+ fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
325
+ #else
326
+ dict_error(dict, "Unable to read UTF8 string in current locale");
327
+ #endif
328
+ return 0;
329
+ }
330
+ i += nr;
331
+ }
332
+ }
333
+ c = get_character(dict, quote_mode);
334
+ }
335
+ return 1;
336
+ }
337
+
338
+ /**
339
+ * Returns TRUE if this token is a special token and it is equal to c
340
+ */
341
+ static int is_equal(Dictionary dict, wint_t c)
342
+ {
343
+ return (dict->is_special &&
344
+ wctob(c) == dict->token[0] &&
345
+ dict->token[1] == '\0');
346
+ }
347
+
348
+ /**
349
+ * Make sure the string s is a valid connector.
350
+ * Return 1 if the connector is valid, else return 0,
351
+ * and print an appropriate warning message.
352
+ */
353
+ static int check_connector(Dictionary dict, const char * s)
354
+ {
355
+ int i;
356
+ i = strlen(s);
357
+ if (i < 1) {
358
+ dict_error(dict, "Expecting a connector.");
359
+ return 0;
360
+ }
361
+ i = s[i-1]; /* the last character of the token */
362
+ if ((i!='+') && (i!='-')) {
363
+ dict_error(dict, "A connector must end in a \"+\" or \"-\".");
364
+ return 0;
365
+ }
366
+ if (*s == '@') s++;
367
+ if (!isupper((int)*s)) {
368
+ dict_error(dict, "The first letter of a connector must be in [A--Z].");
369
+ return 0;
370
+ }
371
+ if ((*s == 'I') && (*(s+1) == 'D')) {
372
+ dict_error(dict, "Connectors beginning with \"ID\" are forbidden");
373
+ return 0;
374
+ }
375
+ while (*(s+1)) {
376
+ if ((!isalnum((int)*s)) && (*s != '*') && (*s != '^')) {
377
+ dict_error(dict, "All letters of a connector must be ASCII alpha-numeric.");
378
+ return 0;
379
+ }
380
+ s++;
381
+ }
382
+ return 1;
383
+ }
384
+
385
+ /* ======================================================================== */
386
+ /**
387
+ * Dictionary entry comparison and ordering functions.
388
+ *
389
+ * The data structure storing the dictionary is simply a binary tree.
390
+ * The entries in the binary tree are sorted by alphabetical order.
391
+ * There is one catch, however: words may have suffixes (a dot, followed
392
+ * by the suffix), and these suffixes are to be handled appropriately
393
+ * during sorting and comparison.
394
+ *
395
+ * The use of suffixes means that the ordering of the words is not
396
+ * exactly the order given by strcmp. The order must be such that, for
397
+ * example, "make" < "make.n" < "make-up" -- suffixed words come after
398
+ * the bare words, but before any other words with non-ascii-alpha
399
+ * characters (such as the hyphen in "make-up", or possibly UTF8
400
+ * characters). Thus, straight "strcmp" can't be used to determine
401
+ * dictionary order.
402
+ *
403
+ * Thus, a set of specialized string comparison and ordering functions
404
+ * are provided. These "do the right thing" when matching strings with
405
+ * and without suffixes.
406
+ */
407
/**
 * dict_order - order two dictionary words in proper sort order.
 * Return zero if the strings match, else return a negative value if
 * s sorts before t and a positive value if s sorts after t.
 *
 * The strings are walked until they differ (or s ends).  The two
 * differing bytes are then weighted before being subtracted: a "."
 * weighs 1, and any other byte weighs twice its value (so the
 * terminating NUL weighs 0).  That is what places "make" before
 * "make.n", and "make.n" before "make-up".
 *
 * The doubling is done by multiplication, not by a left shift: bytes of
 * non-ASCII (UTF8) characters are negative where char is signed, and
 * left-shifting a negative value is undefined.
 */
static inline int dict_order(const char *s, const char *t)
{
	int ss, tt;

	while (*s != '\0' && *s == *t) {s++; t++;}

	ss = (*s == '.') ? 1 : (*s) * 2;
	tt = (*t == '.') ? 1 : (*t) * 2;
	return (ss - tt);
}
441
+
442
/**
 * dict_order_wild() -- order dictionary strings, with wildcard.
 * s is compared against t; the result is 0 when they match, positive
 * when s sorts after t, and negative when s sorts before t.
 *
 * Both strings are scanned in step until s ends or the bytes differ.
 * If either string is then sitting on a "*", the strings are taken
 * to match.  Otherwise a "." is treated as if it were the end of the
 * string, and the difference of the two bytes is returned.
 */
static inline int dict_order_wild(const char * s, const char * t)
{
	int cs, ct;

	for (; (*s != '\0') && (*s == *t); s++, t++) { /* skip common prefix */ }

	if (*s == '*') return 0;
	if (*t == '*') return 0;

	cs = (*s == '.') ? '\0' : *s;
	ct = (*t == '.') ? '\0' : *t;
	return cs - ct;
}
460
+
461
/**
 * dict_match -- return true if strings match, else false.
 * A "bare" string (one without a suffix) will match any corresponding
 * string with a suffix; so, for example, "make" and "make.n" are
 * a match.  If both strings have suffixes, then the suffixes must match.
 *
 * A suffix (subscript) is the part that follows the last "." in the
 * word, provided it is not empty and does not begin with a digit.
 */
static int dict_match(const char * s, const char * t)
{
	const char *ds, *dt;

	ds = strrchr(s, '.');
	dt = strrchr(t, '.');

	/* A dot at the end or a dot followed by a number is NOT
	 * considered a subscript.  The cast keeps isdigit() defined for
	 * bytes of non-ASCII characters, which may be negative as char. */
	if ((dt != NULL) && ((dt[1] == '\0') ||
	    (isdigit((unsigned char)dt[1])))) dt = NULL;
	if ((ds != NULL) && ((ds[1] == '\0') ||
	    (isdigit((unsigned char)ds[1])))) ds = NULL;

	/* dt is NULL when t has no suffix ... */
	if (dt == NULL && ds != NULL) {
		size_t stem = (size_t)(ds - s);
		/* The length test ensures that "i.e." does not match "i.e" */
		if (strlen(t) > stem) return 0;
		return (strncmp(s, t, stem) == 0);
	} else if (dt != NULL && ds == NULL) {
		size_t stem = (size_t)(dt - t);
		if (strlen(s) > stem) return 0;
		return (strncmp(s, t, stem) == 0);
	} else {
		return (strcmp(s, t) == 0);
	}
}
494
+
495
+ /* ======================================================================== */
496
+
497
+ static inline Dict_node * dict_node_new(void)
498
+ {
499
+ return (Dict_node*) xalloc(sizeof(Dict_node));
500
+ }
501
+
502
+ static inline void free_dict_node(Dict_node *dn)
503
+ {
504
+ xfree((char *)dn, sizeof(Dict_node));
505
+ }
506
+
507
+ /**
508
+ * prune_lookup_list -- discard all list entries that don't match string
509
+ * Walk the lookup list (of right links), discarding all nodes that do
510
+ * not match the dictionary string s. The matching is dictionary matching:
511
+ * suffixed entries will match "bare" entries.
512
+ */
513
+ static Dict_node * prune_lookup_list(Dict_node *llist, const char * s)
514
+ {
515
+ Dict_node *dn, *dnx, *list_new;
516
+
517
+ list_new = NULL;
518
+ for (dn = llist; dn != NULL; dn = dnx)
519
+ {
520
+ dnx = dn->right;
521
+ /* now put dn onto the answer list, or free it */
522
+ if (dict_match(dn->string, s))
523
+ {
524
+ dn->right = list_new;
525
+ list_new = dn;
526
+ }
527
+ else
528
+ {
529
+ free_dict_node(dn);
530
+ }
531
+ }
532
+
533
+ /* now reverse the list back */
534
+ llist = NULL;
535
+ for (dn = list_new; dn != NULL; dn = dnx)
536
+ {
537
+ dnx = dn->right;
538
+ dn->right = llist;
539
+ llist = dn;
540
+ }
541
+ return llist;
542
+ }
543
+
544
+ void free_lookup_list(Dict_node *llist)
545
+ {
546
+ Dict_node * n;
547
+ while(llist != NULL)
548
+ {
549
+ n = llist->right;
550
+ free_dict_node(llist);
551
+ llist = n;
552
+ }
553
+ }
554
+
555
+ static void free_dict_node_recursive(Dict_node * dn)
556
+ {
557
+ if (dn == NULL) return;
558
+ free_dict_node_recursive(dn->left);
559
+ free_dict_node_recursive(dn->right);
560
+ free_dict_node(dn);
561
+ }
562
+
563
+ /* ======================================================================== */
564
+ /**
565
+ * rdictionary_lookup() -- recursive dictionary lookup
566
+ * Walk binary tree, given by 'dn', looking for the string 's'.
567
+ * For every node in the tree where 's' matches (including wildcards)
568
+ * make a copy of that node, and append it to llist.
569
+ */
570
+ static Dict_node * rdictionary_lookup(Dict_node *llist,
571
+ Dict_node * dn, const char * s, int match_idiom)
572
+ {
573
+ /* see comment in dictionary_lookup below */
574
+ int m;
575
+ Dict_node * dn_new;
576
+ if (dn == NULL) return llist;
577
+ m = dict_order_wild(s, dn->string);
578
+ if (m >= 0)
579
+ {
580
+ llist = rdictionary_lookup(llist, dn->right, s, match_idiom);
581
+ }
582
+ if ((m == 0) && (match_idiom || !is_idiom_word(dn->string)))
583
+ {
584
+ dn_new = dict_node_new();
585
+ *dn_new = *dn;
586
+ dn_new->right = llist;
587
+ llist = dn_new;
588
+ }
589
+ if (m <= 0)
590
+ {
591
+ llist = rdictionary_lookup(llist, dn->left, s, match_idiom);
592
+ }
593
+ return llist;
594
+ }
595
+
596
+ /**
597
+ * dictionary_lookup_list() - return lookup list of words in the dictionary
598
+ *
599
+ * Returns a pointer to a lookup list of the words in the dictionary.
600
+ * Matches include word that appear in idioms. Use
601
+ * abridged_lookup_list() to obtain matches, excluding idioms.
602
+ *
603
+ * This list is made up of Dict_nodes, linked by their right pointers.
604
+ * The node, file and string fields are copied from the dictionary.
605
+ *
606
+ * The returned list must be freed with free_lookup_list().
607
+ */
608
+ Dict_node * dictionary_lookup_list(Dictionary dict, const char *s)
609
+ {
610
+ Dict_node * llist = rdictionary_lookup(NULL, dict->root, s, TRUE);
611
+ llist = prune_lookup_list(llist, s);
612
+ return llist;
613
+ }
614
+
615
+ /**
616
+ * abridged_lookup_list() - return lookup list of words in the dictionary
617
+ *
618
+ * Returns a pointer to a lookup list of the words in the dictionary.
619
+ * Excludes any idioms that contain the word; use
620
+ * dictionary_lookup_list() to obtain the complete list.
621
+ *
622
+ * This list is made up of Dict_nodes, linked by their right pointers.
623
+ * The node, file and string fields are copied from the dictionary.
624
+ *
625
+ * The returned list must be freed with free_lookup_list().
626
+ */
627
+ Dict_node * abridged_lookup_list(Dictionary dict, const char *s)
628
+ {
629
+ Dict_node *llist;
630
+ llist = rdictionary_lookup(NULL, dict->root, s, FALSE);
631
+ llist = prune_lookup_list(llist, s);
632
+ return llist;
633
+ }
634
+
635
+ int boolean_dictionary_lookup(Dictionary dict, const char *s)
636
+ {
637
+ Dict_node *llist = dictionary_lookup_list(dict, s);
638
+ int boool = (llist != NULL);
639
+ free_lookup_list(llist);
640
+ return boool;
641
+ }
642
+
643
+ /* ======================================================================== */
644
+ /**
645
+ * Allocate a new Exp node and link it into the exp_list for freeing later.
646
+ */
647
+ Exp * Exp_create(Dictionary dict)
648
+ {
649
+ Exp * e;
650
+ e = (Exp *) xalloc(sizeof(Exp));
651
+ e->next = dict->exp_list;
652
+ dict->exp_list = e;
653
+ return e;
654
+ }
655
+
656
+ static inline void exp_free(Exp * e)
657
+ {
658
+ xfree((char *)e, sizeof(Exp));
659
+ }
660
+
661
+ /* ======================================================================== */
662
+ /**
663
+ * This creates a node with one child (namely e). Initializes
664
+ * the cost to zero.
665
+ */
666
+ static Exp * make_unary_node(Dictionary dict, Exp * e)
667
+ {
668
+ Exp * n;
669
+ n = Exp_create(dict);
670
+ n->type = AND_type; /* these must be AND types */
671
+ n->cost = 0.0f;
672
+ n->u.l = (E_list *) xalloc(sizeof(E_list));
673
+ n->u.l->next = NULL;
674
+ n->u.l->e = e;
675
+ return n;
676
+ }
677
+
678
+ /**
679
+ * connector() -- make a node for a connector or dictionary word.
680
+ *
681
+ * Assumes the current token is a connector or dictionary word.
682
+ */
683
+ static Exp * connector(Dictionary dict)
684
+ {
685
+ Exp * n;
686
+ Dict_node *dn, *dn_head;
687
+ int i;
688
+
689
+ i = strlen(dict->token) - 1; /* this must be + or - if a connector */
690
+ if ((dict->token[i] != '+') && (dict->token[i] != '-'))
691
+ {
692
+ /* If we are here, token is a word */
693
+ dn_head = abridged_lookup_list(dict, dict->token);
694
+ dn = dn_head;
695
+ while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
696
+ {
697
+ dn = dn->right;
698
+ }
699
+ if (dn == NULL)
700
+ {
701
+ free_lookup_list(dn_head);
702
+ dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
703
+ "Or perhaps you forgot the suffix on a word.\n"
704
+ "Or perhaps a word is used before it is defined.\n");
705
+ return NULL;
706
+ }
707
+ n = make_unary_node(dict, dn->exp);
708
+ free_lookup_list(dn_head);
709
+ }
710
+ else
711
+ {
712
+ /* If we are here, token is a connector */
713
+ if (!check_connector(dict, dict->token))
714
+ {
715
+ return NULL;
716
+ }
717
+ n = Exp_create(dict);
718
+ n->dir = dict->token[i];
719
+ dict->token[i] = '\0'; /* get rid of the + or - */
720
+ if (dict->token[0] == '@')
721
+ {
722
+ n->u.string = string_set_add(dict->token+1, dict->string_set);
723
+ n->multi = TRUE;
724
+ }
725
+ else
726
+ {
727
+ n->u.string = string_set_add(dict->token, dict->string_set);
728
+ n->multi = FALSE;
729
+ }
730
+ n->type = CONNECTOR_type;
731
+ n->cost = 0.0f;
732
+ }
733
+
734
+ if (!link_advance(dict))
735
+ {
736
+ exp_free(n);
737
+ return NULL;
738
+ }
739
+ return n;
740
+ }
741
+
742
+ /**
743
+ * This creates a node with zero children. Initializes
744
+ * the cost to zero.
745
+ */
746
+ static Exp * make_zeroary_node(Dictionary dict)
747
+ {
748
+ Exp * n;
749
+ n = Exp_create(dict);
750
+ n->type = AND_type; /* these must be AND types */
751
+ n->cost = 0.0f;
752
+ n->u.l = NULL;
753
+ return n;
754
+ }
755
+
756
+ /**
757
+ * This creates an OR node with two children, one the given node,
758
+ * and the other as zeroary node. This has the effect of creating
759
+ * what used to be called an optional node.
760
+ */
761
+ static Exp * make_optional_node(Dictionary dict, Exp * e)
762
+ {
763
+ Exp * n;
764
+ E_list *el, *elx;
765
+ n = Exp_create(dict);
766
+ n->type = OR_type;
767
+ n->cost = 0.0f;
768
+ n->u.l = el = (E_list *) xalloc(sizeof(E_list));
769
+ el->e = make_zeroary_node(dict);
770
+ el->next = elx = (E_list *) xalloc(sizeof(E_list));
771
+ elx->next = NULL;
772
+ elx->e = e;
773
+ return n;
774
+ }
775
+
776
+ /* ======================================================================== */
777
+
778
+ #if ! defined INFIX_NOTATION
779
+
780
+ Exp * expression(Dictionary dict);
781
+ /**
782
+ * We're looking at the first of the stuff after an "and" or "or".
783
+ * Build a Exp node for this expression. Set the cost and optional
784
+ * fields to the default values. Set the type field according to type
785
+ */
786
+ Exp * operator_exp(Dictionary dict, int type)
787
+ {
788
+ Exp * n;
789
+ E_list first;
790
+ E_list * elist;
791
+ n = Exp_create(dict);
792
+ n->type = type;
793
+ n->cost = 0.0f;
794
+ elist = &first;
795
+ while((!is_equal(dict, ')')) && (!is_equal(dict, ']')) && (!is_equal(dict, '}'))) {
796
+ elist->next = (E_list *) xalloc(sizeof(E_list));
797
+ elist = elist->next;
798
+ elist->next = NULL;
799
+ elist->e = expression(dict);
800
+ if (elist->e == NULL) {
801
+ return NULL;
802
+ }
803
+ }
804
+ if (elist == &first) {
805
+ dict_error(dict, "An \"or\" or \"and\" of nothing");
806
+ return NULL;
807
+ }
808
+ n->u.l = first.next;
809
+ return n;
810
+ }
811
+
812
+ /**
813
+ * Looks for the stuff that is allowed to be inside of parentheses
814
+ * either & or | followed by a list, or a terminal symbol.
815
+ */
816
+ Exp * in_parens(Dictionary dict)
817
+ {
818
+ Exp * e;
819
+
820
+ if (is_equal(dict, '&') || (strcmp(token, "and")==0)) {
821
+ if (!link_advance(dict)) {
822
+ return NULL;
823
+ }
824
+ return operator_exp(dict, AND_type);
825
+ } else if (is_equal(dict, '|') || (strcmp(dict->token, "or")==0)) {
826
+ if (!link_advance(dict)) {
827
+ return NULL;
828
+ }
829
+ return operator_exp(dict, OR_type);
830
+ } else {
831
+ return expression(dict);
832
+ }
833
+ }
834
+
835
+ /**
836
+ * Build (and return the root of) the tree for the expression beginning
837
+ * with the current token. At the end, the token is the first one not
838
+ * part of this expression.
839
+ */
840
+ Exp * expression(Dictionary dict)
841
+ {
842
+ Exp * n;
843
+ if (is_equal(dict, '(')) {
844
+ if (!link_advance(dict)) {
845
+ return NULL;
846
+ }
847
+ n = in_parens(dict);
848
+ if (!is_equal(dict, ')')) {
849
+ dict_error(dict, "Expecting a \")\".");
850
+ return NULL;
851
+ }
852
+ if (!link_advance(dict)) {
853
+ return NULL;
854
+ }
855
+ } else if (is_equal(dict, '{')) {
856
+ if (!link_advance(dict)) {
857
+ return NULL;
858
+ }
859
+ n = in_parens(dict);
860
+ if (!is_equal(dict, '}')) {
861
+ dict_error(dict, "Expecting a \"}\".");
862
+ return NULL;
863
+ }
864
+ if (!link_advance(dict)) {
865
+ return NULL;
866
+ }
867
+ n = make_optional_node(dict, n);
868
+ } else if (is_equal(dict, '[')) {
869
+ if (!link_advance(dict)) {
870
+ return NULL;
871
+ }
872
+ n = in_parens(dict);
873
+ if (!is_equal(dict, ']')) {
874
+ dict_error(dict, "Expecting a \"]\".");
875
+ return NULL;
876
+ }
877
+ if (!link_advance(dict)) {
878
+ return NULL;
879
+ }
880
+ n->cost += 1.0f;
881
+ } else if (!dict->is_special) {
882
+ n = connector(dict);
883
+ if (n == NULL) {
884
+ return NULL;
885
+ }
886
+ } else if (is_equal(dict, ')') || is_equal(dict, ']')) {
887
+ /* allows "()" or "[]" */
888
+ n = make_zeroary_node(dict);
889
+ } else {
890
+ dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
891
+ return NULL;
892
+ }
893
+ return n;
894
+ }
895
+
896
+ /* ======================================================================== */
897
+ #else /* This is for infix notation */
898
+
899
+ static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok);
900
+
901
+ /**
902
+ * Build (and return the root of) the tree for the expression beginning
903
+ * with the current token. At the end, the token is the first one not
904
+ * part of this expression.
905
+ */
906
+ static Exp * expression(Dictionary dict)
907
+ {
908
+ return restricted_expression(dict, TRUE, TRUE);
909
+ }
910
+
911
+ static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok)
912
+ {
913
+ Exp *nl = NULL, *nr;
914
+ E_list *ell, *elr;
915
+
916
+ if (is_equal(dict, '('))
917
+ {
918
+ if (!link_advance(dict)) {
919
+ return NULL;
920
+ }
921
+ nl = expression(dict);
922
+ if (nl == NULL) {
923
+ return NULL;
924
+ }
925
+ if (!is_equal(dict, ')')) {
926
+ dict_error(dict, "Expecting a \")\".");
927
+ return NULL;
928
+ }
929
+ if (!link_advance(dict)) {
930
+ return NULL;
931
+ }
932
+ }
933
+ else if (is_equal(dict, '{'))
934
+ {
935
+ if (!link_advance(dict)) {
936
+ return NULL;
937
+ }
938
+ nl = expression(dict);
939
+ if (nl == NULL) {
940
+ return NULL;
941
+ }
942
+ if (!is_equal(dict, '}')) {
943
+ dict_error(dict, "Expecting a \"}\".");
944
+ return NULL;
945
+ }
946
+ if (!link_advance(dict)) {
947
+ return NULL;
948
+ }
949
+ nl = make_optional_node(dict, nl);
950
+ }
951
+ else if (is_equal(dict, '['))
952
+ {
953
+ if (!link_advance(dict)) {
954
+ return NULL;
955
+ }
956
+ nl = expression(dict);
957
+ if (nl == NULL) {
958
+ return NULL;
959
+ }
960
+ if (!is_equal(dict, ']')) {
961
+ dict_error(dict, "Expecting a \"]\".");
962
+ return NULL;
963
+ }
964
+ if (!link_advance(dict)) {
965
+ return NULL;
966
+ }
967
+ nl->cost += 1.0f;
968
+ }
969
+ else if (!dict->is_special)
970
+ {
971
+ nl = connector(dict);
972
+ if (nl == NULL) {
973
+ return NULL;
974
+ }
975
+ }
976
+ else if (is_equal(dict, ')') || is_equal(dict, ']'))
977
+ {
978
+ /* allows "()" or "[]" */
979
+ nl = make_zeroary_node(dict);
980
+ }
981
+ else
982
+ {
983
+ dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
984
+ return NULL;
985
+ }
986
+
987
+ if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0))
988
+ {
989
+ Exp *n;
990
+
991
+ if (!and_ok) {
992
+ warning(dict, "\"and\" and \"or\" at the same level in an expression");
993
+ }
994
+ if (!link_advance(dict)) {
995
+ return NULL;
996
+ }
997
+ nr = restricted_expression(dict, TRUE, FALSE);
998
+ if (nr == NULL) {
999
+ return NULL;
1000
+ }
1001
+ n = Exp_create(dict);
1002
+ n->u.l = ell = (E_list *) xalloc(sizeof(E_list));
1003
+ ell->next = elr = (E_list *) xalloc(sizeof(E_list));
1004
+ elr->next = NULL;
1005
+
1006
+ ell->e = nl;
1007
+ elr->e = nr;
1008
+ n->type = AND_type;
1009
+ n->cost = 0.0f;
1010
+ return n;
1011
+ }
1012
+ else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0))
1013
+ {
1014
+ Exp *n;
1015
+
1016
+ if (!or_ok) {
1017
+ warning(dict, "\"and\" and \"or\" at the same level in an expression");
1018
+ }
1019
+ if (!link_advance(dict)) {
1020
+ return NULL;
1021
+ }
1022
+ nr = restricted_expression(dict, FALSE,TRUE);
1023
+ if (nr == NULL) {
1024
+ return NULL;
1025
+ }
1026
+ n = Exp_create(dict);
1027
+ n->u.l = ell = (E_list *) xalloc(sizeof(E_list));
1028
+ ell->next = elr = (E_list *) xalloc(sizeof(E_list));
1029
+ elr->next = NULL;
1030
+
1031
+ ell->e = nl;
1032
+ elr->e = nr;
1033
+ n->type = OR_type;
1034
+ n->cost = 0.0f;
1035
+ return n;
1036
+ }
1037
+
1038
+ return nl;
1039
+ }
1040
+
1041
+ #endif
1042
+
1043
+ /* ======================================================================== */
1044
+ /* Tree balancing utilities, used to implement an AVL tree.
1045
+ * Unfortunately, AVL tree insertion is very slowww, unusably
1046
+ * slow for creating the dictionary. The code is thus ifdef'ed out
1047
+ * but is left here for debugging and other sundry purposes.
1048
+ * A better way to rebalance the tree is the DSW algo, implemented
1049
+ * further below.
1050
+ */
1051
+
1052
+ static Dict_node *rotate_right(Dict_node *root)
1053
+ {
1054
+ Dict_node *pivot = root->left;
1055
+ root->left = pivot->right;
1056
+ pivot->right = root;
1057
+ return pivot;
1058
+ }
1059
+
1060
+ #ifdef USE_AVL_TREE_FOR_INSERTION
1061
+
1062
+ static Dict_node *rotate_left(Dict_node *root)
1063
+ {
1064
+ Dict_node *pivot = root->right;
1065
+ root->right = pivot->left;
1066
+ pivot->left = root;
1067
+ return pivot;
1068
+ }
1069
+
1070
+ /* Return tree height. XXX this is not tail-recursive! */
1071
+ static int tree_depth (Dict_node *n)
1072
+ {
1073
+ int l, r;
1074
+ if (NULL == n) return 0;
1075
+ if (NULL == n->left) return 1+tree_depth(n->right);
1076
+ if (NULL == n->right) return 1+tree_depth(n->left);
1077
+ l = tree_depth(n->left);
1078
+ r = tree_depth(n->right);
1079
+ if (l < r) return r+1;
1080
+ return l+1;
1081
+ }
1082
+
1083
+ static int tree_balance(Dict_node *n)
1084
+ {
1085
+ int l = tree_depth(n->left);
1086
+ int r = tree_depth(n->right);
1087
+ return r-l;
1088
+ }
1089
+
1090
+ /**
1091
+ * Rebalance the dictionary tree.
1092
+ * This recomputes the tree depth wayy too often, but so what.. this
1093
+ * only wastes cpu time during the initial dictinary read.
1094
+ */
1095
+ static Dict_node *rebalance(Dict_node *root)
1096
+ {
1097
+ int bal = tree_balance(root);
1098
+ if (2 == bal)
1099
+ {
1100
+ bal = tree_balance(root->right);
1101
+ if (-1 == bal)
1102
+ {
1103
+ root->right = rotate_right (root->right);
1104
+ }
1105
+ return rotate_left(root);
1106
+ }
1107
+ else if (-2 == bal)
1108
+ {
1109
+ bal = tree_balance(root->left);
1110
+ if (1 == bal)
1111
+ {
1112
+ root->left = rotate_left (root->left);
1113
+ }
1114
+ return rotate_right(root);
1115
+ }
1116
+ return root;
1117
+ }
1118
+
1119
+ #endif /* USE_AVL_TREE_FOR_INSERTION */
1120
+
1121
+ /* ======================================================================== */
1122
+ /* Implementation of the DSW algo for rebalancing a binary tree.
1123
+ * The point is -- after building the dictionary tree, we rebalance it
1124
+ * once at the end. This is a **LOT LOT** quicker than maintaining an
1125
+ * AVL tree along the way (less than quarter-of-a-second vs. about
1126
+ * a minute or more!) FWIW, the DSW tree is even more balanced than
1127
+ * the AVL tree is (it's less deep, more full).
1128
+ *
1129
+ * The DSW algo, with C++ code, is described in
1130
+ *
1131
+ * Timothy J. Rolfe, "One-Time Binary Search Tree Balancing:
1132
+ * The Day/Stout/Warren (DSW) Algorithm", inroads, Vol. 34, No. 4
1133
+ * (December 2002), pp. 85-88
1134
+ * http://penguin.ewu.edu/~trolfe/DSWpaper/
1135
+ */
1136
+
1137
+ static Dict_node * dsw_tree_to_vine (Dict_node *root)
1138
+ {
1139
+ Dict_node *vine_tail, *vine_head, *rest;
1140
+ Dict_node vh;
1141
+
1142
+ vine_head = &vh;
1143
+ vine_head->left = NULL;
1144
+ vine_head->right = root;
1145
+ vine_tail = vine_head;
1146
+ rest = root;
1147
+
1148
+ while (NULL != rest)
1149
+ {
1150
+ /* If no left, we are done, do the right */
1151
+ if (NULL == rest->left)
1152
+ {
1153
+ vine_tail = rest;
1154
+ rest = rest->right;
1155
+ }
1156
+ /* eliminate the left subtree */
1157
+ else
1158
+ {
1159
+ rest = rotate_right(rest);
1160
+ vine_tail->right = rest;
1161
+ }
1162
+ }
1163
+
1164
+ return vh.right;
1165
+ }
1166
+
1167
+ static void dsw_compression (Dict_node *root, unsigned int count)
1168
+ {
1169
+ unsigned int j;
1170
+ for (j = 0; j < count; j++)
1171
+ {
1172
+ /* Compound left rotation */
1173
+ Dict_node * pivot = root->right;
1174
+ root->right = pivot->right;
1175
+ root = pivot->right;
1176
+ pivot->right = root->left;
1177
+ root->left = pivot;
1178
+ }
1179
+ }
1180
+
1181
/* Return size of the full portion of the tree.
 * Finds the smallest value of the form pow(2,k)-1 that is not below
 * size, and returns half of it (rounded down).
 */
static inline unsigned int full_tree_size (unsigned int size)
{
	unsigned int span;

	/* span runs through 1, 3, 7, 15, ... */
	for (span = 1; span < size; span = (span << 1) | 1u) { }

	return span >> 1;
}
1190
+
1191
+ static Dict_node * dsw_vine_to_tree (Dict_node *root, int size)
1192
+ {
1193
+ Dict_node vine_head;
1194
+ unsigned int full_count = full_tree_size(size +1);
1195
+
1196
+ vine_head.left = NULL;
1197
+ vine_head.right = root;
1198
+
1199
+ dsw_compression(&vine_head, size - full_count);
1200
+ for (size = full_count ; size > 1 ; size /= 2)
1201
+ {
1202
+ dsw_compression(&vine_head, size / 2);
1203
+ }
1204
+ return vine_head.right;
1205
+ }
1206
+
1207
+ /* ======================================================================== */
1208
+ /**
1209
+ * Insert the new node into the dictionary below node n.
1210
+ * Give error message if the new element's string is already there.
1211
+ * Assumes that the "n" field of new is already set, and the left
1212
+ * and right fields of it are NULL.
1213
+ *
1214
+ * The resulting tree is highly unbalanced. It needs to be rebalanced
1215
+ * before used.
1216
+ */
1217
+ Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode)
1218
+ {
1219
+ int comp;
1220
+
1221
+ if (NULL == n) return newnode;
1222
+
1223
+ comp = dict_order(newnode->string, n->string);
1224
+ if (comp < 0)
1225
+ {
1226
+ if (NULL == n->left)
1227
+ {
1228
+ n->left = newnode;
1229
+ return n;
1230
+ }
1231
+ n->left = insert_dict(dict, n->left, newnode);
1232
+ return n;
1233
+ /* return rebalance(n); Uncomment to get an AVL tree */
1234
+ }
1235
+ else if (comp > 0)
1236
+ {
1237
+ if (NULL == n->right)
1238
+ {
1239
+ n->right = newnode;
1240
+ return n;
1241
+ }
1242
+ n->right = insert_dict(dict, n->right, newnode);
1243
+ return n;
1244
+ /* return rebalance(n); Uncomment to get an AVL tree */
1245
+ }
1246
+ else
1247
+ {
1248
+ char t[256];
1249
+ snprintf(t, 256, "The word \"%s\" has been multiply defined\n", newnode->string);
1250
+ dict_error(dict, t);
1251
+ return NULL;
1252
+ }
1253
+ }
1254
+
1255
+ /**
1256
+ * insert_list() -
1257
+ * p points to a list of dict_nodes connected by their left pointers.
1258
+ * l is the length of this list (the last ptr may not be NULL).
1259
+ * It inserts the list into the dictionary.
1260
+ * It does the middle one first, then the left half, then the right.
1261
+ *
1262
+ * Note: I think this insert middle, then left, then right, has
1263
+ * its origins as a lame attempt to hack around the fact that the
1264
+ * resulting binary tree is rather badly unbalanced. This has been
1265
+ * fixed by using the DSW rebalancing algo. Now, that would seem
1266
+ * to render this crazy bisected-insertion algo obsoloete, but ..
1267
+ * oddly enough, it seems to make the DSW balancing go really fast!
1268
+ * Faster than a simple insertion. Go figure. I think this has
1269
+ * something to do with the fact that the dictionaries are in
1270
+ * alphabetical order! This subdivision helps randomize a bit.
1271
+ */
1272
+ static void insert_list(Dictionary dict, Dict_node * p, int l)
1273
+ {
1274
+ Dict_node * dn, *dn_head, *dn_second_half;
1275
+ int k, i; /* length of first half */
1276
+
1277
+ if (l == 0) return;
1278
+
1279
+ k = (l-1)/2;
1280
+ dn = p;
1281
+ for (i = 0; i < k; i++)
1282
+ {
1283
+ dn = dn->left;
1284
+ }
1285
+
1286
+ /* dn now points to the middle element */
1287
+ dn_second_half = dn->left;
1288
+ dn->left = dn->right = NULL;
1289
+
1290
+ if (contains_underbar(dn->string))
1291
+ {
1292
+ insert_idiom(dict, dn);
1293
+ }
1294
+ else if (is_idiom_word(dn->string))
1295
+ {
1296
+ err_ctxt ec;
1297
+ ec.sent = NULL;
1298
+ err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
1299
+ "\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
1300
+ "\tThis word will be ignored.\n",
1301
+ dn->string, dict->line_number);
1302
+ free_dict_node(dn);
1303
+ }
1304
+ else if ((dn_head = abridged_lookup_list(dict, dn->string)) != NULL)
1305
+ {
1306
+ Dict_node *dnx;
1307
+ err_ctxt ec;
1308
+ ec.sent = NULL;
1309
+ err_msg(&ec, Warn, "Warning: The word \"%s\" "
1310
+ "found near line %d of %s matches the following words:\n",
1311
+ dn->string, dict->line_number, dict->name);
1312
+ for (dnx = dn_head; dnx != NULL; dnx = dnx->right) {
1313
+ fprintf(stderr, "\t%s", dnx->string);
1314
+ }
1315
+ fprintf(stderr, "\n\tThis word will be ignored.\n");
1316
+ free_lookup_list(dn_head);
1317
+ free_dict_node(dn);
1318
+ }
1319
+ else
1320
+ {
1321
+ dict->root = insert_dict(dict, dict->root, dn);
1322
+ dict->num_entries++;
1323
+ }
1324
+
1325
+ insert_list(dict, p, k);
1326
+ insert_list(dict, dn_second_half, l-k-1);
1327
+ }
1328
+
1329
+ /**
1330
+ * read_entry() -- read one dictionary entry
1331
+ * Starting with the current token parse one dictionary entry.
1332
+ * Add these words to the dictionary.
1333
+ */
1334
+ static int read_entry(Dictionary dict)
1335
+ {
1336
+ Exp *n;
1337
+ int i;
1338
+
1339
+ Dict_node *dn_new, *dnx, *dn = NULL;
1340
+
1341
+ /* Reset multi-byte shift state every line. */
1342
+ memset(&dict->mbss, 0, sizeof(dict->mbss));
1343
+
1344
+ while (!is_equal(dict, ':'))
1345
+ {
1346
+ if (dict->is_special)
1347
+ {
1348
+ dict_error(dict, "I expected a word but didn\'t get it.");
1349
+ return 0;
1350
+ }
1351
+
1352
+ /* if it's a word-file name */
1353
+ /* However, be careful to reject "/.v" which is the division symbol
1354
+ * used in equations (.v means verb-like) */
1355
+ if ((dict->token[0] == '/') && (dict->token[1] != '.'))
1356
+ {
1357
+ dn = read_word_file(dict, dn, dict->token);
1358
+ if (dn == NULL)
1359
+ {
1360
+ err_ctxt ec;
1361
+ ec.sent = NULL;
1362
+ err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
1363
+ return 0;
1364
+ }
1365
+ }
1366
+ else
1367
+ {
1368
+ dn_new = dict_node_new();
1369
+ dn_new->left = dn;
1370
+ dn = dn_new;
1371
+ dn->file = NULL;
1372
+ dn->string = string_set_add(dict->token, dict->string_set);
1373
+ }
1374
+
1375
+ /* Advance to next entry, unless error */
1376
+ if (0 == link_advance(dict)) goto syntax_error;
1377
+ }
1378
+
1379
+ /* pass the : */
1380
+ if (!link_advance(dict))
1381
+ {
1382
+ goto syntax_error;
1383
+ }
1384
+
1385
+ n = expression(dict);
1386
+ if (n == NULL)
1387
+ {
1388
+ goto syntax_error;
1389
+ }
1390
+
1391
+ if (!is_equal(dict, ';'))
1392
+ {
1393
+ dict_error(dict, "Expecting \";\" at the end of an entry.");
1394
+ goto syntax_error;
1395
+ }
1396
+
1397
+ /* pass the ; */
1398
+ if (!link_advance(dict))
1399
+ {
1400
+ goto syntax_error;
1401
+ }
1402
+
1403
+ /* At this point, dn points to a list of Dict_nodes connected by
1404
+ * their left pointers. These are to be inserted into the dictionary */
1405
+ i = 0;
1406
+ for (dnx = dn; dnx != NULL; dnx = dnx->left)
1407
+ {
1408
+ dnx->exp = n;
1409
+ i++;
1410
+ }
1411
+ insert_list(dict, dn, i);
1412
+ return 1;
1413
+
1414
+ syntax_error:
1415
+ free_lookup_list(dn);
1416
+ return 0;
1417
+ }
1418
+
1419
+ #if ! defined INFIX_NOTATION
1420
+ /**
1421
+ * print the expression, in prefix-style
1422
+ */
1423
+ void print_expression(Exp * n)
1424
+ {
1425
+ E_list * el;
1426
+ int i, icost;
1427
+
1428
+ if (n == NULL)
1429
+ {
1430
+ printf("NULL expression");
1431
+ return;
1432
+ }
1433
+
1434
+ icost = (int) (n->cost);
1435
+ if (n->type == CONNECTOR_type)
1436
+ {
1437
+ for (i=0; i<icost; i++) printf("[");
1438
+ if (n->multi) printf("@");
1439
+ printf("%s%c",n->u.string, n->dir);
1440
+ for (i=0; i<icost; i++) printf("]");
1441
+ if (icost > 0) printf(" ");
1442
+ }
1443
+ else
1444
+ {
1445
+ for (i=0; i<icost; i++) printf("[");
1446
+ if (icost == 0) printf("(");
1447
+ if (n->type == AND_type) printf("& ");
1448
+ if (n->type == OR_type) printf("or ");
1449
+ for (el = n->u.l; el != NULL; el = el->next)
1450
+ {
1451
+ print_expression(el->e);
1452
+ }
1453
+ for (i=0; i<icost; i++) printf("]");
1454
+ if (icost > 0) printf(" ");
1455
+ if (icost == 0) printf(") ");
1456
+ }
1457
+ }
1458
+
1459
+ #else /* INFIX_NOTATION */
1460
+
1461
+ /**
1462
+ * print the expression, in infix-style
1463
+ */
1464
+ static void print_expression_parens(Exp * n, int need_parens)
1465
+ {
1466
+ E_list * el;
1467
+ int i, icost;
1468
+
1469
+ if (n == NULL)
1470
+ {
1471
+ printf("NULL expression");
1472
+ return;
1473
+ }
1474
+
1475
+ icost = (int) (n->cost);
1476
+ /* print the connector only */
1477
+ if (n->type == CONNECTOR_type)
1478
+ {
1479
+ for (i=0; i<icost; i++) printf("[");
1480
+ if (n->multi) printf("@");
1481
+ printf("%s%c",n->u.string, n->dir);
1482
+ for (i=0; i<icost; i++) printf("]");
1483
+ return;
1484
+ }
1485
+
1486
+ /* Look for optional, and print only that */
1487
+ el = n->u.l;
1488
+ if (el == NULL)
1489
+ {
1490
+ for (i=0; i<icost; i++) printf("[");
1491
+ printf ("()");
1492
+ for (i=0; i<icost; i++) printf("]");
1493
+ return;
1494
+ }
1495
+
1496
+ for (i=0; i<icost; i++) printf("[");
1497
+ if ((n->type == OR_type) &&
1498
+ el && el->e && (NULL == el->e->u.l))
1499
+ {
1500
+ printf ("{");
1501
+ print_expression_parens(el->next->e, FALSE);
1502
+ printf ("}");
1503
+ return;
1504
+ }
1505
+
1506
+ if ((icost == 0) && need_parens) printf("(");
1507
+
1508
+ /* print left side of binary expr */
1509
+ print_expression_parens(el->e, TRUE);
1510
+
1511
+ /* get a funny "and optional" when its a named expression thing. */
1512
+ if ((n->type == AND_type) && (el->next == NULL))
1513
+ {
1514
+ return;
1515
+ }
1516
+
1517
+ if (n->type == AND_type) printf(" & ");
1518
+ if (n->type == OR_type) printf(" or ");
1519
+
1520
+ /* print right side of binary expr */
1521
+ el = el->next;
1522
+ if (el == NULL)
1523
+ {
1524
+ printf ("()");
1525
+ }
1526
+ else
1527
+ {
1528
+ if (el->e->type == n->type)
1529
+ {
1530
+ print_expression_parens(el->e, FALSE);
1531
+ }
1532
+ else
1533
+ {
1534
+ print_expression_parens(el->e, TRUE);
1535
+ }
1536
+ if (el->next != NULL)
1537
+ printf ("\nERROR! Unexpected list!\n");
1538
+ }
1539
+
1540
+ for (i=0; i<icost; i++) printf("]");
1541
+ if ((icost == 0) && need_parens) printf(")");
1542
+ }
1543
+
1544
+ void print_expression(Exp * n)
1545
+ {
1546
+ print_expression_parens(n, FALSE);
1547
+ printf("\n");
1548
+ }
1549
+ #endif /* INFIX_NOTATION */
1550
+
1551
+ static void rprint_dictionary_data(Dict_node * n)
1552
+ {
1553
+ if (n == NULL) return;
1554
+ rprint_dictionary_data(n->left);
1555
+ printf("%s: ", n->string);
1556
+ print_expression(n->exp);
1557
+ printf("\n");
1558
+ rprint_dictionary_data(n->right);
1559
+ }
1560
+
1561
+ /**
1562
+ * Dump the entire contents of the dictionary
1563
+ * XXX This is not currently called by anything, but is a "good thing
1564
+ * to keep around".
1565
+ */
1566
+ void print_dictionary_data(Dictionary dict)
1567
+ {
1568
+ rprint_dictionary_data(dict->root);
1569
+ }
1570
+
1571
+ int read_dictionary(Dictionary dict)
1572
+ {
1573
+ if (!link_advance(dict))
1574
+ {
1575
+ return 0;
1576
+ }
1577
+ while (dict->token[0] != '\0')
1578
+ {
1579
+ if (!read_entry(dict))
1580
+ {
1581
+ return 0;
1582
+ }
1583
+ }
1584
+ dict->root = dsw_tree_to_vine(dict->root);
1585
+ dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);
1586
+ return 1;
1587
+ }
1588
+
1589
+ /* ======================================================================= */
1590
+ /* the following functions are for handling deletion */
1591
+ /**
1592
+ * Returns true if it finds a non-idiom dict_node in a file that matches
1593
+ * the string s.
1594
+ *
1595
+ ** note: this now DOES include non-file words in its search.
1596
+ *
1597
+ * Also sets parent and to_be_deleted appropriately.
1598
+ */
1599
+ static int find_one_non_idiom_node(Dict_node * p, Dict_node * dn,
1600
+ const char * s,
1601
+ Dict_node **parent, Dict_node **to_be_deleted)
1602
+ {
1603
+ int m;
1604
+ if (dn == NULL) return FALSE;
1605
+ m = dict_order_wild(s, dn->string);
1606
+ if (m <= 0) {
1607
+ if (find_one_non_idiom_node(dn,dn->left, s, parent, to_be_deleted)) return TRUE;
1608
+ }
1609
+ /* if ((m == 0) && (!is_idiom_word(dn->string)) && (dn->file != NULL)) { */
1610
+ if ((m == 0) && (!is_idiom_word(dn->string))) {
1611
+ *to_be_deleted = dn;
1612
+ *parent = p;
1613
+ return TRUE;
1614
+ }
1615
+ if (m >= 0) {
1616
+ if (find_one_non_idiom_node(dn,dn->right, s, parent, to_be_deleted)) return TRUE;
1617
+ }
1618
+ return FALSE;
1619
+ }
1620
+
1621
+ static void set_parent_of_node(Dictionary dict,
1622
+ Dict_node *p,
1623
+ Dict_node * del,
1624
+ Dict_node * newnode)
1625
+ {
1626
+ if (p == NULL) {
1627
+ dict->root = newnode;
1628
+ } else {
1629
+ if (p->left == del) {
1630
+ p->left = newnode;
1631
+ } else if (p->right == del) {
1632
+ p->right = newnode;
1633
+ } else {
1634
+ assert(FALSE, "Dictionary broken?");
1635
+ }
1636
+ }
1637
+ }
1638
+
1639
+ /**
1640
+ * This deletes all the non-idiom words of the dictionary that match
1641
+ * the given string. Returns TRUE if some deleted, FALSE otherwise.
1642
+ *
1643
+ * XXX Note: this function is not currently used anywhere in the code,
1644
+ * but it could be useful for general dictionary editing.
1645
+ */
1646
+ int delete_dictionary_words(Dictionary dict, const char * s)
1647
+ {
1648
+ Dict_node *pred, *pred_parent;
1649
+ Dict_node *parent, *to_be_deleted;
1650
+
1651
+ if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return FALSE;
1652
+ for(;;) {
1653
+ /* now parent and to_be_deleted are set */
1654
+ if (to_be_deleted->file != NULL) {
1655
+ to_be_deleted->file->changed = TRUE;
1656
+ }
1657
+ if (to_be_deleted->left == NULL) {
1658
+ set_parent_of_node(dict, parent, to_be_deleted, to_be_deleted->right);
1659
+ free_dict_node(to_be_deleted);
1660
+ } else {
1661
+ pred_parent = to_be_deleted;
1662
+ pred = to_be_deleted->left;
1663
+ while(pred->right != NULL) {
1664
+ pred_parent = pred;
1665
+ pred = pred->right;
1666
+ }
1667
+ to_be_deleted->string = pred->string;
1668
+ to_be_deleted->file = pred->file;
1669
+ to_be_deleted->exp = pred->exp;
1670
+ set_parent_of_node(dict, pred_parent, pred, pred->left);
1671
+ free_dict_node(pred);
1672
+ }
1673
+ if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return TRUE;
1674
+ }
1675
+ }
1676
+
1677
+ static void free_Word_file(Word_file * wf)
1678
+ {
1679
+ Word_file *wf1;
1680
+
1681
+ for (;wf != NULL; wf = wf1) {
1682
+ wf1 = wf->next;
1683
+ xfree((char *) wf, sizeof(Word_file));
1684
+ }
1685
+ }
1686
+
1687
+ /**
1688
+ * The following two functions free the Exp s and the
1689
+ * E_lists of the dictionary. Not to be confused with
1690
+ * free_E_list in utilities.c
1691
+ */
1692
+ static void free_Elist(E_list * l)
1693
+ {
1694
+ E_list * l1;
1695
+
1696
+ for (; l != NULL; l = l1) {
1697
+ l1 = l->next;
1698
+ xfree(l, sizeof(E_list));
1699
+ }
1700
+ }
1701
+
1702
+ static void free_Exp_list(Exp * e)
1703
+ {
1704
+ Exp * e1;
1705
+ for (; e != NULL; e = e1)
1706
+ {
1707
+ e1 = e->next;
1708
+ if (e->type != CONNECTOR_type)
1709
+ {
1710
+ free_Elist(e->u.l);
1711
+ }
1712
+ exp_free(e);
1713
+ }
1714
+ }
1715
+
1716
+ void free_dictionary(Dictionary dict)
1717
+ {
1718
+ free_dict_node_recursive(dict->root);
1719
+ free_Word_file(dict->word_file_header);
1720
+ free_Exp_list(dict->exp_list);
1721
+ }
1722
+
1723
+ /**
1724
+ * dict_display_word_info() - display the information about the given word.
1725
+ */
1726
+ void dict_display_word_info(Dictionary dict, const char * s)
1727
+ {
1728
+ Dict_node *dn, *dn_head;
1729
+ Disjunct * d1, * d2;
1730
+ int len;
1731
+ dn_head = dictionary_lookup_list(dict, s);
1732
+ if (dn_head == NULL)
1733
+ {
1734
+ printf(" \"%s\" matches nothing in the dictionary.\n", s);
1735
+ return;
1736
+ }
1737
+ printf("Matches:\n");
1738
+ for (dn = dn_head; dn != NULL; dn = dn->right)
1739
+ {
1740
+ len = 0;
1741
+ d1 = build_disjuncts_for_dict_node(dn);
1742
+ for(d2 = d1 ; d2 != NULL; d2 = d2->next)
1743
+ {
1744
+ len++;
1745
+ }
1746
+ free_disjuncts(d1);
1747
+ printf(" ");
1748
+ left_print_string(stdout, dn->string,
1749
+ " ");
1750
+ printf(" %5d disjuncts ", len);
1751
+ if (dn->file != NULL)
1752
+ {
1753
+ printf("<%s>", dn->file->file);
1754
+ }
1755
+ printf("\n");
1756
+ }
1757
+ free_lookup_list(dn_head);
1758
+ return;
1759
+ }
1760
+
1761
+ /**
1762
+ * dict_display_word_expr() - display the connector info for a given word.
1763
+ */
1764
+ void dict_display_word_expr(Dictionary dict, const char * s)
1765
+ {
1766
+ Dict_node *dn, *dn_head;
1767
+
1768
+ dn_head = dictionary_lookup_list(dict, s);
1769
+ if (dn_head == NULL)
1770
+ {
1771
+ printf(" \"%s\" matches nothing in the dictionary.\n", s);
1772
+ return;
1773
+ }
1774
+ printf("\nExpressions:\n");
1775
+ for (dn = dn_head; dn != NULL; dn = dn->right)
1776
+ {
1777
+ printf(" ");
1778
+ left_print_string(stdout, dn->string,
1779
+ " ");
1780
+ print_expression(dn->exp);
1781
+ printf("\n\n");
1782
+ }
1783
+ free_lookup_list(dn_head);
1784
+ return;
1785
+ }