grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344)
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,329 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+ #include "disjunct-utils.h"
16
+
17
+ /* This file contains the functions for massaging disjuncts of the
18
+ sentence in special ways having to do with conjunctions.
19
+ The only function called from the outside world is
20
+ install_special_conjunctive_connectors()
21
+
22
+ It would be nice if this code was written more transparently. In
23
+ other words, there should be some fairly general functions that
24
+ manipulate disjuncts, and take words like "neither" etc as input
25
+ parameters, so as to encapsulate the changes being made for special
26
+ words. This would not be too hard to do, but it's not a high priority.
27
+ -DS 3/98
28
+ */
29
+
30
+ #define COMMA_LABEL (-2) /* to hook the comma to the following "and" */
31
+ #define EITHER_LABEL (-3) /* to connect the "either" to the following "or" */
32
+ #define NEITHER_LABEL (-4) /* to connect the "neither" to the following "nor"*/
33
+ #define NOT_LABEL (-5) /* to connect the "not" to the following "but"*/
34
+ #define NOTONLY_LABEL (-6) /* to connect the "not" to the following "only"*/
35
+ #define BOTH_LABEL (-7) /* to connect the "both" to the following "and"*/
36
+
37
+ /* There's a problem with installing "...but...", "not only...but...", and
38
+ "not...but...", which is that the current comma mechanism will allow
39
+ a list separated by commas. "Not only John, Mary but Jim came"
40
+ The best way to prevent this is to make it impossible for the comma
41
+ to attach to the "but", via some sort of additional subscript on commas.
42
+
43
+ I can't think of a good way to prevent this.
44
+ */
45
+
46
+ /* The following functions all do slightly different variants of the
47
+ following thing:
48
+
49
+ Catenate to the disjunct list pointed to by d, a new disjunct list.
50
+ The new list is formed by copying the old list, and adding the new
51
+ connector somewhere in the old disjunct, for disjuncts that satisfy
52
+ certain conditions
53
+ */
54
+
55
+ /**
56
+ * glom_comma_connector() --
57
+ * In this case the connector is to connect to the comma to the
58
+ * left of an "and" or an "or". Only gets added next to a fat link
59
+ */
60
+ static Disjunct * glom_comma_connector(Disjunct * d)
61
+ {
62
+ Disjunct * d_list, * d1, * d2;
63
+ Connector * c, * c1;
64
+ d_list = NULL;
65
+ for (d1 = d; d1!=NULL; d1=d1->next) {
66
+ if (d1->left == NULL) continue;
67
+ for (c = d1->left; c->next != NULL; c = c->next)
68
+ ;
69
+ if (c->label < 0) continue; /* last one must be a fat link */
70
+
71
+ d2 = copy_disjunct(d1);
72
+ d2->next = d_list;
73
+ d_list = d2;
74
+
75
+ c1 = connector_new();
76
+ c1->label = COMMA_LABEL;
77
+
78
+ c->next = c1;
79
+ }
80
+ return catenate_disjuncts(d, d_list);
81
+ }
82
+
83
+ /**
84
+ * In this case the connector is to connect to the "either", "neither",
85
+ * "not", or some auxilliary d to the current which is a conjunction.
86
+ * Only gets added next to a fat link, but before it (not after it)
87
+ * In the case of "nor", we don't create new disjuncts, we merely modify
88
+ * existing ones. This forces the fat link uses of "nor" to
89
+ * use a neither. (Not the case with "or".) If necessary=FALSE, then
90
+ * duplication is done, otherwise it isn't
91
+ */
92
+ static Disjunct * glom_aux_connector(Disjunct * d, int label, int necessary)
93
+ {
94
+ Disjunct * d_list, * d1, * d2;
95
+ Connector * c, * c1, *c2;
96
+ d_list = NULL;
97
+ for (d1 = d; d1!=NULL; d1=d1->next) {
98
+ if (d1->left == NULL) continue;
99
+ for (c = d1->left; c->next != NULL; c = c->next)
100
+ ;
101
+ if (c->label < 0) continue; /* last one must be a fat link */
102
+
103
+ if (!necessary) {
104
+ d2 = copy_disjunct(d1);
105
+ d2->next = d_list;
106
+ d_list = d2;
107
+ }
108
+
109
+ c1 = connector_new();
110
+ c1->label = label;
111
+ c1->next = c;
112
+
113
+ if (d1->left == c) {
114
+ d1->left = c1;
115
+ } else {
116
+ for (c2 = d1->left; c2->next != c; c2 = c2->next)
117
+ ;
118
+ c2->next = c1;
119
+ }
120
+ }
121
+ return catenate_disjuncts(d, d_list);
122
+ }
123
+
124
+ /**
125
+ * This adds one connector onto the beginning of the left (or right)
126
+ * connector list of d. The label and string of the connector are
127
+ * specified
128
+ */
129
+ static Disjunct * add_one_connector(int label, int dir, const char *cs, Disjunct * d)
130
+ {
131
+ Connector * c;
132
+
133
+ c = connector_new();
134
+ c->string = cs;
135
+ c->label = label;
136
+
137
+ if (dir == '+') {
138
+ c->next = d->right;
139
+ d->right = c;
140
+ } else {
141
+ c->next = d->left;
142
+ d->left = c;
143
+ }
144
+ return d;
145
+ }
146
+
147
+ /**
148
+ * special_disjunct() --
149
+ * Builds a new disjunct with one connector pointing in direction dir
150
+ * (which is '+' or '-'). The label and string of the connector
151
+ * are specified, as well as the string of the disjunct.
152
+ * The next pointer of the new disjunct set to NULL, so it can be
153
+ * regarded as a list.
154
+ */
155
+ static Disjunct * special_disjunct(int label, int dir, const char *cs, const char * ds)
156
+ {
157
+ Disjunct * d1;
158
+ Connector * c;
159
+ d1 = (Disjunct *) xalloc(sizeof(Disjunct));
160
+ d1->cost = 0;
161
+ d1->string = ds;
162
+ d1->next = NULL;
163
+
164
+ c = connector_new();
165
+ c->string = cs;
166
+ c->label = label;
167
+
168
+ if (dir == '+') {
169
+ d1->left = NULL;
170
+ d1->right = c;
171
+ } else {
172
+ d1->right = NULL;
173
+ d1->left = c;
174
+ }
175
+ return d1;
176
+ }
177
+
178
+ /**
179
+ * Finds all places in the sentence where a comma is followed by
180
+ * a conjunction ("and", "or", "but", or "nor"). It modifies these comma
181
+ * disjuncts, and those of the following word, to allow the following
182
+ * word to absorb the comma (if used as a conjunction).
183
+ */
184
+ static void construct_comma(Sentence sent)
185
+ {
186
+ int w;
187
+ for (w=0; w<sent->length-1; w++) {
188
+ if ((strcmp(sent->word[w].string, ",")==0) && sent->is_conjunction[w+1]) {
189
+ sent->word[w].d = catenate_disjuncts(special_disjunct(COMMA_LABEL,'+',"", ","), sent->word[w].d);
190
+ sent->word[w+1].d = glom_comma_connector(sent->word[w+1].d);
191
+ }
192
+ }
193
+ }
194
+
195
+
196
+ /** Returns TRUE if one of the words in the sentence is s */
197
+ static int sentence_contains(Sentence sent, const char * s)
198
+ {
199
+ int w;
200
+ for (w=0; w<sent->length; w++) {
201
+ if (strcmp(sent->word[w].string, s) == 0) return TRUE;
202
+ }
203
+ return FALSE;
204
+ }
205
+
206
+ /**
207
+ * The functions below put the special connectors on certain auxilliary
208
+ words to be used with conjunctions. Examples: either, neither,
209
+ both...and..., not only...but...
210
+ XXX FIXME: This routine uses "sentence_contains" to test for explicit
211
+ English words, and clearly this fails for other langauges!! XXX FIXME!
212
+ */
213
+
214
+ static void construct_either(Sentence sent)
215
+ {
216
+ int w;
217
+ if (!sentence_contains(sent, "either")) return;
218
+ for (w=0; w<sent->length; w++) {
219
+ if (strcmp(sent->word[w].string, "either") != 0) continue;
220
+ sent->word[w].d = catenate_disjuncts(
221
+ special_disjunct(EITHER_LABEL,'+',"", "either"),
222
+ sent->word[w].d);
223
+ }
224
+
225
+ for (w=0; w<sent->length; w++) {
226
+ if (strcmp(sent->word[w].string, "or") != 0) continue;
227
+ sent->word[w].d = glom_aux_connector
228
+ (sent->word[w].d, EITHER_LABEL, FALSE);
229
+ }
230
+ }
231
+
232
+ static void construct_neither(Sentence sent)
233
+ {
234
+ int w;
235
+ if (!sentence_contains(sent, "neither")) {
236
+ /* I don't see the point removing disjuncts on "nor". I
237
+ Don't know why I did this. What's the problem keeping the
238
+ stuff explicitely defined for "nor" in the dictionary? --DS 3/98 */
239
+ #if 0
240
+ for (w=0; w<sent->length; w++) {
241
+ if (strcmp(sent->word[w].string, "nor") != 0) continue;
242
+ free_disjuncts(sent->word[w].d);
243
+ sent->word[w].d = NULL; /* a nor with no neither is dead */
244
+ }
245
+ #endif
246
+ return;
247
+ }
248
+ for (w=0; w<sent->length; w++) {
249
+ if (strcmp(sent->word[w].string, "neither") != 0) continue;
250
+ sent->word[w].d = catenate_disjuncts(
251
+ special_disjunct(NEITHER_LABEL,'+',"", "neither"),
252
+ sent->word[w].d);
253
+ }
254
+
255
+ for (w=0; w<sent->length; w++) {
256
+ if (strcmp(sent->word[w].string, "nor") != 0) continue;
257
+ sent->word[w].d = glom_aux_connector
258
+ (sent->word[w].d, NEITHER_LABEL, TRUE);
259
+ }
260
+ }
261
+
262
+ static void construct_notonlybut(Sentence sent)
263
+ {
264
+ int w;
265
+ Disjunct *d;
266
+ if (!sentence_contains(sent, "not")) {
267
+ return;
268
+ }
269
+ for (w=0; w<sent->length; w++) {
270
+ if (strcmp(sent->word[w].string, "not") != 0) continue;
271
+ sent->word[w].d = catenate_disjuncts(
272
+ special_disjunct(NOT_LABEL,'+',"", "not"),
273
+ sent->word[w].d);
274
+ if (w<sent->length-1 && strcmp(sent->word[w+1].string, "only")==0) {
275
+ sent->word[w+1].d = catenate_disjuncts(
276
+ special_disjunct(NOTONLY_LABEL, '-',"","only"),
277
+ sent->word[w+1].d);
278
+ d = special_disjunct(NOTONLY_LABEL, '+', "","not");
279
+ d = add_one_connector(NOT_LABEL,'+',"", d);
280
+ sent->word[w].d = catenate_disjuncts(d, sent->word[w].d);
281
+ }
282
+ }
283
+ /* The code below prevents sentences such as the following from
284
+ parsing:
285
+ it was not carried out by Serbs but by Croats */
286
+
287
+
288
+ /* We decided that this is a silly thing to. Here's the bug report
289
+ caused by this:
290
+
291
+ Bug with conjunctions. Some that work with "and" but they don't work
292
+ with "but". "He was not hit by John and by Fred".
293
+ (Try replacing "and" by "but" and it does not work.
294
+ It's getting confused by the "not".)
295
+ */
296
+ for (w=0; w<sent->length; w++) {
297
+ if (strcmp(sent->word[w].string, "but") != 0) continue;
298
+ sent->word[w].d = glom_aux_connector
299
+ (sent->word[w].d, NOT_LABEL, FALSE);
300
+ /* The above line use to have a TRUE in it */
301
+ }
302
+ }
303
+
304
+ static void construct_both(Sentence sent)
305
+ {
306
+ int w;
307
+ if (!sentence_contains(sent, "both")) return;
308
+ for (w=0; w<sent->length; w++) {
309
+ if (strcmp(sent->word[w].string, "both") != 0) continue;
310
+ sent->word[w].d = catenate_disjuncts(
311
+ special_disjunct(BOTH_LABEL,'+',"", "both"),
312
+ sent->word[w].d);
313
+ }
314
+
315
+ for (w=0; w<sent->length; w++) {
316
+ if (strcmp(sent->word[w].string, "and") != 0) continue;
317
+ sent->word[w].d = glom_aux_connector(sent->word[w].d, BOTH_LABEL, FALSE);
318
+ }
319
+ }
320
+
321
+ void install_special_conjunctive_connectors(Sentence sent)
322
+ {
323
+ construct_either(sent); /* special connectors for "either" */
324
+ construct_neither(sent); /* special connectors for "neither" */
325
+ construct_notonlybut(sent); /* special connectors for "not..but.." */
326
+ /* and "not only..but.." */
327
+ construct_both(sent); /* special connectors for "both..and.." */
328
+ construct_comma(sent); /* special connectors for extra comma */
329
+ }
@@ -0,0 +1,13 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ void install_special_conjunctive_connectors(Sentence sent);
@@ -0,0 +1,1113 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ /* see bottom of file for comments on post processing */
15
+
16
+ #include <stdarg.h>
17
+ #include <memory.h>
18
+ #include "api.h"
19
+ #include "error.h"
20
+
21
+ #define PP_MAX_DOMAINS 128
22
+
23
+ /***************** utility routines (not exported) ***********************/
24
+
25
+ static int string_in_list(const char * s, const char * a[])
26
+ {
27
+ /* returns FALSE if the string s does not match anything in
28
+ the array. The array elements are post-processing symbols */
29
+ int i;
30
+ for (i=0; a[i] != NULL; i++)
31
+ if (post_process_match(a[i], s)) return TRUE;
32
+ return FALSE;
33
+ }
34
+
35
+ static int find_domain_name(Postprocessor *pp, const char *link)
36
+ {
37
+ /* Return the name of the domain associated with the provided starting
38
+ link. Return -1 if link isn't associated with a domain. */
39
+ int i,domain;
40
+ StartingLinkAndDomain *sllt = pp->knowledge->starting_link_lookup_table;
41
+ for (i=0;;i++)
42
+ {
43
+ domain = sllt[i].domain;
44
+ if (domain==-1) return -1; /* hit the end-of-list sentinel */
45
+ if (post_process_match(sllt[i].starting_link, link)) return domain;
46
+ }
47
+ }
48
+
49
+ static int contained_in(Domain * d1, Domain * d2, Sublinkage *sublinkage)
50
+ {
51
+ /* returns TRUE if domain d1 is contained in domain d2 */
52
+ char mark[MAX_LINKS];
53
+ List_o_links * lol;
54
+ memset(mark, 0, sublinkage->num_links*(sizeof mark[0]));
55
+ for (lol=d2->lol; lol != NULL; lol = lol->next)
56
+ mark[lol->link] = TRUE;
57
+ for (lol=d1->lol; lol != NULL; lol = lol->next)
58
+ if (!mark[lol->link]) return FALSE;
59
+ return TRUE;
60
+ }
61
+
62
+ static int link_in_domain(int link, Domain * d)
63
+ {
64
+ /* returns the predicate "the given link is in the given domain" */
65
+ List_o_links * lol;
66
+ for (lol = d->lol; lol != NULL; lol = lol->next)
67
+ if (lol->link == link) return TRUE;
68
+ return FALSE;
69
+ }
70
+
71
+ /* #define CHECK_DOMAIN_NESTING */
72
+
73
+ #if defined(CHECK_DOMAIN_NESTING)
74
+ /* Although this is no longer used, I'm leaving the code here for future reference --DS 3/98 */
75
+
76
+ static int check_domain_nesting(Postprocessor *pp, int num_links)
77
+ {
78
+ /* returns TRUE if the domains actually form a properly nested structure */
79
+ Domain * d1, * d2;
80
+ int counts[4];
81
+ char mark[MAX_LINKS];
82
+ List_o_links * lol;
83
+ int i;
84
+ for (d1=pp->pp_data.domain_array; d1 < pp->pp_data.domain_array + pp->pp_data.N_domains; d1++) {
85
+ for (d2=d1+1; d2 < pp->pp_data.domain_array + pp->pp_data.N_domains; d2++) {
86
+ memset(mark, 0, num_links*(sizeof mark[0]));
87
+ for (lol=d2->lol; lol != NULL; lol = lol->next) {
88
+ mark[lol->link] = 1;
89
+ }
90
+ for (lol=d1->lol; lol != NULL; lol = lol->next) {
91
+ mark[lol->link] += 2;
92
+ }
93
+ counts[0] = counts[1] = counts[2] = counts[3] = 0;
94
+ for (i=0; i<num_links; i++)
95
+ counts[(int)mark[i]]++;/* (int) cast avoids compiler warning DS 7/97 */
96
+ if ((counts[1] > 0) && (counts[2] > 0) && (counts[3] > 0))
97
+ return FALSE;
98
+ }
99
+ }
100
+ return TRUE;
101
+ }
102
+ #endif
103
+
104
+ /**
105
+ * Free the list of links pointed to by lol
106
+ * (does not free any strings)
107
+ */
108
+ static void free_List_o_links(List_o_links *lol)
109
+ {
110
+ List_o_links * xlol;
111
+ while(lol != NULL) {
112
+ xlol = lol->next;
113
+ xfree(lol, sizeof(List_o_links));
114
+ lol = xlol;
115
+ }
116
+ }
117
+
118
+ static void free_D_tree_leaves(DTreeLeaf *dtl)
119
+ {
120
+ DTreeLeaf * xdtl;
121
+ while(dtl != NULL) {
122
+ xdtl = dtl->next;
123
+ xfree(dtl, sizeof(DTreeLeaf));
124
+ dtl = xdtl;
125
+ }
126
+ }
127
+
128
+ /**
129
+ * Gets called after every invocation of post_process()
130
+ */
131
+ void post_process_free_data(PP_data * ppd)
132
+ {
133
+ int w, d;
134
+ for (w = 0; w < ppd->length; w++)
135
+ {
136
+ free_List_o_links(ppd->word_links[w]);
137
+ ppd->word_links[w] = NULL;
138
+ }
139
+ for (d = 0; d < ppd->N_domains; d++)
140
+ {
141
+ free_List_o_links(ppd->domain_array[d].lol);
142
+ ppd->domain_array[d].lol = NULL;
143
+ free_D_tree_leaves(ppd->domain_array[d].child);
144
+ ppd->domain_array[d].child = NULL;
145
+ }
146
+ free_List_o_links(ppd->links_to_ignore);
147
+ ppd->links_to_ignore = NULL;
148
+ }
149
+
150
+ #ifdef THIS_FUNCTION_IS_NOT_CURRENTLY_USED
151
+ static void connectivity_dfs(Postprocessor *pp, Sublinkage *sublinkage,
152
+ int w, pp_linkset *ls)
153
+ {
154
+ List_o_links *lol;
155
+ pp->visited[w] = TRUE;
156
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next)
157
+ {
158
+ if (!pp->visited[lol->word] &&
159
+ !pp_linkset_match(ls, sublinkage->link[lol->link]->name))
160
+ connectivity_dfs(pp, sublinkage, lol->word, ls);
161
+ }
162
+ }
163
+ #endif /* THIS_FUNCTION_IS_NOT_CURRENTLY_USED */
164
+
165
+ static void mark_reachable_words(Postprocessor *pp, int w)
166
+ {
167
+ List_o_links *lol;
168
+ if (pp->visited[w]) return;
169
+ pp->visited[w] = TRUE;
170
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next)
171
+ mark_reachable_words(pp, lol->word);
172
+ }
173
+
174
+ static int is_connected(Postprocessor *pp)
175
+ {
176
+ /* Returns true if the linkage is connected, considering words
177
+ that have at least one edge....this allows conjunctive sentences
178
+ not to be thrown out. */
179
+ int i;
180
+ for (i=0; i<pp->pp_data.length; i++)
181
+ pp->visited[i] = (pp->pp_data.word_links[i] == NULL);
182
+ mark_reachable_words(pp, 0);
183
+ for (i=0; i<pp->pp_data.length; i++)
184
+ if (!pp->visited[i]) return FALSE;
185
+ return TRUE;
186
+ }
187
+
188
+
189
+ static void build_type_array(Postprocessor *pp)
190
+ {
191
+ D_type_list * dtl;
192
+ int d;
193
+ List_o_links * lol;
194
+ for (d=0; d<pp->pp_data.N_domains; d++)
195
+ {
196
+ for (lol=pp->pp_data.domain_array[d].lol; lol != NULL; lol = lol->next)
197
+ {
198
+ dtl = (D_type_list *) xalloc(sizeof(D_type_list));
199
+ dtl->next = pp->pp_node->d_type_array[lol->link];
200
+ pp->pp_node->d_type_array[lol->link] = dtl;
201
+ dtl->type = pp->pp_data.domain_array[d].type;
202
+ }
203
+ }
204
+ }
205
+
206
+ void free_d_type(D_type_list * dtl)
207
+ {
208
+ D_type_list * dtlx;
209
+ for (; dtl!=NULL; dtl=dtlx) {
210
+ dtlx = dtl->next;
211
+ xfree((void*) dtl, sizeof(D_type_list));
212
+ }
213
+ }
214
+
215
+ D_type_list * copy_d_type(D_type_list * dtl)
216
+ {
217
+ D_type_list *dtlx, *dtlcurr=NULL, *dtlhead=NULL;
218
+ for (; dtl!=NULL; dtl=dtl->next)
219
+ {
220
+ dtlx = (D_type_list *) xalloc(sizeof(D_type_list));
221
+ *dtlx = *dtl;
222
+ if (dtlhead == NULL)
223
+ {
224
+ dtlhead = dtlx;
225
+ dtlcurr = dtlx;
226
+ }
227
+ else
228
+ {
229
+ dtlcurr->next = dtlx;
230
+ dtlcurr = dtlx;
231
+ }
232
+ }
233
+ return dtlhead;
234
+ }
235
+
236
+ /** free the pp node from last time */
237
+ static void free_pp_node(Postprocessor *pp)
238
+ {
239
+ int i;
240
+ PP_node *ppn = pp->pp_node;
241
+ pp->pp_node = NULL;
242
+ if (ppn == NULL) return;
243
+
244
+ for (i=0; i<MAX_LINKS; i++)
245
+ {
246
+ free_d_type(ppn->d_type_array[i]);
247
+ }
248
+ xfree((void*) ppn, sizeof(PP_node));
249
+ }
250
+
251
+
252
+ /** set up a fresh pp_node for later use */
253
+ static void alloc_pp_node(Postprocessor *pp)
254
+ {
255
+ int i;
256
+ pp->pp_node=(PP_node *) xalloc(sizeof(PP_node));
257
+ pp->pp_node->violation = NULL;
258
+ for (i=0; i<MAX_LINKS; i++)
259
+ pp->pp_node->d_type_array[i] = NULL;
260
+ }
261
+
262
+ static void reset_pp_node(Postprocessor *pp)
263
+ {
264
+ free_pp_node(pp);
265
+ alloc_pp_node(pp);
266
+ }
267
+
268
+ /************************ rule application *******************************/
269
+
270
+ static int apply_rules(Postprocessor *pp,
271
+ int (applyfn) (Postprocessor *,Sublinkage *,pp_rule *),
272
+ Sublinkage *sublinkage,
273
+ pp_rule *rule_array,
274
+ const char **msg)
275
+ {
276
+ int i;
277
+ for (i=0; (*msg=rule_array[i].msg)!=NULL; i++)
278
+ if (!applyfn(pp, sublinkage, &(rule_array[i]))) return 0;
279
+ return 1;
280
+ }
281
+
282
+ static int
283
+ apply_relevant_rules(Postprocessor *pp,
284
+ int(applyfn)(Postprocessor *pp,Sublinkage*,pp_rule *rule),
285
+ Sublinkage *sublinkage,
286
+ pp_rule *rule_array,
287
+ int *relevant_rules,
288
+ const char **msg)
289
+ {
290
+ int i, idx;
291
+
292
+ /* if we didn't accumulate link names for this sentence, we need to apply
293
+ all rules */
294
+ if (pp_linkset_population(pp->set_of_links_of_sentence)==0) {
295
+ return apply_rules(pp, applyfn, sublinkage, rule_array, msg);
296
+ }
297
+
298
+ /* we did, and we don't */
299
+ for (i=0; (idx=relevant_rules[i])!=-1; i++) {
300
+ *msg = rule_array[idx].msg; /* Adam had forgotten this -- DS 4/9/98 */
301
+ if (!applyfn(pp, sublinkage, &(rule_array[idx]))) return 0;
302
+ }
303
+ return 1;
304
+ }
305
+
306
+ static int
307
+ apply_contains_one(Postprocessor *pp, Sublinkage *sublinkage, pp_rule *rule)
308
+ {
309
+ /* returns TRUE if and only if all groups containing the specified link
310
+ contain at least one from the required list. (as determined by exact
311
+ string matching) */
312
+ DTreeLeaf * dtl;
313
+ int d, count;
314
+ for (d=0; d<pp->pp_data.N_domains; d++)
315
+ {
316
+ for (dtl = pp->pp_data.domain_array[d].child;
317
+ dtl != NULL &&
318
+ !post_process_match(rule->selector,
319
+ sublinkage->link[dtl->link]->name);
320
+ dtl = dtl->next) {}
321
+ if (dtl != NULL)
322
+ {
323
+ /* selector link of rule appears in this domain */
324
+ count=0;
325
+ for (dtl = pp->pp_data.domain_array[d].child; dtl != NULL; dtl = dtl->next)
326
+ if (string_in_list(sublinkage->link[dtl->link]->name,
327
+ rule->link_array))
328
+ {
329
+ count=1;
330
+ break;
331
+ }
332
+ if (count == 0) return FALSE;
333
+ }
334
+ }
335
+ return TRUE;
336
+ }
337
+
338
+
339
+ static int
340
+ apply_contains_none(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
341
+ {
342
+ /* returns TRUE if and only if:
343
+ all groups containing the selector link do not contain anything
344
+ from the link_array contained in the rule. Uses exact string matching. */
345
+ DTreeLeaf * dtl;
346
+ int d;
347
+ for (d=0; d<pp->pp_data.N_domains; d++)
348
+ {
349
+ for (dtl = pp->pp_data.domain_array[d].child;
350
+ dtl != NULL &&
351
+ !post_process_match(rule->selector,
352
+ sublinkage->link[dtl->link]->name);
353
+ dtl = dtl->next) {}
354
+ if (dtl != NULL)
355
+ {
356
+ /* selector link of rule appears in this domain */
357
+ for (dtl = pp->pp_data.domain_array[d].child; dtl != NULL; dtl = dtl->next)
358
+ if (string_in_list(sublinkage->link[dtl->link]->name,
359
+ rule->link_array))
360
+ return FALSE;
361
+ }
362
+ }
363
+ return TRUE;
364
+ }
365
+
366
+ static int
367
+ apply_contains_one_globally(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
368
+ {
369
+ /* returns TRUE if and only if
370
+ (1) the sentence doesn't contain the selector link for the rule, or
371
+ (2) it does, and it also contains one or more from the rule's link set */
372
+
373
+ int i,j,count;
374
+ for (i=0; i<sublinkage->num_links; i++) {
375
+ if (sublinkage->link[i]->l == -1) continue;
376
+ if (post_process_match(rule->selector,sublinkage->link[i]->name)) break;
377
+ }
378
+ if (i==sublinkage->num_links) return TRUE;
379
+
380
+ /* selector link of rule appears in sentence */
381
+ count=0;
382
+ for (j=0; j<sublinkage->num_links && count==0; j++) {
383
+ if (sublinkage->link[j]->l == -1) continue;
384
+ if (string_in_list(sublinkage->link[j]->name, rule->link_array))
385
+ {
386
+ count=1;
387
+ break;
388
+ }
389
+ }
390
+ if (count==0) return FALSE; else return TRUE;
391
+ }
392
+
393
+ static int
394
+ apply_connected(Postprocessor *pp, Sublinkage *sublinkage, pp_rule *rule)
395
+ {
396
+ /* There is actually just one (or none, if user didn't specify it)
397
+ rule asserting that linkage is connected. */
398
+ if (!is_connected(pp)) return 0;
399
+ return 1;
400
+ }
401
+
402
+ #if 0
403
+ /* replaced in 3/98 with a slightly different algorithm shown below ---DS*/
404
+ static int
405
+ apply_connected_without(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
406
+ {
407
+ /* Returns true if the linkage is connected when ignoring the links
408
+ whose names are in the given list of link names.
409
+ Actually, what it does is this: it returns FALSE if the connectivity
410
+ of the subgraph reachable from word 0 changes as a result of deleting
411
+ these links. */
412
+ int i;
413
+ memset(pp->visited, 0, pp->pp_data.length*(sizeof pp->visited[0]));
414
+ mark_reachable_words(pp, 0);
415
+ for (i=0; i<pp->pp_data.length; i++)
416
+ pp->visited[i] = !pp->visited[i];
417
+ connectivity_dfs(pp, sublinkage, 0, rule->link_set);
418
+ for (i=0; i<pp->pp_data.length; i++)
419
+ if (pp->visited[i] == FALSE) return FALSE;
420
+ return TRUE;
421
+ }
422
+ #else
423
+
424
+ /* Here's the new algorithm: For each link in the linkage that is in the
425
+ must_form_a_cycle list, we want to make sure that that link
426
+ is in a cycle. We do this simply by deleting the link, then seeing if the
427
+ end points of that link are still connected.
428
+ */
429
+
430
+ static void reachable_without_dfs(Postprocessor *pp, Sublinkage *sublinkage, int a, int b, int w) {
431
+ /* This is a depth first search of words reachable from w, excluding any direct edge
432
+ between word a and word b. */
433
+ List_o_links *lol;
434
+ pp->visited[w] = TRUE;
435
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
436
+ if (!pp->visited[lol->word] && !(w == a && lol->word == b) && ! (w == b && lol->word == a)) {
437
+ reachable_without_dfs(pp, sublinkage, a, b, lol->word);
438
+ }
439
+ }
440
+ }
441
+
442
+ /**
443
+ * Returns TRUE if the linkage is connected when ignoring the links
444
+ * whose names are in the given list of link names.
445
+ * Actually, what it does is this: it returns FALSE if the connectivity
446
+ * of the subgraph reachable from word 0 changes as a result of deleting
447
+ * these links.
448
+ */
449
+ static int
450
+ apply_must_form_a_cycle(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
451
+ {
452
+ List_o_links *lol;
453
+ int w;
454
+ for (w=0; w<pp->pp_data.length; w++) {
455
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
456
+ if (w > lol->word) continue; /* only consider each edge once */
457
+ if (!pp_linkset_match(rule->link_set, sublinkage->link[lol->link]->name)) continue;
458
+ memset(pp->visited, 0, pp->pp_data.length*(sizeof pp->visited[0]));
459
+ reachable_without_dfs(pp, sublinkage, w, lol->word, w);
460
+ if (!pp->visited[lol->word]) return FALSE;
461
+ }
462
+ }
463
+
464
+ for (lol = pp->pp_data.links_to_ignore; lol != NULL; lol = lol->next) {
465
+ w = sublinkage->link[lol->link]->l;
466
+ /* (w, lol->word) are the left and right ends of the edge we're considering */
467
+ if (!pp_linkset_match(rule->link_set, sublinkage->link[lol->link]->name)) continue;
468
+ memset(pp->visited, 0, pp->pp_data.length*(sizeof pp->visited[0]));
469
+ reachable_without_dfs(pp, sublinkage, w, lol->word, w);
470
+ if (!pp->visited[lol->word]) return FALSE;
471
+ }
472
+
473
+ return TRUE;
474
+ }
475
+
476
+ #endif
477
+
478
+ static int
479
+ apply_bounded(Postprocessor *pp,Sublinkage *sublinkage,pp_rule *rule)
480
+ {
481
+ /* Checks to see that all domains with this name have the property that
482
+ all of the words that touch a link in the domain are not to the left
483
+ of the root word of the domain. */
484
+ int d, lw, d_type;
485
+ List_o_links * lol;
486
+ d_type = rule->domain;
487
+ for (d=0; d<pp->pp_data.N_domains; d++) {
488
+ if (pp->pp_data.domain_array[d].type != d_type) continue;
489
+ lw = sublinkage->link[pp->pp_data.domain_array[d].start_link]->l;
490
+ for (lol = pp->pp_data.domain_array[d].lol; lol != NULL; lol = lol->next) {
491
+ if (sublinkage->link[lol->link]->l < lw) return FALSE;
492
+ }
493
+ }
494
+ return TRUE;
495
+ }
496
+
497
+ /********************* various non-exported functions ***********************/
498
+
499
+ static void build_graph(Postprocessor *pp, Sublinkage *sublinkage)
500
+ {
501
+ /* fill in the pp->pp_data.word_links array with a list of words neighboring each
502
+ word (actually a list of links). The dir fields are not set, since this
503
+ (after fat-link-extraction) is an undirected graph. */
504
+ int i, link;
505
+ List_o_links * lol;
506
+
507
+ for (i=0; i<pp->pp_data.length; i++)
508
+ pp->pp_data.word_links[i] = NULL;
509
+
510
+ for (link=0; link<sublinkage->num_links; link++)
511
+ {
512
+ if (sublinkage->link[link]->l == -1) continue;
513
+ if (pp_linkset_match(pp->knowledge->ignore_these_links, sublinkage->link[link]->name)) {
514
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
515
+ lol->next = pp->pp_data.links_to_ignore;
516
+ pp->pp_data.links_to_ignore = lol;
517
+ lol->link = link;
518
+ lol->word = sublinkage->link[link]->r;
519
+ continue;
520
+ }
521
+
522
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
523
+ lol->next = pp->pp_data.word_links[sublinkage->link[link]->l];
524
+ pp->pp_data.word_links[sublinkage->link[link]->l] = lol;
525
+ lol->link = link;
526
+ lol->word = sublinkage->link[link]->r;
527
+
528
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
529
+ lol->next = pp->pp_data.word_links[sublinkage->link[link]->r];
530
+ pp->pp_data.word_links[sublinkage->link[link]->r] = lol;
531
+ lol->link = link;
532
+ lol->word = sublinkage->link[link]->l;
533
+ }
534
+ }
535
+
536
+ static void setup_domain_array(Postprocessor *pp,
537
+ int n, const char *string, int start_link)
538
+ {
539
+ /* set pp->visited[i] to FALSE */
540
+ memset(pp->visited, 0, pp->pp_data.length*(sizeof pp->visited[0]));
541
+ pp->pp_data.domain_array[n].string = string;
542
+ pp->pp_data.domain_array[n].lol = NULL;
543
+ pp->pp_data.domain_array[n].size = 0;
544
+ pp->pp_data.domain_array[n].start_link = start_link;
545
+ }
546
+
547
+ static void add_link_to_domain(Postprocessor *pp, int link)
548
+ {
549
+ List_o_links *lol;
550
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
551
+ lol->next = pp->pp_data.domain_array[pp->pp_data.N_domains].lol;
552
+ pp->pp_data.domain_array[pp->pp_data.N_domains].lol = lol;
553
+ pp->pp_data.domain_array[pp->pp_data.N_domains].size++;
554
+ lol->link = link;
555
+ }
556
+
557
+ static void depth_first_search(Postprocessor *pp, Sublinkage *sublinkage,
558
+ int w, int root,int start_link)
559
+ {
560
+ List_o_links *lol;
561
+ pp->visited[w] = TRUE;
562
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
563
+ if (lol->word < w && lol->link != start_link) {
564
+ add_link_to_domain(pp, lol->link);
565
+ }
566
+ }
567
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
568
+ if (!pp->visited[lol->word] && (lol->word != root) &&
569
+ !(lol->word < root && lol->word < w &&
570
+ pp_linkset_match(pp->knowledge->restricted_links,
571
+ sublinkage->link[lol->link]->name)))
572
+ depth_first_search(pp, sublinkage, lol->word, root, start_link);
573
+ }
574
+ }
575
+
576
+ static void bad_depth_first_search(Postprocessor *pp, Sublinkage *sublinkage,
577
+ int w, int root, int start_link)
578
+ {
579
+ List_o_links * lol;
580
+ pp->visited[w] = TRUE;
581
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
582
+ if ((lol->word < w) && (lol->link != start_link) && (w != root)) {
583
+ add_link_to_domain(pp, lol->link);
584
+ }
585
+ }
586
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
587
+ if ((!pp->visited[lol->word]) && !(w == root && lol->word < w) &&
588
+ !(lol->word < root && lol->word < w &&
589
+ pp_linkset_match(pp->knowledge->restricted_links,
590
+ sublinkage->link[lol->link]->name)))
591
+ bad_depth_first_search(pp, sublinkage, lol->word, root, start_link);
592
+ }
593
+ }
594
+
595
+ static void d_depth_first_search(Postprocessor *pp, Sublinkage *sublinkage,
596
+ int w, int root, int right, int start_link)
597
+ {
598
+ List_o_links * lol;
599
+ pp->visited[w] = TRUE;
600
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
601
+ if ((lol->word < w) && (lol->link != start_link) && (w != root)) {
602
+ add_link_to_domain(pp, lol->link);
603
+ }
604
+ }
605
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
606
+ if (!pp->visited[lol->word] && !(w == root && lol->word >= right) &&
607
+ !(w == root && lol->word < root) &&
608
+ !(lol->word < root && lol->word < w &&
609
+ pp_linkset_match(pp->knowledge->restricted_links,
610
+ sublinkage->link[lol->link]->name)))
611
+ d_depth_first_search(pp,sublinkage,lol->word,root,right,start_link);
612
+ }
613
+ }
614
+
615
+ static void left_depth_first_search(Postprocessor *pp, Sublinkage *sublinkage,
616
+ int w, int right,int start_link)
617
+ {
618
+ List_o_links *lol;
619
+ pp->visited[w] = TRUE;
620
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
621
+ if (lol->word < w && lol->link != start_link) {
622
+ add_link_to_domain(pp, lol->link);
623
+ }
624
+ }
625
+ for (lol = pp->pp_data.word_links[w]; lol != NULL; lol = lol->next) {
626
+ if (!pp->visited[lol->word] && (lol->word != right))
627
+ depth_first_search(pp, sublinkage, lol->word, right, start_link);
628
+ }
629
+ }
630
+
631
+ static int domain_compare(const Domain * d1, const Domain * d2)
632
+ { return (d1->size-d2->size); /* for sorting the domains by size */ }
633
+
634
+ static void build_domains(Postprocessor *pp, Sublinkage *sublinkage)
635
+ {
636
+ int link, i, d;
637
+ const char *s;
638
+ pp->pp_data.N_domains = 0;
639
+
640
+ for (link = 0; link<sublinkage->num_links; link++) {
641
+ if (sublinkage->link[link]->l == -1) continue;
642
+ s = sublinkage->link[link]->name;
643
+
644
+ if (pp_linkset_match(pp->knowledge->ignore_these_links, s)) continue;
645
+ if (pp_linkset_match(pp->knowledge->domain_starter_links, s))
646
+ {
647
+ setup_domain_array(pp, pp->pp_data.N_domains, s, link);
648
+ if (pp_linkset_match(pp->knowledge->domain_contains_links, s))
649
+ add_link_to_domain(pp, link);
650
+ depth_first_search(pp,sublinkage,sublinkage->link[link]->r,
651
+ sublinkage->link[link]->l, link);
652
+
653
+ pp->pp_data.N_domains++;
654
+ assert(pp->pp_data.N_domains<PP_MAX_DOMAINS, "raise value of PP_MAX_DOMAINS");
655
+ }
656
+ else {
657
+ if (pp_linkset_match(pp->knowledge->urfl_domain_starter_links,s))
658
+ {
659
+ setup_domain_array(pp, pp->pp_data.N_domains, s, link);
660
+ /* always add the starter link to its urfl domain */
661
+ add_link_to_domain(pp, link);
662
+ bad_depth_first_search(pp,sublinkage,sublinkage->link[link]->r,
663
+ sublinkage->link[link]->l,link);
664
+ pp->pp_data.N_domains++;
665
+ assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
666
+ }
667
+ else
668
+ if (pp_linkset_match(pp->knowledge->urfl_only_domain_starter_links,s))
669
+ {
670
+ setup_domain_array(pp, pp->pp_data.N_domains, s, link);
671
+ /* do not add the starter link to its urfl_only domain */
672
+ d_depth_first_search(pp,sublinkage, sublinkage->link[link]->l,
673
+ sublinkage->link[link]->l,
674
+ sublinkage->link[link]->r,link);
675
+ pp->pp_data.N_domains++;
676
+ assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
677
+ }
678
+ else
679
+ if (pp_linkset_match(pp->knowledge->left_domain_starter_links,s))
680
+ {
681
+ setup_domain_array(pp, pp->pp_data.N_domains, s, link);
682
+ /* do not add the starter link to a left domain */
683
+ left_depth_first_search(pp,sublinkage, sublinkage->link[link]->l,
684
+ sublinkage->link[link]->r,link);
685
+ pp->pp_data.N_domains++;
686
+ assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
687
+ }
688
+ }
689
+ }
690
+
691
+ /* sort the domains by size */
692
+ qsort((void *) pp->pp_data.domain_array,
693
+ pp->pp_data.N_domains,
694
+ sizeof(Domain),
695
+ (int (*)(const void *, const void *)) domain_compare);
696
+
697
+ /* sanity check: all links in all domains have a legal domain name */
698
+ for (d=0; d<pp->pp_data.N_domains; d++) {
699
+ i = find_domain_name(pp, pp->pp_data.domain_array[d].string);
700
+ if (i == -1)
701
+ prt_error("Error: post_process(): Need an entry for %s in LINK_TYPE_TABLE",
702
+ pp->pp_data.domain_array[d].string);
703
+ pp->pp_data.domain_array[d].type = i;
704
+ }
705
+ }
706
+
707
+ static void build_domain_forest(Postprocessor *pp, Sublinkage *sublinkage)
708
+ {
709
+ int d, d1, link;
710
+ DTreeLeaf * dtl;
711
+ if (pp->pp_data.N_domains > 0)
712
+ pp->pp_data.domain_array[pp->pp_data.N_domains-1].parent = NULL;
713
+ for (d=0; d < pp->pp_data.N_domains-1; d++) {
714
+ for (d1 = d+1; d1 < pp->pp_data.N_domains; d1++) {
715
+ if (contained_in(&pp->pp_data.domain_array[d],&pp->pp_data.domain_array[d1],sublinkage))
716
+ {
717
+ pp->pp_data.domain_array[d].parent = &pp->pp_data.domain_array[d1];
718
+ break;
719
+ }
720
+ }
721
+ if (d1 == pp->pp_data.N_domains) {
722
+ /* we know this domain is a root of a new tree */
723
+ pp->pp_data.domain_array[d].parent = NULL;
724
+ /* It's now ok for this to happen. It used to do:
725
+ printf("I can't find a parent domain for this domain\n");
726
+ print_domain(d);
727
+ exit(1); */
728
+ }
729
+ }
730
+ /* the parent links of domain nodes have been established.
731
+ now do the leaves */
732
+ for (d=0; d < pp->pp_data.N_domains; d++) {
733
+ pp->pp_data.domain_array[d].child = NULL;
734
+ }
735
+ for (link=0; link < sublinkage->num_links; link++) {
736
+ if (sublinkage->link[link]->l == -1) continue; /* probably not necessary */
737
+ for (d=0; d<pp->pp_data.N_domains; d++) {
738
+ if (link_in_domain(link, &pp->pp_data.domain_array[d])) {
739
+ dtl = (DTreeLeaf *) xalloc(sizeof(DTreeLeaf));
740
+ dtl->link = link;
741
+ dtl->parent = &pp->pp_data.domain_array[d];
742
+ dtl->next = pp->pp_data.domain_array[d].child;
743
+ pp->pp_data.domain_array[d].child = dtl;
744
+ break;
745
+ }
746
+ }
747
+ }
748
+ }
749
+
750
+ static int
751
+ internal_process(Postprocessor *pp, Sublinkage *sublinkage, const char **msg)
752
+ {
753
+ int i;
754
+ /* quick test: try applying just the relevant global rules */
755
+ if (!apply_relevant_rules(pp,apply_contains_one_globally,
756
+ sublinkage,
757
+ pp->knowledge->contains_one_rules,
758
+ pp->relevant_contains_one_rules, msg)) {
759
+ for (i=0; i<pp->pp_data.length; i++)
760
+ pp->pp_data.word_links[i] = NULL;
761
+ pp->pp_data.N_domains = 0;
762
+ return -1;
763
+ }
764
+
765
+ /* build graph; confirm that it's legally connected */
766
+ build_graph(pp, sublinkage);
767
+ build_domains(pp, sublinkage);
768
+ build_domain_forest(pp, sublinkage);
769
+
770
+ #if defined(CHECK_DOMAIN_NESTING)
771
+ /* These messages were deemed to not be useful, so
772
+ this code is commented out. See comment above. */
773
+ if(!check_domain_nesting(pp, sublinkage->num_links))
774
+ printf("WARNING: The domains are not nested.\n");
775
+ #endif
776
+
777
+ /* The order below should be optimal for most cases */
778
+ if (!apply_relevant_rules(pp,apply_contains_one, sublinkage,
779
+ pp->knowledge->contains_one_rules,
780
+ pp->relevant_contains_one_rules, msg)) return 1;
781
+ if (!apply_relevant_rules(pp,apply_contains_none, sublinkage,
782
+ pp->knowledge->contains_none_rules,
783
+ pp->relevant_contains_none_rules, msg)) return 1;
784
+ if (!apply_rules(pp,apply_must_form_a_cycle, sublinkage,
785
+ pp->knowledge->form_a_cycle_rules,msg)) return 1;
786
+ if (!apply_rules(pp,apply_connected, sublinkage,
787
+ pp->knowledge->connected_rules, msg)) return 1;
788
+ if (!apply_rules(pp,apply_bounded, sublinkage,
789
+ pp->knowledge->bounded_rules, msg)) return 1;
790
+ return 0; /* This linkage satisfied all the rules */
791
+ }
792
+
793
+
794
+ /**
795
+ * Call this (a) after having called post_process_scan_linkage() on all
796
+ * generated linkages, but (b) before calling post_process() on any
797
+ * particular linkage. Here we mark all rules which we know (from having
798
+ * accumulated a set of link names appearing in *any* linkage) won't
799
+ * ever be needed.
800
+ */
801
+ static void prune_irrelevant_rules(Postprocessor *pp)
802
+ {
803
+ pp_rule *rule;
804
+ int coIDX, cnIDX, rcoIDX=0, rcnIDX=0;
805
+
806
+ /* If we didn't scan any linkages, there's no pruning to be done. */
807
+ if (pp_linkset_population(pp->set_of_links_of_sentence)==0) return;
808
+
809
+ for (coIDX=0;;coIDX++)
810
+ {
811
+ rule = &(pp->knowledge->contains_one_rules[coIDX]);
812
+ if (rule->msg==NULL) break;
813
+ if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector))
814
+ {
815
+ /* mark rule as being relevant to this sentence */
816
+ pp->relevant_contains_one_rules[rcoIDX++] = coIDX;
817
+ pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
818
+ }
819
+ }
820
+ pp->relevant_contains_one_rules[rcoIDX] = -1; /* end sentinel */
821
+
822
+ for (cnIDX=0;;cnIDX++)
823
+ {
824
+ rule = &(pp->knowledge->contains_none_rules[cnIDX]);
825
+ if (rule->msg==NULL) break;
826
+ if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector))
827
+ {
828
+ pp->relevant_contains_none_rules[rcnIDX++] = cnIDX;
829
+ pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
830
+ }
831
+ }
832
+ pp->relevant_contains_none_rules[rcnIDX] = -1;
833
+
834
+ if (verbosity > 1)
835
+ {
836
+ printf("Saw %i unique link names in all linkages.\n",
837
+ pp_linkset_population(pp->set_of_links_of_sentence));
838
+ printf("Using %i 'contains one' rules and %i 'contains none' rules\n",
839
+ rcoIDX, rcnIDX);
840
+ }
841
+ }
842
+
843
+
844
+ /***************** definitions of exported functions ***********************/
845
+
846
+ /**
847
+ * read rules from path and initialize the appropriate fields in
848
+ * a postprocessor structure, a pointer to which is returned.
849
+ */
850
+ Postprocessor * post_process_open(const char *path)
851
+ {
852
+ Postprocessor *pp;
853
+ if (path==NULL) return NULL;
854
+
855
+ pp = (Postprocessor *) xalloc (sizeof(Postprocessor));
856
+ pp->knowledge = pp_knowledge_open(path);
857
+ pp->sentence_link_name_set = string_set_create();
858
+ pp->set_of_links_of_sentence = pp_linkset_open(1024);
859
+ pp->set_of_links_in_an_active_rule=pp_linkset_open(1024);
860
+ pp->relevant_contains_one_rules =
861
+ (int *) xalloc ((pp->knowledge->n_contains_one_rules+1)
862
+ *(sizeof pp->relevant_contains_one_rules[0]));
863
+ pp->relevant_contains_none_rules =
864
+ (int *) xalloc ((pp->knowledge->n_contains_none_rules+1)
865
+ *(sizeof pp->relevant_contains_none_rules[0]));
866
+ pp->relevant_contains_one_rules[0] = -1;
867
+ pp->relevant_contains_none_rules[0] = -1;
868
+ pp->pp_node = NULL;
869
+ pp->pp_data.links_to_ignore = NULL;
870
+ pp->n_local_rules_firing = 0;
871
+ pp->n_global_rules_firing = 0;
872
+ return pp;
873
+ }
874
+
875
+ void post_process_close(Postprocessor *pp)
876
+ {
877
+ /* frees up memory associated with pp, previously allocated by open */
878
+ if (pp==NULL) return;
879
+ string_set_delete(pp->sentence_link_name_set);
880
+ pp_linkset_close(pp->set_of_links_of_sentence);
881
+ pp_linkset_close(pp->set_of_links_in_an_active_rule);
882
+ xfree(pp->relevant_contains_one_rules,
883
+ (1+pp->knowledge->n_contains_one_rules)
884
+ *(sizeof pp->relevant_contains_one_rules[0]));
885
+ xfree(pp->relevant_contains_none_rules,
886
+ (1+pp->knowledge->n_contains_none_rules)
887
+ *(sizeof pp->relevant_contains_none_rules[0]));
888
+ pp_knowledge_close(pp->knowledge);
889
+ free_pp_node(pp);
890
+ xfree(pp, sizeof(Postprocessor));
891
+ }
892
+
893
+ void post_process_close_sentence(Postprocessor *pp)
894
+ {
895
+ if (pp==NULL) return;
896
+ pp_linkset_clear(pp->set_of_links_of_sentence);
897
+ pp_linkset_clear(pp->set_of_links_in_an_active_rule);
898
+ string_set_delete(pp->sentence_link_name_set);
899
+ pp->sentence_link_name_set = string_set_create();
900
+ pp->n_local_rules_firing = 0;
901
+ pp->n_global_rules_firing = 0;
902
+ pp->relevant_contains_one_rules[0] = -1;
903
+ pp->relevant_contains_none_rules[0] = -1;
904
+ free_pp_node(pp);
905
+ }
906
+
907
+ /**
908
+ * During a first pass (prior to actual post-processing of the linkages
909
+ * of a sentence), call this once for every generated linkage. Here we
910
+ * simply maintain a set of "seen" link names for rule pruning later on
911
+ */
912
+ void post_process_scan_linkage(Postprocessor *pp, Parse_Options opts,
913
+ Sentence sent, Sublinkage *sublinkage)
914
+ {
915
+ const char *p;
916
+ int i;
917
+ if (pp == NULL) return;
918
+ if (sent->length < opts->twopass_length) return;
919
+ for (i=0; i<sublinkage->num_links; i++)
920
+ {
921
+ if (sublinkage->link[i]->l == -1) continue;
922
+ p = string_set_add(sublinkage->link[i]->name, pp->sentence_link_name_set);
923
+ pp_linkset_add(pp->set_of_links_of_sentence, p);
924
+ }
925
+ }
926
+
927
+ /**
928
+ * Takes a sublinkage and returns:
929
+ * . for each link, the domain structure of that link
930
+ * . a list of the violation strings
931
+ * NB: sublinkage->link[i]->l=-1 means that this connector
932
+ * is to be ignored
933
+ */
934
+ PP_node *post_process(Postprocessor *pp, Parse_Options opts,
935
+ Sentence sent, Sublinkage *sublinkage, int cleanup)
936
+ {
937
+ const char *msg;
938
+
939
+ if (pp==NULL) return NULL;
940
+
941
+ pp->pp_data.links_to_ignore = NULL;
942
+ pp->pp_data.length = sent->length;
943
+
944
+ /* In the name of responsible memory management, we retain a copy of the
945
+ * returned data structure pp_node as a field in pp, so that we can clear
946
+ * it out after every call, without relying on the user to do so. */
947
+ reset_pp_node(pp);
948
+
949
+ /* The first time we see a sentence, prune the rules which we won't be
950
+ * needing during postprocessing the linkages of this sentence */
951
+ if (sent->q_pruned_rules==FALSE && sent->length >= opts->twopass_length)
952
+ prune_irrelevant_rules(pp);
953
+ sent->q_pruned_rules=TRUE;
954
+
955
+ switch(internal_process(pp, sublinkage, &msg))
956
+ {
957
+ case -1:
958
+ /* some global test failed even before we had to build the domains */
959
+ pp->n_global_rules_firing++;
960
+ pp->pp_node->violation = msg;
961
+ return pp->pp_node;
962
+ break;
963
+ case 1:
964
+ /* one of the "normal" post processing tests failed */
965
+ pp->n_local_rules_firing++;
966
+ pp->pp_node->violation = msg;
967
+ break;
968
+ case 0:
969
+ /* This linkage is legal according to the post processing rules */
970
+ pp->pp_node->violation = NULL;
971
+ break;
972
+ }
973
+
974
+ build_type_array(pp);
975
+ if (cleanup)
976
+ {
977
+ post_process_free_data(&pp->pp_data);
978
+ }
979
+ return pp->pp_node;
980
+ }
981
+
982
/*
 * String comparison used in post-processing.  The first parameter is a
 * post-processing symbol; the second is a connector name from a link.
 * The upper-case prefixes must match exactly.  Conceptually, the first
 * argument is padded with an infinite sequence of "#" and the second
 * with "*".  "#" matches anything, but "*" is just an ordinary character
 * for matching purposes.  For efficiency's sake there are several
 * different versions of these functions.
 */

/* Fallbacks so this unit is self-contained; no-ops when the project
 * headers already define these. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

int post_process_match(const char *s, const char *t)
{
  char c;

  /* The upper-case prefixes of both strings must agree exactly.
   * Cast through unsigned char: passing a negative char value (e.g. a
   * high-bit byte on a signed-char platform) to isupper() is undefined
   * behavior per the C standard. */
  while (isupper((unsigned char)*s) || isupper((unsigned char)*t))
  {
    if (*s != *t) return FALSE;
    s++;
    t++;
  }

  /* Lower-case tail: '#' in s matches anything; once t is exhausted it
   * is treated as if padded with '*'. */
  while (*s != '\0')
  {
    if (*s != '#')
    {
      c = (*t == '\0') ? '*' : *t;
      if (*s != c) return FALSE;
    }
    s++;
    if (*t != '\0') t++;
  }
  return TRUE;
}
1013
+
1014
+ /* OLD COMMENTS (OUT OF DATE):
1015
+ This file does the post-processing.
1016
+ The main routine is "post_process()". It uses the link names only,
1017
+ and not the connectors.
1018
+
1019
+ A domain is a set of links. Each domain has a defining link.
1020
+ Only certain types of links serve to define a domain. These
1021
+ parameters are set by the lists of link names in a separate,
1022
+ human-readable file referred to herein as the 'knowledge file.'
1023
+
1024
+ The domains are nested: given two domains, either they're disjoint,
1025
+ or one contains the other, i.e. they're tree structured. The set of links
1026
+ in a domain (but in no smaller domain) are called the "group" of the
1027
+ domain. Data structures are built to store all this stuff.
1028
+ The tree structured property is not mathematically guaranteed by
1029
+ the domain construction algorithm. Davy simply claims that because
1030
+ of how he built the dictionary, the domains will always be so
1031
+ structured. The program checks this and gives an error message
1032
+ if it's violated.
1033
+
1034
+ Define the "root word" of a link (or domain) to be the word at the
1035
+ left end of the link. The other end of the defining link is called
1036
+ the "right word".
1037
+
1038
+ The domain corresponding to a link is defined to be the set of links
1039
+ reachable by starting from the right word, following links and never
1040
+ using the root word or any word to its left.
1041
+
1042
+ There are some minor exceptions to this. The "restricted_link" lists
1043
+ those connectors that, even if they point back before the root word,
1044
+ are included in the domain. Some of the starting links are included
1045
+ in their domain, these are listed in the "domain_contains_links" list.
1046
+
1047
+ Such was the way it was. Now Davy tells me there should be another type
1048
+ of domain that's quite different. Let's call these "urfl" domains.
1049
+  Certain types of connectors start urfl domains. They're listed below.
1050
+ In a urfl domain, the search includes the root word. It does a separate
1051
+ search to find urfl domains.
1052
+
1053
+ Restricted links should work just as they do with ordinary domains. If they
1054
+ come out of the right word, or anything to the right of it (that's
1055
+ in the domain), they should be included but should not be traced
1056
+ further. If they come out of the root word, they should not be
1057
+ included.
1058
+ */
1059
+
1060
+ /*
1061
+ I also, unfortunately, want to propose a new type of domain. These
1062
+ would include everything that can be reached from the root word of the
1063
+ link, to the right, that is closer than the right word of the link.
1064
+ (They would not include the link itself.)
1065
+
1066
+ In the following sentence, then, the "Urfl_Only Domain" of the G link
1067
+ would include only the "O" link:
1068
+
1069
+ +-----G----+
1070
+ +---O--+ +-AI+
1071
+ | | | |
1072
+ hitting dogs is fun.a
1073
+
1074
+ In the following sentence it would include the "O", the "TT", the "I",
1075
+ the second "O", and the "A".
1076
+
1077
+ +----------------G---------------+
1078
+ +-----TT-----+ +-----O-----+ |
1079
+ +---O---+ +-I+ +---A--+ +-AI+
1080
+ | | | | | | | |
1081
+ telling people to do stupid things is fun.a
1082
+
1083
+ This would allow us to judge the following:
1084
+
1085
+ kicking dogs bores me
1086
+ *kicking dogs kicks dogs
1087
+ explaining the program is easy
1088
+ *explaining the program is running
1089
+
1090
+ (These are distinctions that I thought we would never be able to make,
1091
+ so I told myself they were semantic rather than syntactic. But with
1092
+ domains, they should be easy.)
1093
+ */
1094
+
1095
+ /* Modifications, 6/96 ALB:
1096
+ 1) Rules and link sets are relegated to a separate, user-written
1097
+ file(s), herein referred to as the 'knowledge file'
1098
+ 2) This information is read by a lexer, in pp_lexer.l (lex code)
1099
+ whose exported routines are all prefixed by 'pp_lexer'
1100
+ 3) when postprocessing a sentence, the links of each domain are
1101
+ placed in a set for quick lookup, ('contains one' and 'contains none')
1102
+ 4) Functions which were never called have been eliminated:
1103
+ link_inhabits(), match_in_list(), group_type_contains(),
1104
+ group_type_contains_one(), group_type_contains_all()
1105
+ 5) Some 'one-by-one' initializations have been replaced by faster
1106
+ block memory operations (memset etc.)
1107
+ 6) The above comments are correct but incomplete! (1/97)
1108
+ 7) observation: the 'contains one' is, empirically, by far the most
1109
+ violated rule, so it should come first in applying the rules.
1110
+
1111
+ Modifications, 9/97 ALB:
1112
+   Deglobalization. Made code consistent with the API.
1113
+ */