grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,1317 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+
15
+ #include <stdarg.h>
16
+ #include "api.h"
17
+
18
+ /**
19
+ * The functions in this file do several things: () take a linkage
20
+ * involving fat links and expand it into a sequence of linkages
21
+ * (involving a subset of the given words), one for each way of
22
+ * eliminating the conjunctions. () determine if a linkage involving
23
+ * fat links has a structural violation. () make sure each of the expanded
24
+ * linkages has a consistent post-processing behavior. () compute the
25
+ * cost of the linkage.
26
+ */
27
+
28
/* One patch record per link in the linkage. After a conjunction is
 * eliminated, some link endpoints must be rewritten; this records
 * whether the link survives and where its ends now attach. */
typedef struct patch_element_struct Patch_element;
struct patch_element_struct
{
	char used;    /* TRUE if this link is used, else FALSE */
	char changed; /* TRUE if this link changed, else FALSE */
	int newl;     /* the new value of the left end */
	int newr;     /* the new value of the right end */
};
36
+
37
/* Scratch state threaded through one pass of linkage analysis:
 * the per-word link digraph, depth-first-search bookkeeping, and
 * the per-link patch array filled in by fill_patch_array_*(). */
struct analyze_context_s
{
	List_o_links *word_links[MAX_SENTENCE]; /* ptr to l.o.l. out of word */
	int structure_violation;  /* set TRUE by c_dfs() on an improper fat-link structure */

	int dfs_root_word[MAX_SENTENCE]; /* for the depth-first search */
	int dfs_height[MAX_SENTENCE];    /* to determine the order to do the root word dfs */
	int height_perm[MAX_SENTENCE];   /* permute the vertices from highest to lowest */

	/* The following are all for computing the cost of and lists */
	int visited[MAX_SENTENCE];
	int and_element_sizes[MAX_SENTENCE];
	int and_element[MAX_SENTENCE];
	int N_and_elements;
	int outside_word[MAX_SENTENCE];
	int N_outside_words;
	Patch_element patch_array[MAX_LINKS];
};
55
+
56
/* Forward declarations for the DIS_CON tree node and list types. */
typedef struct CON_node_struct CON_node;
typedef struct CON_list_struct CON_list;
typedef struct DIS_list_struct DIS_list;
typedef struct Links_to_patch_struct Links_to_patch;
60
+
61
/* A DIS node: a region of the linkage graph reachable via thin links,
 * connected to its parent (if any) through a fat link. */
struct DIS_node_struct
{
	CON_list * cl;     /* the list of children */
	List_o_links * lol;/* the links that comprise this region of the graph */
	int word;          /* the word defining this node */
};
67
+
68
/* A CON node: a word with fat links pointing down out of it; its
 * children are the DIS regions those fat links lead to. `current`
 * is the iteration cursor used by advance_CON(). */
struct CON_node_struct
{
	DIS_list * dl;     /* the list of children */
	DIS_list * current;/* defines the current child */
	int word;          /* the word defining this node */
};
74
+
75
/* Singly-linked list cell holding one DIS child of a CON node. */
struct DIS_list_struct
{
	DIS_list * next;
	DIS_node * dn;
};
80
+
81
/* Singly-linked list cell holding one CON child of a DIS node. */
struct CON_list_struct
{
	CON_list * next;
	CON_node * cn;
};
86
+
87
/* Work-list entry naming a link whose endpoint must be re-attached
 * when a conjunction is eliminated; consumed (and freed) by
 * fill_patch_array_DIS(). */
struct Links_to_patch_struct
{
	Links_to_patch * next;
	int link;
	char dir;   /* this is 'r' or 'l' depending on which end of the link
	               is to be patched. */
};
94
+
95
+ void zero_sublinkage(Sublinkage *s)
96
+ {
97
+ int i;
98
+ s->pp_info = NULL;
99
+ s->violation = NULL;
100
+ for (i=0; i<s->num_links; i++) s->link[i] = NULL;
101
+
102
+ memset(&s->pp_data, 0, sizeof(PP_data));
103
+ }
104
+
105
+ static Sublinkage * x_create_sublinkage(Parse_info pi)
106
+ {
107
+ Sublinkage *s = (Sublinkage *) xalloc (sizeof(Sublinkage));
108
+ s->link = (Link **) xalloc(MAX_LINKS*sizeof(Link *));
109
+ s->num_links = MAX_LINKS;
110
+
111
+ zero_sublinkage(s);
112
+
113
+ s->num_links = pi->N_links;
114
+ assert(pi->N_links < MAX_LINKS, "Too many links");
115
+ return s;
116
+ }
117
+
118
+ static Sublinkage * ex_create_sublinkage(Parse_info pi)
119
+ {
120
+ Sublinkage *s = (Sublinkage *) exalloc (sizeof(Sublinkage));
121
+ s->link = (Link **) exalloc(pi->N_links*sizeof(Link *));
122
+ s->num_links = pi->N_links;
123
+
124
+ zero_sublinkage(s);
125
+
126
+ assert(pi->N_links < MAX_LINKS, "Too many links");
127
+ return s;
128
+ }
129
+
130
+ static void free_sublinkage(Sublinkage *s)
131
+ {
132
+ int i;
133
+ for (i=0; i<MAX_LINKS; i++) {
134
+ if (s->link[i]!=NULL) exfree_link(s->link[i]);
135
+ }
136
+ xfree(s->link, MAX_LINKS*sizeof(Link));
137
+ xfree(s, sizeof(Sublinkage));
138
+ }
139
+
140
+ static void replace_link_name(Link *l, const char *s)
141
+ {
142
+ /* XXX can get some perf improvement by avoiding strlen! */
143
+ char * t;
144
+ exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
145
+ t = (char *) exalloc(sizeof(char)*(strlen(s)+1));
146
+ strcpy(t, s);
147
+ l->name = t;
148
+ }
149
+
150
+ static void copy_full_link(Link **dest, Link *src)
151
+ {
152
+ if (*dest != NULL) exfree_link(*dest);
153
+ *dest = excopy_link(src);
154
+ }
155
+
156
+ /* end new code 9/97 ALB */
157
+
158
+
159
+ /**
160
+ * Constructs a graph in the wordlinks array based on the contents of
161
+ * the global link_array. Makes the wordlinks array point to a list of
162
+ * words neighboring each word (actually a list of links). This is a
163
+ * directed graph, constructed for dealing with "and". For a link in
164
+ * which the priorities are UP or DOWN_priority, the edge goes from the
165
+ * one labeled DOWN to the one labeled UP.
166
+ * Don't generate links edges for the bogus comma connectors.
167
+ */
168
+ static void build_digraph(analyze_context_t *actx, Parse_info pi)
169
+ {
170
+ int i, link, N_fat;
171
+ Link *lp;
172
+ List_o_links * lol;
173
+ N_fat = 0;
174
+
175
+ for (i = 0; i < pi->N_words; i++)
176
+ {
177
+ actx->word_links[i] = NULL;
178
+ }
179
+
180
+ for (link = 0; link < pi->N_links; link++)
181
+ {
182
+ lp = &(pi->link_array[link]);
183
+ i = lp->lc->label;
184
+ if (i < NORMAL_LABEL) { /* one of those special links for either-or, etc */
185
+ continue;
186
+ }
187
+
188
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
189
+ lol->next = actx->word_links[lp->l];
190
+ actx->word_links[lp->l] = lol;
191
+ lol->link = link;
192
+ lol->word = lp->r;
193
+ i = lp->lc->priority;
194
+ if (i == THIN_priority) {
195
+ lol->dir = 0;
196
+ } else if (i == DOWN_priority) {
197
+ lol->dir = 1;
198
+ } else {
199
+ lol->dir = -1;
200
+ }
201
+
202
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
203
+ lol->next = actx->word_links[lp->r];
204
+ actx->word_links[lp->r] = lol;
205
+ lol->link = link;
206
+ lol->word = lp->l;
207
+ i = lp->rc->priority;
208
+ if (i == THIN_priority) {
209
+ lol->dir = 0;
210
+ } else if (i == DOWN_priority) {
211
+ lol->dir = 1;
212
+ } else {
213
+ lol->dir = -1;
214
+ }
215
+ }
216
+ }
217
+
218
+ /**
219
+ * Returns TRUE if there is at least one fat link pointing out of this word.
220
+ */
221
+ static int is_CON_word(int w, List_o_links **wordlinks)
222
+ {
223
+ List_o_links * lol;
224
+ for (lol = wordlinks[w]; lol != NULL; lol = lol->next)
225
+ {
226
+ if (lol->dir == 1) return TRUE;
227
+ }
228
+ return FALSE;
229
+ }
230
+
231
+ static DIS_node * build_DIS_node(analyze_context_t*, int);
232
+
233
+ /**
234
+ * This word is a CON word (has fat links down). Build the tree for it.
235
+ */
236
+ static CON_node * build_CON_node(analyze_context_t *actx, int w)
237
+ {
238
+ List_o_links * lol;
239
+ CON_node * a;
240
+ DIS_list * d, *dx;
241
+ d = NULL;
242
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
243
+ {
244
+ if (lol->dir == 1)
245
+ {
246
+ dx = (DIS_list *) xalloc (sizeof (DIS_list));
247
+ dx->next = d;
248
+ d = dx;
249
+ d->dn = build_DIS_node(actx, lol->word);
250
+ }
251
+ }
252
+ a = (CON_node *) xalloc(sizeof (CON_node));
253
+ a->dl = a->current = d;
254
+ a->word = w;
255
+ return a;
256
+ }
257
+
258
+ /**
259
+ * Does a depth-first-search starting from w. Puts on the front of the
260
+ * list pointed to by c all of the CON nodes it finds, and returns the
261
+ * result. Also construct the list of all edges reached as part of this
262
+ * DIS_node search and append it to the lol list of start_dn.
263
+ *
264
+ * Both of the structure violations actually occur, and represent
265
+ * linkages that have improper structure. Fortunately, they
266
+ * seem to be rather rare.
267
+ */
268
static CON_list * c_dfs(analyze_context_t *actx,
                        int w, DIS_node * start_dn, CON_list * c)
{
	CON_list *cx;
	List_o_links * lol, *lolx;

	/* Already claimed by some DIS region? A second claim by a
	 * *different* root is a structure violation. Either way, stop. */
	if (actx->dfs_root_word[w] != -1)
	{
		if (actx->dfs_root_word[w] != start_dn->word)
		{
			actx->structure_violation = TRUE;
		}
		return c;
	}
	/* Claim w for the region rooted at start_dn. */
	actx->dfs_root_word[w] = start_dn->word;
	for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
	{
		if (lol->dir < 0) /* a backwards link */
		{
			/* An up-edge into an unclaimed word: improper structure. */
			if (actx->dfs_root_word[lol->word] == -1)
			{
				actx->structure_violation = TRUE;
			}
		}
		else if (lol->dir == 0)
		{
			/* Make a copy of the link */
			lolx = (List_o_links *) xalloc(sizeof(List_o_links));
			lolx->word = lol->word;
			lolx->dir = lol->dir;
			lolx->link = lol->link;

			/* Chain it into place */
			lolx->next = start_dn->lol;
			start_dn->lol = lolx;
			/* Recurse along the thin link; CON nodes found deeper are
			 * accumulated onto c. */
			c = c_dfs(actx, lol->word, start_dn, c);
		}
	}

	/* if the current node is CON, put it first */
	if (is_CON_word(w, actx->word_links))
	{
		cx = (CON_list *) xalloc(sizeof(CON_list));
		cx->next = c;
		c = cx;
		c->cn = build_CON_node(actx, w);
	}
	return c;
}
316
+
317
+ /**
318
+ * This node is connected to its parent via a fat link. Search the
319
+ * region reachable via thin links, and put all reachable nodes with fat
320
+ * links out of them in its list of children.
321
+ */
322
+ static DIS_node * build_DIS_node(analyze_context_t *actx,
323
+ int w)
324
+ {
325
+ DIS_node * dn;
326
+ dn = (DIS_node *) xalloc(sizeof (DIS_node));
327
+ dn->word = w; /* must do this before dfs so it knows the start word */
328
+ dn->lol = NULL;
329
+ dn->cl = c_dfs(actx, w, dn, NULL);
330
+ return dn;
331
+ }
332
+
333
+ static void height_dfs(analyze_context_t *actx, int w, int height)
334
+ {
335
+ List_o_links * lol;
336
+ if (actx->dfs_height[w] != 0) return;
337
+
338
+ actx->dfs_height[w] = height;
339
+
340
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
341
+ {
342
+ /* The dir is 1 for a down link. */
343
+ height_dfs(actx, lol->word, height - lol->dir);
344
+ }
345
+ }
346
+
347
+ /**
348
+ * Simple insertion sort; should be plenty fast enough, since sentences
349
+ * are almost always shorter than 30 words or so. In fact, this is
350
+ * almost surely faster than qsort for such small arrays.
351
+ */
352
+ static void insort (analyze_context_t *actx, int nwords)
353
+ {
354
+ int i, j;
355
+ for (i=1; i<nwords; i++)
356
+ {
357
+ int heig = actx->dfs_height[i];
358
+ int perm = actx->height_perm[i];
359
+ j = i;
360
+ while (j>0 && (heig > actx->dfs_height[j-1]))
361
+ {
362
+ actx->dfs_height[j] = actx->dfs_height[j-1];
363
+ actx->height_perm[j] = actx->height_perm[j-1];
364
+ j--;
365
+ }
366
+ actx->dfs_height[j] = heig;
367
+ actx->height_perm[j] = perm;
368
+ }
369
+ }
370
+
371
/* Build the full DIS_CON tree for the parse: compute word heights,
 * visit words from highest to lowest, and merge every region found
 * into a single root DIS node. */
static DIS_node * build_DIS_CON_tree(analyze_context_t *actx, Parse_info pi)
{
	int xw, w;
	DIS_node * dnroot, * dn;
	CON_list * child, * xchild;
	List_o_links * lol, * xlol;

	/* The algorithm used here to build the DIS_CON tree depends on
	 * the search percolating down from the "top" of the tree. The
	 * original version of this started its search at the wall. This
	 * was fine because doing a DFS from the wall explores the tree in
	 * the right order.
	 *
	 * However, in order to handle null links correctly, a more careful
	 * ordering process must be used to explore the tree. We use
	 * dfs_height[] for this, and sort in height order.
	 *
	 * XXX Is the sort order correct here? This is not obvious; I think
	 * we want highest to lowest ... XXX is the height being calculated
	 * correctly? Looks weird to me ... XXX
	 */

	/* Assign a height to every word, starting each unvisited word
	 * at MAX_SENTENCE. */
	for (w=0; w < pi->N_words; w++) actx->dfs_height[w] = 0;
	for (w=0; w < pi->N_words; w++) height_dfs(actx, w, MAX_SENTENCE);

	for (w=0; w < pi->N_words; w++) actx->height_perm[w] = w;

	/* Sort the heights, keeping only the permuted order. */
	insort (actx, pi->N_words);

	for (w=0; w<pi->N_words; w++) actx->dfs_root_word[w] = -1;

	dnroot = NULL;
	for (xw = 0; xw < pi->N_words; xw++)
	{
		w = actx->height_perm[xw];
		/* Only words not yet claimed by an earlier region start
		 * a new DIS node. */
		if (actx->dfs_root_word[w] == -1)
		{
			dn = build_DIS_node(actx, w);
			if (dnroot == NULL)
			{
				dnroot = dn;
			}
			else
			{
				/* Splice this node's children and link list onto the
				 * root, then free the now-empty node itself. */
				for (child = dn->cl; child != NULL; child = xchild)
				{
					xchild = child->next;
					child->next = dnroot->cl;
					dnroot->cl = child;
				}
				for (lol = dn->lol; lol != NULL; lol = xlol)
				{
					xlol = lol->next;
					lol->next = dnroot->lol;
					dnroot->lol = lol;
				}
				xfree((void *) dn, sizeof(DIS_node));
			}
		}
	}
	return dnroot;
}
434
+
435
+ static int advance_CON(CON_node *);
436
+
437
+ /**
438
+ * Cyclically advance the current state of this DIS node.
439
+ * If it's now at the beginning of its cycle, return FALSE;
440
+ * otherwise return TRUE. Together with the advance_CON()
441
+ * function, this can be used to iterate over the entire
442
+ * DIS_CON tree.
443
+ */
444
+ static int advance_DIS(DIS_node * dn)
445
+ {
446
+ CON_list * cl;
447
+ for (cl = dn->cl; cl != NULL; cl = cl->next)
448
+ {
449
+ if (advance_CON(cl->cn)) return TRUE;
450
+ }
451
+ return FALSE;
452
+ }
453
+
454
+ /**
455
+ * Cyclically advance the current state of this CON node.
456
+ * If it's now at the beginning of its cycle return FALSE,
457
+ * otherwise return TRUE. Together with the advance_CON()
458
+ * function, this can be used to iterate over the entire
459
+ * DIS_CON tree.
460
+ */
461
+ static int advance_CON(CON_node * cn)
462
+ {
463
+ if (advance_DIS(cn->current->dn))
464
+ {
465
+ return TRUE;
466
+ }
467
+ else
468
+ {
469
+ if (cn->current->next == NULL)
470
+ {
471
+ cn->current = cn->dl;
472
+ return FALSE;
473
+ }
474
+ else
475
+ {
476
+ cn->current = cn->current->next;
477
+ return TRUE;
478
+ }
479
+ }
480
+ }
481
+
482
+ static void fill_patch_array_CON(analyze_context_t *, CON_node *, Links_to_patch *);
483
+
484
+ /**
485
+ * Patches up appropriate links in the patch_array for this DIS_node
486
+ * and this patch list.
487
+ */
488
static void fill_patch_array_DIS(analyze_context_t *actx,
                                 DIS_node * dn, Links_to_patch * ltp)
{
	CON_list * cl;
	List_o_links * lol;
	Links_to_patch * ltpx;

	/* Every link inside this DIS region is kept as-is. */
	for (lol = dn->lol; lol != NULL; lol = lol->next)
	{
		actx->patch_array[lol->link].used = TRUE;
	}

	/* Unless the first child CON node sits on this very word, the
	 * pending patches attach here: repoint each listed link end at
	 * dn->word, consuming and freeing the patch list. */
	if ((dn->cl == NULL) || (dn->cl->cn->word != dn->word))
	{
		for (; ltp != NULL; ltp = ltpx)
		{
			ltpx = ltp->next;
			actx->patch_array[ltp->link].changed = TRUE;
			if (ltp->dir == 'l')
			{
				actx->patch_array[ltp->link].newl = dn->word;
			}
			else
			{
				actx->patch_array[ltp->link].newr = dn->word;
			}
			xfree((void *) ltp, sizeof(Links_to_patch));
		}
	}

	/* ltp != NULL at this point means that dn has child which is a cn
	   which is the same word. Hand the list to the first child only;
	   it must be consumed exactly once, so clear it afterwards. */
	for (cl = dn->cl; cl != NULL; cl = cl->next)
	{
		fill_patch_array_CON(actx, cl->cn, ltp);
		ltp = NULL;
	}
}
526
+
527
+ static void fill_patch_array_CON(analyze_context_t *actx,
528
+ CON_node * cn, Links_to_patch * ltp)
529
+ {
530
+ List_o_links * lol;
531
+ Links_to_patch *ltpx;
532
+
533
+ for (lol = actx->word_links[cn->word]; lol != NULL; lol = lol->next)
534
+ {
535
+ if (lol->dir == 0)
536
+ {
537
+ ltpx = (Links_to_patch *) xalloc(sizeof(Links_to_patch));
538
+ ltpx->next = ltp;
539
+ ltp = ltpx;
540
+ ltp->link = lol->link;
541
+ if (lol->word > cn->word) {
542
+ ltp->dir = 'l';
543
+ } else {
544
+ ltp->dir = 'r';
545
+ }
546
+ }
547
+ }
548
+ fill_patch_array_DIS(actx, cn->current->dn, ltp);
549
+ }
550
+
551
+ static void free_digraph(analyze_context_t *actx, Parse_info pi)
552
+ {
553
+ List_o_links * lol, *lolx;
554
+ int i;
555
+ for (i = 0; i < pi->N_words; i++)
556
+ {
557
+ for (lol = actx->word_links[i]; lol != NULL; lol = lolx)
558
+ {
559
+ lolx = lol->next;
560
+ xfree((void *) lol, sizeof(List_o_links));
561
+ }
562
+ }
563
+ }
564
+
565
+ static void free_CON_tree(CON_node *);
566
+
567
+ void free_DIS_tree(DIS_node * dn)
568
+ {
569
+ List_o_links * lol, *lolx;
570
+ CON_list *cl, *clx;
571
+ for (lol = dn->lol; lol != NULL; lol = lolx)
572
+ {
573
+ lolx = lol->next;
574
+ xfree((void *) lol, sizeof(List_o_links));
575
+ }
576
+ for (cl = dn->cl; cl != NULL; cl = clx)
577
+ {
578
+ clx = cl->next;
579
+ free_CON_tree(cl->cn);
580
+ xfree((void *) cl, sizeof(CON_list));
581
+ }
582
+ xfree((void *) dn, sizeof(DIS_node));
583
+ }
584
+
585
+ static void free_CON_tree(CON_node * cn)
586
+ {
587
+ DIS_list *dl, *dlx;
588
+ for (dl = cn->dl; dl != NULL; dl = dlx)
589
+ {
590
+ dlx = dl->next;
591
+ free_DIS_tree(dl->dn);
592
+ xfree((void *) dl, sizeof(DIS_list));
593
+ }
594
+ xfree((void *) cn, sizeof(CON_node));
595
+ }
596
+
597
+ /** scope out this and element */
598
+ static void and_dfs_full(analyze_context_t *actx, int w)
599
+ {
600
+ List_o_links *lol;
601
+ if (actx->visited[w]) return;
602
+ actx->visited[w] = TRUE;
603
+ actx->and_element_sizes[actx->N_and_elements]++;
604
+
605
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
606
+ {
607
+ if (lol->dir >= 0)
608
+ {
609
+ and_dfs_full(actx, lol->word);
610
+ }
611
+ }
612
+ }
613
+
614
+ /** get down the tree past all the commas */
615
+ static void and_dfs_commas(analyze_context_t *actx, Sentence sent, int w)
616
+ {
617
+ List_o_links *lol;
618
+ if (actx->visited[w]) return;
619
+
620
+ actx->visited[w] = TRUE;
621
+
622
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
623
+ {
624
+ /* we only consider UP or DOWN priority links here */
625
+ if (lol->dir == 1)
626
+ {
627
+ if (strcmp(sent->word[lol->word].string, ",") == 0)
628
+ {
629
+ /* pointing to a comma */
630
+ and_dfs_commas(actx, sent, lol->word);
631
+ }
632
+ else
633
+ {
634
+ actx->and_element[actx->N_and_elements] = lol->word;
635
+ and_dfs_full(actx, lol->word);
636
+ actx->N_and_elements++;
637
+ }
638
+ }
639
+ if (lol->dir == 0)
640
+ {
641
+ actx->outside_word[actx->N_outside_words] = lol->word;
642
+ actx->N_outside_words++;
643
+ }
644
+ }
645
+ }
646
+
647
+ /**
648
+ * This function computes the "and cost", resulting from inequalities
649
+ * in the length of and-list elements. It also computes other
650
+ * information used to construct the "andlist" structure of linkage_info.
651
+ */
652
+ static Andlist * build_andlist(analyze_context_t *actx, Sentence sent)
653
+ {
654
+ int w, i, min, max, j, cost;
655
+ char * s;
656
+ Andlist * new_andlist, * old_andlist;
657
+ Parse_info pi = sent->parse_info;
658
+
659
+ old_andlist = NULL;
660
+ cost = 0;
661
+
662
+ for(w = 0; w<pi->N_words; w++)
663
+ {
664
+ s = sent->word[w].string;
665
+ if (sent->is_conjunction[w])
666
+ {
667
+ actx->N_and_elements = 0;
668
+ actx->N_outside_words = 0;
669
+ for(i=0; i<pi->N_words; i++)
670
+ {
671
+ actx->visited[i] = FALSE;
672
+ actx->and_element_sizes[i] = 0;
673
+ }
674
+ if (sent->dict->left_wall_defined)
675
+ actx->visited[0] = TRUE;
676
+
677
+ and_dfs_commas(actx, sent, w);
678
+ if (actx->N_and_elements == 0) continue;
679
+
680
+ new_andlist = (Andlist *) xalloc(sizeof(Andlist));
681
+ new_andlist->num_elements = actx->N_and_elements;
682
+ new_andlist->num_outside_words = actx->N_outside_words;
683
+
684
+ for (i=0; i < actx->N_and_elements; i++)
685
+ {
686
+ new_andlist->element[i] = actx->and_element[i];
687
+ }
688
+ for (i=0; i < actx->N_outside_words; i++)
689
+ {
690
+ new_andlist->outside_word[i] = actx->outside_word[i];
691
+ }
692
+ new_andlist->conjunction = w;
693
+ new_andlist->next = old_andlist;
694
+ old_andlist = new_andlist;
695
+
696
+ if (actx->N_and_elements > 0)
697
+ {
698
+ min = MAX_SENTENCE;
699
+ max = 0;
700
+ for (i=0; i < actx->N_and_elements; i++)
701
+ {
702
+ j = actx->and_element_sizes[i];
703
+ if (j < min) min = j;
704
+ if (j > max) max = j;
705
+ }
706
+ cost += max-min;
707
+ }
708
+ }
709
+ }
710
+ if (old_andlist) old_andlist->cost = cost;
711
+ return old_andlist;
712
+ }
713
+
714
+ /**
715
+ * This function defines the cost of a link as a function of its length.
716
+ */
717
+ static inline int cost_for_length(int length)
718
+ {
719
+ return length-1;
720
+ }
721
+
722
+ /**
723
+ * Computes the cost of the current parse of the current sentence,
724
+ * due to the length of the links.
725
+ */
726
+ static int link_cost(Parse_info pi)
727
+ {
728
+ int lcost, i;
729
+ lcost = 0;
730
+ for (i = 0; i < pi->N_links; i++)
731
+ {
732
+ lcost += cost_for_length(pi->link_array[i].r - pi->link_array[i].l);
733
+ }
734
+ return lcost;
735
+ }
736
+
737
+ static int null_cost(Parse_info pi)
738
+ {
739
+ /* computes the number of null links in the linkage */
740
+ /* No one seems to care about this -- ALB */
741
+ return 0;
742
+ }
743
+
744
+ static int unused_word_cost(Parse_info pi)
745
+ {
746
+ int lcost, i;
747
+ lcost = 0;
748
+ for (i = 0; i < pi->N_words; i++)
749
+ lcost += (pi->chosen_disjuncts[i] == NULL);
750
+ return lcost;
751
+ }
752
+
753
+ /**
754
+ * Computes the cost of the current parse of the current sentence
755
+ * due to the cost of the chosen disjuncts.
756
+ */
757
+ static float disjunct_cost(Parse_info pi)
758
+ {
759
+ int i;
760
+ float lcost;
761
+ lcost = 0.0;
762
+ for (i = 0; i < pi->N_words; i++)
763
+ {
764
+ if (pi->chosen_disjuncts[i] != NULL)
765
+ lcost += pi->chosen_disjuncts[i]->cost;
766
+ }
767
+ return lcost;
768
+ }
769
+
770
+ /**
771
+ * Returns TRUE if string s represents a strictly smaller match set
772
+ * than does t. An almost identical function appears in and.c.
773
+ * The difference is that here we don't require s and t to be the
774
+ * same length.
775
+ */
776
+ static int strictly_smaller_name(const char * s, const char * t)
777
+ {
778
+ int strictness, ss, tt;
779
+ strictness = 0;
780
+ while ((*s!='\0') || (*t!='\0'))
781
+ {
782
+ if (*s == '\0') {
783
+ ss = '*';
784
+ } else {
785
+ ss = *s;
786
+ s++;
787
+ }
788
+ if (*t == '\0') {
789
+ tt = '*';
790
+ } else {
791
+ tt = *t;
792
+ t++;
793
+ }
794
+ if (ss == tt) continue;
795
+ if ((tt == '*') || (ss == '^')) {
796
+ strictness++;
797
+ } else {
798
+ return FALSE;
799
+ }
800
+ }
801
+ return (strictness > 0);
802
+ }
803
+
804
+ /**
805
+ * The name of the link is set to be the GCD of the names of
806
+ * its two endpoints. Must be called after each extract_links(),
807
+ * etc. since that call issues a brand-new set of links into
808
+ * parse_info.
809
+ */
810
+ static void compute_link_names(Sentence sent)
811
+ {
812
+ int i;
813
+ Parse_info pi = sent->parse_info;
814
+
815
+ for (i = 0; i < pi->N_links; i++)
816
+ {
817
+ pi->link_array[i].name = intersect_strings(sent,
818
+ connector_get_string(pi->link_array[i].lc),
819
+ connector_get_string(pi->link_array[i].rc));
820
+ }
821
+ }
822
+
823
+ /**
824
+ * This fills in the sublinkage->link[].name field. We assume that
825
+ * link_array[].name have already been filled in. As above, in the
826
+ * standard case, the name is just the GCD of the two end points.
827
+ * If pluralization has occurred, then we want to use the name
828
+ * already in link_array[].name. We detect this in two ways.
829
+ * If the endpoints don't match, then we know pluralization
830
+ * has occured. If they do, but the name in link_array[].name
831
+ * is *less* restrictive, then pluralization must have occured.
832
+ */
833
+ static void compute_pp_link_names(Sentence sent, Sublinkage *sublinkage)
834
+ {
835
+ int i;
836
+ const char * s;
837
+ Parse_info pi = sent->parse_info;
838
+
839
+ for (i = 0; i < pi->N_links; i++)
840
+ {
841
+ if (sublinkage->link[i]->l == -1) continue;
842
+ /* NULL's here are quite unexpected -- I think there's a bug
843
+ * elsewhere in the code. But for now, punt. Here's a sentence
844
+ * that triggers a NULL -- "His convalescence was relatively brief
845
+ * and he was able to return and fight at The Wilderness,
846
+ * Spotsylvania and Cold Harbor."
847
+ */
848
+ if (NULL == sublinkage->link[i]->lc) continue;
849
+ if (NULL == sublinkage->link[i]->rc) continue;
850
+ if (!x_match(sent, sublinkage->link[i]->lc, sublinkage->link[i]->rc))
851
+ {
852
+ replace_link_name(sublinkage->link[i], pi->link_array[i].name);
853
+ }
854
+ else
855
+ {
856
+ s = intersect_strings(sent,
857
+ connector_get_string(sublinkage->link[i]->lc),
858
+ connector_get_string(sublinkage->link[i]->rc));
859
+
860
+ if (strictly_smaller_name(s, pi->link_array[i].name))
861
+ replace_link_name(sublinkage->link[i], pi->link_array[i].name);
862
+ else
863
+ replace_link_name(sublinkage->link[i], s);
864
+ }
865
+ }
866
+ }
867
+
868
+ /********************** exported functions *****************************/
869
+
870
+ void init_analyze(Sentence s)
871
+ {
872
+ analyze_context_t *actx = s->analyze_ctxt;
873
+
874
+ if (NULL == actx)
875
+ {
876
+ actx = (analyze_context_t *) malloc (sizeof(analyze_context_t));
877
+ s->analyze_ctxt = actx;
878
+ }
879
+
880
+ actx->structure_violation = FALSE;
881
+ }
882
+
883
+ void free_analyze(Sentence s)
884
+ {
885
+ if (s->analyze_ctxt != NULL) free(s->analyze_ctxt);
886
+ s->analyze_ctxt = NULL;
887
+ }
888
+
889
+ /**
890
+ * This uses link_array. It enumerates and post-processes
891
+ * all the linkages represented by this one. We know this contains
892
+ * at least one fat link.
893
+ */
894
+ Linkage_info analyze_fat_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
895
+ {
896
+ int i;
897
+ Linkage_info li;
898
+ DIS_node *d_root;
899
+ PP_node *pp;
900
+ Postprocessor *postprocessor;
901
+ Sublinkage *sublinkage;
902
+ Parse_info pi = sent->parse_info;
903
+ PP_node accum; /* for domain ancestry check */
904
+ D_type_list * dtl0, * dtl1; /* for domain ancestry check */
905
+
906
+ analyze_context_t *actx = sent->analyze_ctxt;
907
+
908
+ sublinkage = x_create_sublinkage(pi);
909
+ postprocessor = sent->dict->postprocessor;
910
+ build_digraph(actx, pi);
911
+ actx->structure_violation = FALSE;
912
+ d_root = build_DIS_CON_tree(actx, pi); /* may set structure_violation to TRUE */
913
+
914
+ memset(&li, 0, sizeof(li));
915
+ li.N_violations = 0;
916
+ li.improper_fat_linkage = actx->structure_violation;
917
+ li.inconsistent_domains = FALSE;
918
+ li.unused_word_cost = unused_word_cost(sent->parse_info);
919
+ if (opts->use_sat_solver)
920
+ {
921
+ li.disjunct_cost = 0.0;
922
+ }
923
+ else
924
+ {
925
+ li.disjunct_cost = disjunct_cost(pi);
926
+ }
927
+ li.null_cost = null_cost(pi);
928
+ li.link_cost = link_cost(pi);
929
+ li.corpus_cost = -1.0f;
930
+ li.and_cost = 0;
931
+ li.andlist = NULL;
932
+
933
+ if (actx->structure_violation)
934
+ {
935
+ li.N_violations++;
936
+ free_sublinkage(sublinkage);
937
+ free_digraph(actx, pi);
938
+ free_DIS_tree(d_root);
939
+ for (i = 0; i < pi->N_links; i++)
940
+ {
941
+ pi->link_array[i].name = "";
942
+ }
943
+ return li;
944
+ }
945
+
946
+ if (analyze_pass == PP_SECOND_PASS)
947
+ {
948
+ li.andlist = build_andlist(actx, sent);
949
+ li.and_cost = 0;
950
+ if (li.andlist) li.and_cost = li.andlist->cost;
951
+ }
952
+ else li.and_cost = 0;
953
+
954
+ compute_link_names(sent);
955
+
956
+ for (i=0; i<pi->N_links; i++) accum.d_type_array[i] = NULL;
957
+
958
+ /* loop through all the sub linkages */
959
+ for (;;)
960
+ {
961
+ for (i=0; i<pi->N_links; i++)
962
+ {
963
+ actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
964
+ actx->patch_array[i].newl = pi->link_array[i].l;
965
+ actx->patch_array[i].newr = pi->link_array[i].r;
966
+ copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
967
+ }
968
+ fill_patch_array_DIS(actx, d_root, NULL);
969
+
970
+ for (i=0; i<pi->N_links; i++)
971
+ {
972
+ if (actx->patch_array[i].changed || actx->patch_array[i].used)
973
+ {
974
+ sublinkage->link[i]->l = actx->patch_array[i].newl;
975
+ sublinkage->link[i]->r = actx->patch_array[i].newr;
976
+ }
977
+ else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
978
+ (actx->dfs_root_word[pi->link_array[i].r] != -1))
979
+ {
980
+ sublinkage->link[i]->l = -1;
981
+ }
982
+ }
983
+
984
+ if (0 == opts->use_sat_solver)
985
+ {
986
+ compute_pp_link_array_connectors(sent, sublinkage);
987
+ compute_pp_link_names(sent, sublinkage);
988
+ }
989
+
990
+ /* 'analyze_pass' logic added ALB 1/97 */
991
+ if (analyze_pass==PP_FIRST_PASS) {
992
+ post_process_scan_linkage(postprocessor,opts,sent,sublinkage);
993
+ if (!advance_DIS(d_root)) break;
994
+ else continue;
995
+ }
996
+
997
+ pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);
998
+
999
+ if (pp==NULL) {
1000
+ if (postprocessor != NULL) li.N_violations = 1;
1001
+ }
1002
+ else if (pp->violation == NULL) {
1003
+ /* the purpose of this stuff is to make sure the domain
1004
+ ancestry for a link in each of its sentences is consistent. */
1005
+
1006
+ for (i=0; i<pi->N_links; i++) {
1007
+ if (sublinkage->link[i]->l == -1) continue;
1008
+ if (accum.d_type_array[i] == NULL) {
1009
+ accum.d_type_array[i] = copy_d_type(pp->d_type_array[i]);
1010
+ } else {
1011
+ dtl0 = pp->d_type_array[i];
1012
+ dtl1 = accum.d_type_array[i];
1013
+ while((dtl0 != NULL) && (dtl1 != NULL) && (dtl0->type == dtl1->type)) {
1014
+ dtl0 = dtl0->next;
1015
+ dtl1 = dtl1->next;
1016
+ }
1017
+ if ((dtl0 != NULL) || (dtl1 != NULL)) break;
1018
+ }
1019
+ }
1020
+ if (i != pi->N_links) {
1021
+ li.N_violations++;
1022
+ li.inconsistent_domains = TRUE;
1023
+ }
1024
+ }
1025
+ else if (pp->violation!=NULL) {
1026
+ li.N_violations++;
1027
+ }
1028
+
1029
+ if (!advance_DIS(d_root)) break;
1030
+ }
1031
+
1032
+ for (i=0; i<pi->N_links; ++i) {
1033
+ free_d_type(accum.d_type_array[i]);
1034
+ }
1035
+
1036
+ /* if (display_on && (li.N_violations != 0) &&
1037
+ (verbosity > 3) && should_print_messages)
1038
+ printf("P.P. violation in one part of conjunction.\n"); */
1039
+ free_sublinkage(sublinkage);
1040
+ free_digraph(actx, pi);
1041
+ free_DIS_tree(d_root);
1042
+ return li;
1043
+ }
1044
+
1045
+ /**
1046
+ * This uses link_array. It post-processes
1047
+ * this linkage, and prints the appropriate thing. There are no fat
1048
+ * links in it.
1049
+ */
1050
+ Linkage_info analyze_thin_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
1051
+ {
1052
+ int i;
1053
+ Linkage_info li;
1054
+ PP_node * pp;
1055
+ Postprocessor * postprocessor;
1056
+ Sublinkage *sublinkage;
1057
+ Parse_info pi = sent->parse_info;
1058
+ analyze_context_t *actx = sent->analyze_ctxt;
1059
+
1060
+ sublinkage = x_create_sublinkage(pi);
1061
+ postprocessor = sent->dict->postprocessor;
1062
+
1063
+ compute_link_names(sent);
1064
+ for (i=0; i<pi->N_links; i++)
1065
+ {
1066
+ copy_full_link(&(sublinkage->link[i]), &(pi->link_array[i]));
1067
+ }
1068
+
1069
+ if (analyze_pass == PP_FIRST_PASS)
1070
+ {
1071
+ post_process_scan_linkage(postprocessor, opts, sent, sublinkage);
1072
+ free_sublinkage(sublinkage);
1073
+ memset(&li, 0, sizeof(li));
1074
+ return li;
1075
+ }
1076
+
1077
+ build_digraph(actx, pi);
1078
+
1079
+ /* The code below can be used to generate the "islands" array.
1080
+ * For this to work, however, you have to call "build_digraph"
1081
+ * first (as in analyze_fat_linkage). and then "free_digraph".
1082
+ */
1083
+ pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);
1084
+
1085
+ memset(&li, 0, sizeof(li));
1086
+ li.N_violations = 0;
1087
+ li.and_cost = 0;
1088
+ li.unused_word_cost = unused_word_cost(sent->parse_info);
1089
+ li.improper_fat_linkage = FALSE;
1090
+ li.inconsistent_domains = FALSE;
1091
+ if (opts->use_sat_solver)
1092
+ {
1093
+ li.disjunct_cost = 0.0;
1094
+ }
1095
+ else
1096
+ {
1097
+ li.disjunct_cost = disjunct_cost(pi);
1098
+ }
1099
+ li.null_cost = null_cost(pi);
1100
+ li.link_cost = link_cost(pi);
1101
+ li.corpus_cost = -1.0f;
1102
+ li.andlist = NULL;
1103
+
1104
+ if (pp == NULL)
1105
+ {
1106
+ if (postprocessor != NULL) li.N_violations = 1;
1107
+ }
1108
+ else if (pp->violation != NULL)
1109
+ {
1110
+ li.N_violations++;
1111
+ }
1112
+
1113
+ free_sublinkage(sublinkage);
1114
+ free_digraph(actx, pi);
1115
+ return li;
1116
+ }
1117
+
1118
+ void extract_thin_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
1119
+ {
1120
+ int i;
1121
+ Parse_info pi = sent->parse_info;
1122
+
1123
+ linkage->num_sublinkages = 1;
1124
+ linkage->sublinkage = ex_create_sublinkage(pi);
1125
+
1126
+ compute_link_names(sent);
1127
+ for (i=0; i<pi->N_links; ++i)
1128
+ {
1129
+ linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
1130
+ }
1131
+ }
1132
+
1133
+ #ifdef DBG
1134
+ static void prt_lol(Sentence sent , List_o_links *lol)
1135
+ {
1136
+ /* It appears that the list of links is always even in length:
1137
+ * The head word first, followed by a modifier.
1138
+ */
1139
+ while (lol)
1140
+ {
1141
+ // printf ("%d ", lol->link);
1142
+ printf ("%s ", sent->word[lol->word].string);
1143
+ lol = lol->next;
1144
+ }
1145
+ }
1146
+
1147
+ static void prt_con_list(Sentence, CON_list *);
1148
+ static void prt_dis_list(Sentence sent, DIS_list *dis)
1149
+ {
1150
+ while(dis)
1151
+ {
1152
+ /* There are three possibilities:
1153
+ * Either there's another conjunction (and we should print it)
1154
+ * Or there's a head word, with its modifiers in its list-o-links,
1155
+ * Or there's just the bare, naked word by itself.
1156
+ */
1157
+ if (dis->dn->cl)
1158
+ {
1159
+ prt_con_list(sent, dis->dn->cl);
1160
+ }
1161
+ else if (dis->dn->lol)
1162
+ {
1163
+ printf("[");
1164
+ prt_lol(sent, dis->dn->lol);
1165
+ printf("]");
1166
+ }
1167
+ else
1168
+ {
1169
+ int wd = dis->dn->word;
1170
+ printf("%s ", sent->word[wd].string);
1171
+ }
1172
+ dis = dis->next;
1173
+ }
1174
+ }
1175
+
1176
+ static void prt_con_list(Sentence sent, CON_list *con)
1177
+ {
1178
+ while(con)
1179
+ {
1180
+ int wd = con->cn->word;
1181
+ printf("(%s ", sent->word[wd].string);
1182
+ prt_dis_list(sent, con->cn->dl);
1183
+ printf(") ");
1184
+ con = con->next;
1185
+ }
1186
+ }
1187
+ static void prt_dis_con_tree(Sentence sent, DIS_node *dis)
1188
+ {
1189
+ prt_con_list(sent, dis->cl);
1190
+ printf ("\n");
1191
+ }
1192
+ #else
1193
+ static inline void prt_dis_con_tree(Sentence sent, DIS_node *dis) {}
1194
+ #endif
1195
+
1196
+ /**
1197
+ * This procedure mimics analyze_fat_linkage in order to
1198
+ * extract the sublinkages and copy them to the Linkage
1199
+ * data structure passed in.
1200
+ */
1201
+ void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
1202
+ {
1203
+ int i, j, N_thin_links;
1204
+ DIS_node *d_root;
1205
+ int num_sublinkages;
1206
+ Sublinkage * sublinkage;
1207
+ Parse_info pi = sent->parse_info;
1208
+
1209
+ analyze_context_t *actx = sent->analyze_ctxt;
1210
+
1211
+ build_digraph(actx, pi);
1212
+ actx->structure_violation = FALSE;
1213
+ d_root = build_DIS_CON_tree(actx, pi);
1214
+
1215
+ if (actx->structure_violation)
1216
+ {
1217
+ compute_link_names(sent);
1218
+ linkage->num_sublinkages=1;
1219
+ linkage->sublinkage = ex_create_sublinkage(pi);
1220
+
1221
+ /* This will have fat links! */
1222
+ for (i=0; i<pi->N_links; ++i)
1223
+ {
1224
+ linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
1225
+ }
1226
+
1227
+ free_digraph(actx, pi);
1228
+ free_DIS_tree(d_root);
1229
+ return;
1230
+ }
1231
+
1232
+ /* first get number of sublinkages and allocate space */
1233
+ num_sublinkages = 0;
1234
+ for (;;) {
1235
+ num_sublinkages++;
1236
+ if (!advance_DIS(d_root)) break;
1237
+ }
1238
+
1239
+ linkage->num_sublinkages = num_sublinkages;
1240
+ linkage->sublinkage =
1241
+ (Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
1242
+ for (i=0; i<num_sublinkages; ++i) {
1243
+ linkage->sublinkage[i].link = NULL;
1244
+ linkage->sublinkage[i].pp_info = NULL;
1245
+ linkage->sublinkage[i].violation = NULL;
1246
+ }
1247
+
1248
+ /* now fill out the sublinkage arrays */
1249
+ compute_link_names(sent);
1250
+
1251
+ sublinkage = x_create_sublinkage(pi);
1252
+ num_sublinkages = 0;
1253
+ for (;;)
1254
+ {
1255
+ for (i = 0; i < pi->N_links; i++)
1256
+ {
1257
+ actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
1258
+ actx->patch_array[i].newl = pi->link_array[i].l;
1259
+ actx->patch_array[i].newr = pi->link_array[i].r;
1260
+ copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
1261
+ }
1262
+ fill_patch_array_DIS(actx, d_root, NULL);
1263
+
1264
+ for (i = 0; i < pi->N_links; i++)
1265
+ {
1266
+ if (actx->patch_array[i].changed || actx->patch_array[i].used)
1267
+ {
1268
+ sublinkage->link[i]->l = actx->patch_array[i].newl;
1269
+ sublinkage->link[i]->r = actx->patch_array[i].newr;
1270
+ }
1271
+ else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
1272
+ (actx->dfs_root_word[pi->link_array[i].r] != -1))
1273
+ {
1274
+ sublinkage->link[i]->l = -1;
1275
+ }
1276
+ }
1277
+
1278
+ if (0 == opts->use_sat_solver)
1279
+ {
1280
+ compute_pp_link_array_connectors(sent, sublinkage);
1281
+ compute_pp_link_names(sent, sublinkage);
1282
+ }
1283
+
1284
+ /* Don't copy the fat links into the linkage */
1285
+ N_thin_links = 0;
1286
+ for (i = 0; i < pi->N_links; ++i)
1287
+ {
1288
+ if (sublinkage->link[i]->l == -1) continue;
1289
+ N_thin_links++;
1290
+ }
1291
+
1292
+ linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
1293
+ linkage->sublinkage[num_sublinkages].link =
1294
+ (Link **) exalloc(sizeof(Link *)*N_thin_links);
1295
+ linkage->sublinkage[num_sublinkages].pp_info = NULL;
1296
+ linkage->sublinkage[num_sublinkages].violation = NULL;
1297
+
1298
+ for (i = 0, j = 0; i < pi->N_links; ++i)
1299
+ {
1300
+ if (sublinkage->link[i]->l == -1) continue;
1301
+ linkage->sublinkage[num_sublinkages].link[j++] =
1302
+ excopy_link(sublinkage->link[i]);
1303
+ }
1304
+
1305
+ num_sublinkages++;
1306
+ if (!advance_DIS(d_root)) break;
1307
+ }
1308
+
1309
+ free_sublinkage(sublinkage);
1310
+ free_digraph(actx, pi);
1311
+ if (linkage->dis_con_tree)
1312
+ free_DIS_tree(linkage->dis_con_tree);
1313
+ linkage->dis_con_tree = d_root;
1314
+
1315
+ prt_dis_con_tree(sent, d_root);
1316
+ }
1317
+