grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,15 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+
15
+
@@ -0,0 +1,1836 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include <stdarg.h>
15
+ #include <string.h>
16
+ #include <link-grammar/api.h>
17
+ #include "error.h"
18
+ #include "constituents.h"
19
+
20
+ #define MAXCONSTITUENTS 8192 /* number of slots in con_context_t.constituent[] */
21
+ #define MAXSUBL 16 /* first dimension of con_context_t.word_used[][] */
22
+ #define OPEN_BRACKET '['
23
+ #define CLOSE_BRACKET ']'
24
+
25
+ typedef enum {OPEN_TOK, CLOSE_TOK, WORD_TOK} CType;
26
+ typedef enum {NONE, STYPE, PTYPE, QTYPE, QDTYPE} WType; /* element type of con_context_t.wordtype[] */
27
+
28
+ typedef struct /* one entry of con_context_t.constituent[] */
29
+ {
30
+ int left; /* index into linkage->word[] of the first word of the span */
31
+ int right; /* index into linkage->word[] of the last word of the span (inclusive) */
32
+ const char * type; /* phrase label; compared against names such as "S" and "NP" */
33
+ char domain_type; /* one-character domain code, e.g. 'v', 'a', 'z'; gen_comp() stores 'x' */
34
+ const char * start_link; /* link name, tested with post_process_match() */
35
+ int start_num;
36
+ int subl; /* presumably the sublinkage this constituent came from -- confirm against the code that fills it in */
37
+ int canon; /* canonical constituent number; merge_constituents() initializes it to the entry's own index */
38
+ int valid; /* 1 while usable; merge_constituents() clears it for spans with right < left */
39
+ #ifdef AUX_CODE_IS_DEAD
40
+ /* The only code that actually sets aux to a non-zero value is code
41
+ * followed by code that sets it to zero. -- it's dead code, and so
42
+ * aux is never actually used. Comment this code out.
43
+ */
44
+ int aux;
45
+ /* 0: it's an ordinary VP (or other type);
46
+ * 1: it's an AUX, don't print it;
47
+ * 2: it's an AUX, and print it
48
+ */
49
+ #endif /* AUX_CODE_IS_DEAD */
50
+ } constituent_t;
51
+
52
+ /* XXX it seems like the old code worked fine with MAX_ELTS=10.
 * MAX_ELTS is the capacity of andlist_t.e[] and of con_context_t.templist[]. */
53
+ #define MAX_ELTS 100
54
+ typedef struct /* one list of constituents, as recorded by find_next_element() */
55
+ {
56
+ int num; /* number of entries of e[] that are in use */
57
+ int e[MAX_ELTS]; /* indices into con_context_t.constituent[] */
58
+ int valid;
59
+ } andlist_t;
60
+
61
+ /*
62
+ * Context used to store assorted intermediate data
63
+ * when the constituent string is being generated.
 * MAX_ANDS is the number of slots in the andlist[] member.
64
+ */
65
+ #define MAX_ANDS 1024
66
+ typedef struct
67
+ {
68
+ String_set * phrase_ss; /* string set passed to string_set_add() for type and start_link strings */
69
+ WType wordtype[MAX_SENTENCE];
70
+ int word_used[MAXSUBL][MAX_SENTENCE]; /* [sublinkage][word]; tested against 1 to find words used in that sublinkage */
71
+ int templist[MAX_ELTS]; /* scratch list of constituent indices built up by find_next_element() */
72
+ constituent_t constituent[MAXCONSTITUENTS]; /* all constituents generated so far */
73
+ andlist_t andlist[MAX_ANDS]; /* completed lists copied out of templist[] */
74
+ } con_context_t;
75
+
76
+ /* ================================================================ */
77
+
78
+ static inline int uppercompare(const char * s, const char * t)
79
+ {
80
+ return (FALSE == utf8_upper_match(s,t));
81
+ }
82
+
83
+ /**
84
+ * If a constituent c has a comma at either end, we exclude the
85
+ * comma. (We continue to shift the boundary until we get to
86
+ * something inside the current sublinkage)
87
+ */
88
+ static void adjust_for_left_comma(con_context_t * ctxt, Linkage linkage, int c)
89
+ {
90
+ int w;
91
+ w = ctxt->constituent[c].left;
92
+ if (strcmp(linkage->word[w], ",") == 0)
93
+ {
94
+ w++;
95
+ while (1) {
96
+ if (ctxt->word_used[linkage->current][w] == 1) break;
97
+ w++;
98
+ }
99
+ }
100
+ ctxt->constituent[c].left = w;
101
+ }
102
+
103
+ static void adjust_for_right_comma(con_context_t *ctxt, Linkage linkage, int c)
104
+ {
105
+ int w;
106
+ w = ctxt->constituent[c].right;
107
+ if ((strcmp(linkage->word[w], ",") == 0) ||
108
+ (strcmp(linkage->word[w], "RIGHT-WALL") == 0))
109
+ {
110
+ w--;
111
+ while (1)
112
+ {
113
+ if (ctxt->word_used[linkage->current][w]==1) break;
114
+ w--;
115
+ }
116
+ }
117
+ ctxt->constituent[c].right = w;
118
+ }
119
+
120
+ static void print_constituent(con_context_t *ctxt, Linkage linkage, int c)
121
+ {
122
+ int w;
123
+ if (verbosity < 2) return;
124
+
125
+ printf(" c %2d %4s [%c] (%2d-%2d): ",
126
+ c, ctxt->constituent[c].type, ctxt->constituent[c].domain_type,
127
+ ctxt->constituent[c].left, ctxt->constituent[c].right);
128
+ for (w = ctxt->constituent[c].left; w <= ctxt->constituent[c].right; w++) {
129
+ printf("%s ", linkage->word[w]); /**PV**/
130
+ }
131
+ printf("\n");
132
+ }
133
+
134
+ /******************************************************
135
+ * These functions do the bulk of the actual
136
+ * constituent-generating; they're called once for each
137
+ * sublinkage
138
+ *********************************************************/
139
+
140
+ /**
141
+ * This function looks for constituents of type ctype1. Say it finds
142
+ * one, call it c1. It searches for the next larger constituent of
143
+ * type ctype2, call it c2. It then generates a new constituent of
144
+ * ctype3, containing all the words in c2 but not c1.
145
+ */
146
+ static int gen_comp(con_context_t *ctxt, Linkage linkage,
147
+ int numcon_total, int numcon_subl,
148
+ const char * ctype1, const char * ctype2,
149
+ const char * ctype3, int x)
150
+ {
151
+ int w, w2, w3, c, c1, c2, done;
152
+ c = numcon_total + numcon_subl;
153
+
154
+ for (c1=numcon_total; c1<numcon_total + numcon_subl; c1++)
155
+ {
156
+ /* If ctype1 is NP, it has to be an appositive to continue */
157
+ if ((x==4) && (post_process_match("MX#*", ctxt->constituent[c1].start_link)==0))
158
+ continue;
159
+
160
+ /* If ctype1 is X, and domain_type is t, it's an infinitive - skip it */
161
+ if ((x==2) && (ctxt->constituent[c1].domain_type=='t'))
162
+ continue;
163
+
164
+ /* If it's domain-type z, it's a subject-relative clause;
165
+ the VP doesn't need an NP */
166
+ if (ctxt->constituent[c1].domain_type=='z')
167
+ continue;
168
+
169
+ /* If ctype1 is X or VP, and it's not started by an S, don't generate an NP
170
+ (Neither of the two previous checks are necessary now, right?) */
171
+ if ((x==1 || x==2) &&
172
+ (((post_process_match("S", ctxt->constituent[c1].start_link) == 0) &&
173
+ (post_process_match("SX", ctxt->constituent[c1].start_link) == 0) &&
174
+ (post_process_match("SF", ctxt->constituent[c1].start_link) == 0)) ||
175
+ (post_process_match("S##w", ctxt->constituent[c1].start_link) != 0)))
176
+ continue;
177
+
178
+ /* If it's an SBAR (relative clause case), it has to be a relative clause */
179
+ if ((x==3) &&
180
+ ((post_process_match("Rn", ctxt->constituent[c1].start_link) == 0) &&
181
+ (post_process_match("R*", ctxt->constituent[c1].start_link) == 0) &&
182
+ (post_process_match("MX#r", ctxt->constituent[c1].start_link) == 0) &&
183
+ (post_process_match("Mr", ctxt->constituent[c1].start_link) == 0) &&
184
+ (post_process_match("MX#d", ctxt->constituent[c1].start_link) == 0)))
185
+ continue;
186
+
187
+ /* If ctype1 is SBAR (clause opener case), it has to be an f domain */
188
+ if ((x==5) && (ctxt->constituent[c1].domain_type!='f'))
189
+ continue;
190
+
191
+ /* If ctype1 is SBAR (pp opener case), it has to be a g domain */
192
+ if ((x==6) && (ctxt->constituent[c1].domain_type!='g'))
193
+ continue;
194
+
195
+ /* If ctype1 is NP (paraphrase case), it has to be started by an SI */
196
+ if ((x==7) && (post_process_match("SI", ctxt->constituent[c1].start_link)==0))
197
+ continue;
198
+
199
+ /* If ctype1 is VP (participle modifier case), it has to be
200
+ started by an Mv or Mg */
201
+ if ((x==8) && (post_process_match("M", ctxt->constituent[c1].start_link)==0))
202
+ continue;
203
+
204
+ /* If ctype1 is VP (participle opener case), it has
205
+ to be started by a COp */
206
+ if ((x==9) && (post_process_match("COp", ctxt->constituent[c1].start_link)==0))
207
+ continue;
208
+
209
+ /* Now start at the bounds of c1, and work outwards until you
210
+ find a larger constituent of type ctype2 */
211
+ if (!(strcmp(ctxt->constituent[c1].type, ctype1)==0))
212
+ continue;
213
+
214
+ if (verbosity >= 2)
215
+ printf("Generating complement constituent for c %d of type %s\n",
216
+ c1, ctype1);
217
+ done = 0;
218
+ for (w2=ctxt->constituent[c1].left; (done==0) && (w2>=0); w2--) {
219
+ for (w3=ctxt->constituent[c1].right; w3<linkage->num_words; w3++) {
220
+ for (c2=numcon_total; (done==0) &&
221
+ (c2 < numcon_total + numcon_subl); c2++) {
222
+ if (!((ctxt->constituent[c2].left==w2) &&
223
+ (ctxt->constituent[c2].right==w3)) || (c2==c1))
224
+ continue;
225
+ if (!(strcmp(ctxt->constituent[c2].type, ctype2)==0))
226
+ continue;
227
+
228
+ /* if the new constituent (c) is to the left
229
+ of c1, its right edge should be adjacent to the
230
+ left edge of c1 - or as close as possible
231
+ without going outside the current sublinkage.
232
+ (Or substituting right and left as necessary.) */
233
+
234
+ if ((x==5) || (x==6) || (x==9)) {
235
+ /* This is the case where c is to the
236
+ RIGHT of c1 */
237
+ w = ctxt->constituent[c1].right+1;
238
+ while(1) {
239
+ if (ctxt->word_used[linkage->current][w]==1)
240
+ break;
241
+ w++;
242
+ }
243
+ if (w > ctxt->constituent[c2].right)
244
+ {
245
+ done=1;
246
+ continue;
247
+ }
248
+ ctxt->constituent[c].left = w;
249
+ ctxt->constituent[c].right = ctxt->constituent[c2].right;
250
+ }
251
+ else {
252
+ w = ctxt->constituent[c1].left-1;
253
+ while(1) {
254
+ if (ctxt->word_used[linkage->current][w] == 1)
255
+ break;
256
+ w--;
257
+ }
258
+ if (w < ctxt->constituent[c2].left) {
259
+ done=1;
260
+ continue;
261
+ }
262
+ ctxt->constituent[c].right = w;
263
+ ctxt->constituent[c].left = ctxt->constituent[c2].left;
264
+ }
265
+
266
+ adjust_for_left_comma(ctxt, linkage, c1);
267
+ adjust_for_right_comma(ctxt, linkage, c1);
268
+
269
+ ctxt->constituent[c].type =
270
+ string_set_add(ctype3, ctxt->phrase_ss);
271
+ ctxt->constituent[c].domain_type = 'x';
272
+ ctxt->constituent[c].start_link =
273
+ string_set_add("XX", ctxt->phrase_ss);
274
+ ctxt->constituent[c].start_num =
275
+ ctxt->constituent[c1].start_num; /* bogus */
276
+ if (verbosity >= 2)
277
+ {
278
+ printf("Larger c found: c %d (%s); ",
279
+ c2, ctype2);
280
+ printf("Adding constituent:\n");
281
+ print_constituent(ctxt, linkage, c);
282
+ }
283
+ c++;
284
+ if (MAXCONSTITUENTS <= c)
285
+ {
286
+ err_ctxt ec;
287
+ ec.sent = linkage->sent;
288
+ err_msg(&ec, Error, "Error: Too many constituents (a).\n");
289
+ c--;
290
+ }
291
+ done = 1;
292
+ }
293
+ }
294
+ }
295
+ if (verbosity >= 2)
296
+ {
297
+ if (done == 0)
298
+ printf("No constituent added, because no larger %s " \
299
+ " was found\n", ctype2);
300
+ }
301
+ }
302
+ numcon_subl = c - numcon_total;
303
+ return numcon_subl;
304
+ }
305
+
306
+ /**
307
+ * Look for a constituent started by an MVs or MVg.
308
+ * Find any VP's or ADJP's that contain it (without going
309
+ * beyond a larger S or NP). Adjust them so that
310
+ * they end right before the m domain starts.
311
+ */
312
+ static void adjust_subordinate_clauses(con_context_t *ctxt, Linkage linkage,
313
+ int numcon_total,
314
+ int numcon_subl)
315
+ {
316
+ int c, w, c2, w2, done;
317
+
318
+ for (c=numcon_total; c<numcon_total + numcon_subl; c++) {
319
+ if ((post_process_match("MVs", ctxt->constituent[c].start_link) == 1) ||
320
+ (post_process_match("MVg", ctxt->constituent[c].start_link)==1)) {
321
+ done=0;
322
+ for (w2=ctxt->constituent[c].left-1; (done==0) && w2>=0; w2--) {
323
+ for (c2=numcon_total; c2<numcon_total + numcon_subl; c2++) {
324
+ if (!((ctxt->constituent[c2].left==w2) &&
325
+ (ctxt->constituent[c2].right >= ctxt->constituent[c].right)))
326
+ continue;
327
+ if ((strcmp(ctxt->constituent[c2].type, "S") == 0) ||
328
+ (strcmp(ctxt->constituent[c2].type, "NP") == 0)) {
329
+ done=1;
330
+ break;
331
+ }
332
+ if ((ctxt->constituent[c2].domain_type == 'v') ||
333
+ (ctxt->constituent[c2].domain_type == 'a')) {
334
+ w = ctxt->constituent[c].left-1;
335
+ while (1) {
336
+ if (ctxt->word_used[linkage->current][w] == 1) break;
337
+ w--;
338
+ }
339
+ ctxt->constituent[c2].right = w;
340
+
341
+ if (verbosity >= 2)
342
+ printf("Adjusting constituent %d:\n", c2);
343
+ print_constituent(ctxt, linkage, c2);
344
+ }
345
+ }
346
+ }
347
+ if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
348
+ ctxt->constituent[c].left++;
349
+ }
350
+ }
351
+ }
352
+
353
+ /******************************************************
354
+ * These functions are called once, after constituents
355
+ * for each sublinkage have been generated, to merge them
356
+ * together and fix up some other things.
357
+ *
358
+ ********************************************************/
359
+
360
+ /**
361
+ * Here we're looking for the next andlist element to add on
362
+ * to a conjectural andlist, stored in the array templist.
363
+ * We go through the constituents, starting at "start".
364
+ */
365
+ static int find_next_element(con_context_t *ctxt,
366
+ Linkage linkage,
367
+ int start,
368
+ int numcon_total,
369
+ int num_elements,
370
+ int num_lists)
371
+ {
372
+ int c, a, ok, c2, c3, addedone=0, n;
373
+
374
+ assert(num_elements <= MAX_ELTS, "Constutent element array overflow!\n");
375
+
376
+ n = num_lists;
377
+ for (c=start+1; c<numcon_total; c++)
378
+ {
379
+ constituent_t *cc = &ctxt->constituent[c];
380
+
381
+ if (cc->valid == 0)
382
+ continue;
383
+ if (strcmp(ctxt->constituent[ctxt->templist[0]].type, cc->type)!=0)
384
+ continue;
385
+ ok = 1;
386
+
387
+ /* We're considering adding constituent c to the andlist.
388
+ If c is in the same sublinkage as one of the other andlist
389
+ elements, don't add it. If it overlaps with one of the other
390
+ constituents, don't add it. If there's a constituent
391
+ identical to c that occurs in a sublinkage in which one of
392
+ the other elements occurs, don't add it. */
393
+
394
+ for (a=0; a<num_elements; a++)
395
+ {
396
+ int t = ctxt->templist[a];
397
+ constituent_t *ct = &ctxt->constituent[t];
398
+
399
+ if (cc->subl == ct->subl)
400
+ ok=0;
401
+ if (((cc->left < ct->left) && (cc->right > ct->left))
402
+ ||
403
+ ((cc->right > ct->right) && (cc->left < ct->right))
404
+ ||
405
+ ((cc->right > ct->right) && (cc->left < ct->right))
406
+ ||
407
+ ((cc->left > ct->left) && (cc->right < ct->right)))
408
+ ok=0;
409
+
410
+ for (c2=0; c2<numcon_total; c2++)
411
+ {
412
+ if (ctxt->constituent[c2].canon != cc->canon)
413
+ continue;
414
+ for (c3=0; c3<numcon_total; c3++)
415
+ {
416
+ if ((ctxt->constituent[c3].canon == ct->canon)
417
+ && (ctxt->constituent[c3].subl == ctxt->constituent[c2].subl))
418
+ ok=0;
419
+ }
420
+ }
421
+ }
422
+ if (ok == 0) continue;
423
+
424
+ ctxt->templist[num_elements] = c;
425
+ addedone = 1;
426
+ num_lists = find_next_element(ctxt, linkage, c, numcon_total,
427
+ num_elements+1, num_lists);
428
+
429
+ /* Test for overlow of the and-list.
430
+ * With the current parser, the following will cause an
431
+ * overflow:
432
+ *
433
+ * I have not seen the grysbok, or the suni, or the dibitag, or
434
+ * the lechwi, or the aoul, or the gerenuk, or the blaauwbok,
435
+ * or the chevrotain, or lots of others, but who in the world
436
+ * could guess what they were or what they looked like, judging
437
+ * only from the names?
438
+ */
439
+ if (MAX_ANDS <= num_lists)
440
+ {
441
+ err_ctxt ec;
442
+ ec.sent = linkage->sent;
443
+ err_msg(&ec, Error, "Error: Constituent overflowed andlist!\n");
444
+ return MAX_ANDS;
445
+ }
446
+ }
447
+
448
+ if (addedone == 0 && num_elements > 1)
449
+ {
450
+ for (a=0; a<num_elements; a++) {
451
+ ctxt->andlist[num_lists].e[a] = ctxt->templist[a];
452
+ ctxt->andlist[num_lists].num = num_elements;
453
+ }
454
+ num_lists++;
455
+ }
456
+ return num_lists;
457
+ }
458
+
459
+ static int merge_constituents(con_context_t *ctxt, Linkage linkage, int numcon_total)
460
+ {
461
+ int c1, c2=0, c3, ok, a, n, a2, n2, match, listmatch, a3;
462
+ int num_lists, num_elements;
463
+ int leftend, rightend;
464
+
465
+ for (c1=0; c1<numcon_total; c1++)
466
+ {
467
+ ctxt->constituent[c1].valid = 1;
468
+
469
+ /* Find and invalidate any constituents with negative length */
470
+ if(ctxt->constituent[c1].right < ctxt->constituent[c1].left)
471
+ {
472
+ if(verbosity >= 2)
473
+ {
474
+ err_ctxt ec;
475
+ ec.sent = linkage->sent;
476
+ err_msg(&ec, Warn,
477
+ "Warning: Constituent %d has negative length. Deleting it.\n", c1);
478
+ }
479
+ ctxt->constituent[c1].valid = 0;
480
+ }
481
+ ctxt->constituent[c1].canon = c1;
482
+ }
483
+
484
+ /* First go through and give each constituent a canonical number
485
+ (the index number of the lowest-numbered constituent
486
+ identical to it) */
487
+
488
+ for (c1 = 0; c1 < numcon_total; c1++)
489
+ {
490
+ if (ctxt->constituent[c1].canon != c1) continue;
491
+ for (c2 = c1 + 1; c2 < numcon_total; c2++)
492
+ {
493
+ if ((ctxt->constituent[c1].left == ctxt->constituent[c2].left) &&
494
+ (ctxt->constituent[c1].right == ctxt->constituent[c2].right) &&
495
+ (strcmp(ctxt->constituent[c1].type, ctxt->constituent[c2].type) == 0))
496
+ {
497
+ ctxt->constituent[c2].canon = c1;
498
+ }
499
+ }
500
+ }
501
+
502
+ /* If constituents A and B in different sublinkages X and Y
503
+ * have one endpoint in common, but A is larger at the other end,
504
+ * and B has no duplicate in X, then declare B invalid. (Example:
505
+ * " [A [B We saw the cat B] and the dog A] "
506
+ */
507
+ for (c1 = 0; c1 < numcon_total; c1++)
508
+ {
509
+ if (ctxt->constituent[c1].valid == 0) continue;
510
+ for (c2 = 0; c2 < numcon_total; c2++)
511
+ {
512
+ if (ctxt->constituent[c2].subl == ctxt->constituent[c1].subl) continue;
513
+ ok = 1;
514
+ /* Does c2 have a duplicate in the sublinkage containing c1?
515
+ If so, bag it */
516
+ for (c3 = 0; c3 < numcon_total; c3++)
517
+ {
518
+ if ((ctxt->constituent[c2].canon == ctxt->constituent[c3].canon) &&
519
+ (ctxt->constituent[c3].subl == ctxt->constituent[c1].subl))
520
+ ok = 0;
521
+ }
522
+ for (c3 = 0; c3 < numcon_total; c3++)
523
+ {
524
+ if ((ctxt->constituent[c1].canon == ctxt->constituent[c3].canon) &&
525
+ (ctxt->constituent[c3].subl == ctxt->constituent[c2].subl))
526
+ ok = 0;
527
+ }
528
+ if (ok == 0) continue;
529
+ if ((ctxt->constituent[c1].left == ctxt->constituent[c2].left) &&
530
+ (ctxt->constituent[c1].right > ctxt->constituent[c2].right) &&
531
+ (strcmp(ctxt->constituent[c1].type, ctxt->constituent[c2].type) == 0))
532
+ {
533
+ ctxt->constituent[c2].valid = 0;
534
+ }
535
+
536
+ if ((ctxt->constituent[c1].left < ctxt->constituent[c2].left) &&
537
+ (ctxt->constituent[c1].right == ctxt->constituent[c2].right) &&
538
+ (strcmp(ctxt->constituent[c1].type, ctxt->constituent[c2].type) == 0))
539
+ {
540
+ ctxt->constituent[c2].valid = 0;
541
+ }
542
+ }
543
+ }
544
+
545
+ /* Now go through and find duplicates; if a pair is found,
546
+ * mark one as invalid. (It doesn't matter if they're in the
547
+ * same sublinkage or not)
548
+ */
549
+ for (c1 = 0; c1 < numcon_total; c1++)
550
+ {
551
+ if (ctxt->constituent[c1].valid == 0) continue;
552
+ for (c2 = c1 + 1; c2 < numcon_total; c2++)
553
+ {
554
+ if (ctxt->constituent[c2].canon == ctxt->constituent[c1].canon)
555
+ ctxt->constituent[c2].valid = 0;
556
+ }
557
+ }
558
+
559
+ /* Now we generate the and-lists. An and-list is a set of mutually
560
+ * exclusive constituents. Each constituent in the list may not
561
+ * be present in the same sublinkage as any of the others.
562
+ */
563
+ num_lists = 0;
564
+ for (c1 = 0; c1 < numcon_total; c1++)
565
+ {
566
+ if (ctxt->constituent[c1].valid == 0) continue;
567
+ num_elements = 1;
568
+ ctxt->templist[0] = c1;
569
+ num_lists = find_next_element(ctxt, linkage, c1, numcon_total,
570
+ num_elements, num_lists);
571
+
572
+ /* If we're overflowing, then punt */
573
+ if (MAX_ANDS <= num_lists)
574
+ break;
575
+ }
576
+
577
+ if (verbosity >= 2)
578
+ {
579
+ printf("And-lists:\n");
580
+ for (n=0; n<num_lists; n++)
581
+ {
582
+ printf(" %d: ", n);
583
+ for (a=0; a < ctxt->andlist[n].num; a++)
584
+ {
585
+ printf("%d ", ctxt->andlist[n].e[a]);
586
+ }
587
+ printf("\n");
588
+ }
589
+ }
590
+
591
+ /* Now we prune out any andlists that are subsumed by other
592
+ * andlists--e.g. if andlist X contains constituents A and B,
593
+ * and Y contains A B and C, we throw out X
594
+ */
595
+ for (n = 0; n < num_lists; n++)
596
+ {
597
+ ctxt->andlist[n].valid = 1;
598
+ for (n2 = 0; n2 < num_lists; n2++)
599
+ {
600
+ if (n2 == n) continue;
601
+ if (ctxt->andlist[n2].num < ctxt->andlist[n].num)
602
+ continue;
603
+
604
+ listmatch = 1;
605
+ for (a = 0; a < ctxt->andlist[n].num; a++)
606
+ {
607
+ match = 0;
608
+ for (a2 = 0; a2 < ctxt->andlist[n2].num; a2++)
609
+ {
610
+ if (ctxt->andlist[n2].e[a2] == ctxt->andlist[n].e[a])
611
+ match = 1;
612
+ }
613
+ if (match == 0) listmatch = 0;
614
+ /* At least one element was not matched by n2 */
615
+ }
616
+ if (listmatch == 1) ctxt->andlist[n].valid = 0;
617
+ }
618
+ }
619
+
620
+ /* If an element of an andlist contains an element of another
621
+ * andlist, it must contain the entire andlist.
622
+ */
623
+ for (n = 0; n < num_lists; n++)
624
+ {
625
+ if (ctxt->andlist[n].valid == 0)
626
+ continue;
627
+ for (a = 0; (a < ctxt->andlist[n].num) && (ctxt->andlist[n].valid); a++)
628
+ {
629
+ for (n2 = 0; (n2 < num_lists) && (ctxt->andlist[n].valid); n2++)
630
+ {
631
+ if ((n2 == n) || (ctxt->andlist[n2].valid == 0))
632
+ continue;
633
+ for (a2 = 0; (a2 < ctxt->andlist[n2].num) && (ctxt->andlist[n].valid); a2++)
634
+ {
635
+ c1 = ctxt->andlist[n].e[a];
636
+ c2 = ctxt->andlist[n2].e[a2];
637
+ if (c1 == c2)
638
+ continue;
639
+ if (!((ctxt->constituent[c2].left <= ctxt->constituent[c1].left) &&
640
+ (ctxt->constituent[c2].right >= ctxt->constituent[c1].right)))
641
+ continue;
642
+ if (verbosity >= 2)
643
+ printf("Found that c%d in list %d is bigger " \
644
+ "than c%d in list %d\n", c2, n2, c1, n);
645
+ ok = 1;
646
+
647
+ /* An element of n2 contains an element of n.
648
+ * Now, we check to see if that element of n2
649
+ * contains ALL the elements of n.
650
+ * If not, n is invalid.
651
+ */
652
+ for (a3 = 0; a3 < ctxt->andlist[n].num; a3++)
653
+ {
654
+ c3 = ctxt->andlist[n].e[a3];
655
+ if ((ctxt->constituent[c2].left>ctxt->constituent[c3].left) ||
656
+ (ctxt->constituent[c2].right<ctxt->constituent[c3].right))
657
+ ok = 0;
658
+ }
659
+ if (ok != 0)
660
+ continue;
661
+ ctxt->andlist[n].valid = 0;
662
+ if (verbosity >= 2)
663
+ {
664
+ printf("Eliminating andlist, " \
665
+ "n=%d, a=%d, n2=%d, a2=%d: ",
666
+ n, a, n2, a2);
667
+ for (a3 = 0; a3 < ctxt->andlist[n].num; a3++)
668
+ {
669
+ printf("%d ", ctxt->andlist[n].e[a3]);
670
+ }
671
+ printf("\n");
672
+ }
673
+ }
674
+ }
675
+ }
676
+ }
677
+
678
+ if (verbosity >= 2)
679
+ {
680
+ printf("And-lists after pruning:\n");
681
+ for (n=0; n<num_lists; n++) {
682
+ if (ctxt->andlist[n].valid==0)
683
+ continue;
684
+ printf(" %d: ", n);
685
+ for (a=0; a<ctxt->andlist[n].num; a++) {
686
+ printf("%d ", ctxt->andlist[n].e[a]);
687
+ }
688
+ printf("\n");
689
+ }
690
+ }
691
+
692
+ c1 = numcon_total;
693
+ for (n = 0; n < num_lists; n++)
694
+ {
695
+ if (ctxt->andlist[n].valid == 0) continue;
696
+ leftend = 256;
697
+ rightend = -1;
698
+ for (a = 0; a < ctxt->andlist[n].num; a++)
699
+ {
700
+ c2 = ctxt->andlist[n].e[a];
701
+ if (ctxt->constituent[c2].left < leftend)
702
+ {
703
+ leftend = ctxt->constituent[c2].left;
704
+ }
705
+ if (ctxt->constituent[c2].right > rightend)
706
+ {
707
+ rightend=ctxt->constituent[c2].right;
708
+ }
709
+ }
710
+
711
+ ctxt->constituent[c1].left = leftend;
712
+ ctxt->constituent[c1].right = rightend;
713
+ ctxt->constituent[c1].type = ctxt->constituent[c2].type;
714
+ ctxt->constituent[c1].domain_type = 'x';
715
+ ctxt->constituent[c1].valid = 1;
716
+ ctxt->constituent[c1].start_link = ctxt->constituent[c2].start_link; /* bogus */
717
+ ctxt->constituent[c1].start_num = ctxt->constituent[c2].start_num; /* bogus */
718
+
719
+ #ifdef AUX_CODE_IS_DEAD /* See comments above */
720
+ /* If a constituent within the andlist is an aux (aux==1),
721
+ * set aux for the whole-list constituent to 2, also set
722
+ * aux for the smaller constituent to 2, meaning they'll both
723
+ * be printed (as an "X"). (If aux is 2 for the smaller
724
+ * constituent going in, the same thing should be done,
725
+ * though I doubt this ever happens.)
726
+ */
727
+ for (a = 0; a < ctxt->andlist[n].num; a++)
728
+ {
729
+ c2 = ctxt->andlist[n].e[a];
730
+ if ((ctxt->constituent[c2].aux == 1) || (ctxt->constituent[c2].aux == 2))
731
+ {
732
+ ctxt->constituent[c1].aux = 2;
733
+ ctxt->constituent[c2].aux = 2;
734
+ }
735
+ }
736
+ #endif /* AUX_CODE_IS_DEAD */
737
+
738
+ if (verbosity >= 2)
739
+ printf("Adding constituent:\n");
740
+ print_constituent(ctxt, linkage, c1);
741
+ c1++;
742
+ }
743
+ numcon_total = c1;
744
+ return numcon_total;
745
+ }
746
+
747
+ /**
748
+ * Go through all the words. If a word is on the right end of
749
+ * an S (or SF or SX), wordtype[w]=STYPE. If it's also on the left end of a
750
+ * Pg*b, I, PP, or Pv, wordtype[w]=PTYPE. If it's a question-word
751
+ * used in an indirect question, wordtype[w]=QTYPE. If it's a
752
+ * question-word determiner, wordtype[w]=QDTYPE. Else wordtype[w]=NONE.
753
+ * (This function is called once for each sublinkage.)
754
+ */
755
+ static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
756
+ {
757
+ int l1, l2, w1, w2;
758
+ const char * label1, * label2;
759
+
760
+ for (w1=0; w1<linkage->num_words; w1++)
761
+ ctxt->wordtype[w1]=NONE;
762
+
763
+ for (l1=0; l1<linkage_get_num_links(linkage); l1++) {
764
+ w1=linkage_get_link_rword(linkage, l1);
765
+ label1 = linkage_get_link_label(linkage, l1);
766
+ if ((uppercompare(label1, "S")==0) ||
767
+ (uppercompare(label1, "SX")==0) ||
768
+ (uppercompare(label1, "SF")==0)) {
769
+ ctxt->wordtype[w1] = STYPE;
770
+ for (l2=0; l2<linkage_get_num_links(linkage); l2++) {
771
+ w2=linkage_get_link_lword(linkage, l2);
772
+ label2 = linkage_get_link_label(linkage, l2);
773
+ if ((w1==w2) &&
774
+ ((post_process_match("Pg#b", label2)==1) ||
775
+ (uppercompare(label2, "I")==0) ||
776
+ (uppercompare(label2, "PP")==0) ||
777
+ (post_process_match("Pv", label2)==1))) {
778
+ /* Pvf, Pgf? */
779
+ ctxt->wordtype[w1] = PTYPE;
780
+ }
781
+ }
782
+ }
783
+ if (post_process_match("QI#d", label1)==1) {
784
+ ctxt->wordtype[w1] = QTYPE;
785
+ for (l2=0; l2<linkage_get_num_links(linkage); l2++) {
786
+ w2=linkage_get_link_lword(linkage, l2);
787
+ label2 = linkage_get_link_label(linkage, l2);
788
+ if ((w1==w2) && (post_process_match("D##w", label2)==1)) {
789
+ ctxt->wordtype[w1] = QDTYPE;
790
+ }
791
+ }
792
+ }
793
+ if (post_process_match("Mr", label1)==1) ctxt->wordtype[w1] = QDTYPE;
794
+ if (post_process_match("MX#d", label1)==1) ctxt->wordtype[w1] = QDTYPE;
795
+ }
796
+ }
797
+
798
+ static int last_minute_fixes(con_context_t *ctxt, Linkage linkage, int numcon_total)
799
+ {
800
+ int c, c2, global_leftend_found, adjustment_made,
801
+ global_rightend_found, lastword, newcon_total = 0;
802
+ Sentence sent;
803
+ sent = linkage_get_sentence(linkage);
804
+
805
+ for (c = 0; c < numcon_total; c++)
806
+ {
807
+ /* In a paraphrase construction ("John ran, he said"),
808
+ the paraphrasing clause doesn't get
809
+ an S. (This is true in Treebank II, not Treebank I) */
810
+
811
+ if (uppercompare(ctxt->constituent[c].start_link, "CP") == 0)
812
+ {
813
+ ctxt->constituent[c].valid = 0;
814
+ }
815
+
816
+ /* If it's a possessive with an "'s", the NP on the left
817
+ should be extended to include the "'s". */
818
+ if ((uppercompare(ctxt->constituent[c].start_link, "YS") == 0) ||
819
+ (uppercompare(ctxt->constituent[c].start_link, "YP") == 0))
820
+ {
821
+ ctxt->constituent[c].right++;
822
+ }
823
+
824
+ /* If a constituent has starting link MVpn, it's a time
825
+ expression like "last week"; label it as a noun phrase
826
+ (incorrectly) */
827
+
828
+ if (strcmp(ctxt->constituent[c].start_link, "MVpn") == 0)
829
+ {
830
+ ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
831
+ }
832
+ if (strcmp(ctxt->constituent[c].start_link, "COn") == 0)
833
+ {
834
+ ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
835
+ }
836
+ if (strcmp(ctxt->constituent[c].start_link, "Mpn") == 0)
837
+ {
838
+ ctxt->constituent[c].type = string_set_add("NP", ctxt->phrase_ss);
839
+ }
840
+
841
+ /* If the constituent is an S started by "but" or "and" at
842
+ the beginning of the sentence, it should be ignored. */
843
+
844
+ if ((strcmp(ctxt->constituent[c].start_link, "Wdc") == 0) &&
845
+ (ctxt->constituent[c].left == 2))
846
+ {
847
+ ctxt->constituent[c].valid = 0;
848
+ }
849
+
850
+ /* For prenominal adjectives, an ADJP constituent is assigned
851
+ if it's a hyphenated (Ah) or comparative (Am) adjective;
852
+ otherwise no ADJP is assigned, unless the phrase is more
853
+ than one word long (e.g. "very big"). The same with certain
854
+ types of adverbs. */
855
+ /* That was for Treebank I. For Treebank II, the rule only
856
+ seems to apply to prenominal adjectives (of all kinds).
857
+ However, it also applies to number expressions ("QP"). */
858
+
859
+ if ((post_process_match("A", ctxt->constituent[c].start_link) == 1) ||
860
+ (ctxt->constituent[c].domain_type == 'd') ||
861
+ (ctxt->constituent[c].domain_type == 'h')) {
862
+ if (ctxt->constituent[c].right-ctxt->constituent[c].left == 0)
863
+ {
864
+ ctxt->constituent[c].valid = 0;
865
+ }
866
+ }
867
+
868
+ if ((ctxt->constituent[c].domain_type == 'h') &&
869
+ (strcmp(linkage->word[ctxt->constituent[c].left - 1], "$") == 0))
870
+ {
871
+ ctxt->constituent[c].left--;
872
+ }
873
+
874
+ #ifdef AUX_CODE_IS_DEAD /* See comments at top */
875
+ /* If a constituent has type VP and its aux value is 2,
876
+ this means it's an aux that should be printed; change its
877
+ type to "X". If its aux value is 1, set "valid" to 0. (This
878
+ applies to Treebank I only) */
879
+
880
+ if (ctxt->constituent[c].aux == 2)
881
+ {
882
+ ctxt->constituent[c].type = string_set_add("X", ctxt->phrase_ss);
883
+ }
884
+ if (ctxt->constituent[c].aux == 1)
885
+ {
886
+ ctxt->constituent[c].valid = 0;
887
+ }
888
+ #endif /* AUX_CODE_IS_DEAD */
889
+ }
890
+
891
+ numcon_total = numcon_total + newcon_total;
892
+
893
+ /* If there's a global S constituent that includes everything
894
+ except a final period or question mark, extend it by one word */
895
+
896
+ for (c = 0; c < numcon_total; c++)
897
+ {
898
+ if ((ctxt->constituent[c].right == linkage->num_words -3) &&
899
+ (ctxt->constituent[c].left == 1) &&
900
+ (strcmp(ctxt->constituent[c].type, "S") == 0) &&
901
+ (strcmp(sent->word[linkage->num_words -2].string, ".") == 0))
902
+ ctxt->constituent[c].right++;
903
+ }
904
+
905
+ /* If there's no S boundary at the very left end of the sentence,
906
+ or the very right end, create a new S spanning the entire sentence */
907
+
908
+ lastword = linkage->num_words - 2;
909
+ global_leftend_found = 0;
910
+ global_rightend_found = 0;
911
+ for (c = 0; c < numcon_total; c++)
912
+ {
913
+ if ((ctxt->constituent[c].left == 1) && (strcmp(ctxt->constituent[c].type, "S") == 0) &&
914
+ (ctxt->constituent[c].valid == 1))
915
+ {
916
+ global_leftend_found = 1;
917
+ }
918
+ }
919
+ for (c = 0; c < numcon_total; c++)
920
+ {
921
+ if ((ctxt->constituent[c].right >= lastword) &&
922
+ (strcmp(ctxt->constituent[c].type, "S") == 0) && (ctxt->constituent[c].valid == 1))
923
+ {
924
+ global_rightend_found = 1;
925
+ }
926
+ }
927
+ if ((global_leftend_found == 0) || (global_rightend_found == 0))
928
+ {
929
+ c = numcon_total;
930
+ ctxt->constituent[c].left = 1;
931
+ ctxt->constituent[c].right = linkage->num_words-1;
932
+ ctxt->constituent[c].type = string_set_add("S", ctxt->phrase_ss);
933
+ ctxt->constituent[c].valid = 1;
934
+ ctxt->constituent[c].domain_type = 'x';
935
+ numcon_total++;
936
+ if (verbosity >= 2)
937
+ printf("Adding global sentence constituent:\n");
938
+ print_constituent(ctxt, linkage, c);
939
+ }
940
+
941
+ /* Check once more to see if constituents are nested (checking BETWEEN sublinkages
942
+ this time) */
943
+
944
+ while (1)
945
+ {
946
+ adjustment_made=0;
947
+ for (c = 0; c < numcon_total; c++)
948
+ {
949
+ if(ctxt->constituent[c].valid == 0) continue;
950
+ for (c2 = 0; c2 < numcon_total; c2++)
951
+ {
952
+ if(ctxt->constituent[c2].valid == 0) continue;
953
+ if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
954
+ (ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
955
+ (ctxt->constituent[c].right >= ctxt->constituent[c2].left))
956
+ {
957
+ if (verbosity >= 2)
958
+ {
959
+ err_ctxt ec;
960
+ ec.sent = linkage->sent;
961
+ err_msg(&ec, Warn, "Warning: the constituents aren't nested! "
962
+ "Adjusting them. (%d, %d)\n", c, c2);
963
+ }
964
+ ctxt->constituent[c].left = ctxt->constituent[c2].left;
965
+ }
966
+ }
967
+ }
968
+ if (adjustment_made == 0) break;
969
+ }
970
+ return numcon_total;
971
+ }
972
+
973
+ /**
974
+ * This function generates a table, word_used[i][w], showing
975
+ * whether each word w is used in each sublinkage i; if so,
976
+ * the value for that cell of the table is 1.
977
+ */
978
+ static void count_words_used(con_context_t *ctxt, Linkage linkage)
979
+ {
980
+ int i, w, link, num_subl;
981
+
982
+ num_subl = linkage->num_sublinkages;
983
+ if(linkage->unionized == 1 && num_subl > 1) num_subl--;
984
+
985
+ if (verbosity >= 2)
986
+ printf("Number of sublinkages = %d\n", num_subl);
987
+
988
+ for (i=0; i<num_subl; i++)
989
+ {
990
+ for (w = 0; w < linkage->num_words; w++) ctxt->word_used[i][w] = 0;
991
+ linkage->current = i;
992
+ for (link = 0; link < linkage_get_num_links(linkage); link++)
993
+ {
994
+ ctxt->word_used[i][linkage_get_link_lword(linkage, link)] = 1;
995
+ ctxt->word_used[i][linkage_get_link_rword(linkage, link)] = 1;
996
+ }
997
+ if (verbosity >= 2)
998
+ {
999
+ printf("Sublinkage %d: ", i);
1000
+ for (w = 0; w < linkage->num_words; w++)
1001
+ {
1002
+ if (ctxt->word_used[i][w] == 0) printf("0 ");
1003
+ if (ctxt->word_used[i][w] == 1) printf("1 ");
1004
+ }
1005
+ printf("\n");
1006
+ }
1007
+ }
1008
+ }
1009
+
1010
+ static int add_constituent(con_context_t *ctxt, int c, Linkage linkage, Domain domain,
1011
+ int l, int r, const char * name)
1012
+ {
1013
+ int nwords = linkage->num_words-2;
1014
+ c++;
1015
+
1016
+ /* Avoid running off end, to walls. */
1017
+ if (l < 1) l=1;
1018
+ if (r > nwords) r = nwords;
1019
+ if (l > nwords) l = nwords;
1020
+ assert(l <= r, "negative constituent length!" );
1021
+
1022
+ ctxt->constituent[c].left = l;
1023
+ ctxt->constituent[c].right = r;
1024
+ ctxt->constituent[c].domain_type = domain.type;
1025
+ ctxt->constituent[c].start_link =
1026
+ linkage_get_link_label(linkage, domain.start_link);
1027
+ ctxt->constituent[c].start_num = domain.start_link;
1028
+ ctxt->constituent[c].type = string_set_add(name, ctxt->phrase_ss);
1029
+ return c;
1030
+ }
1031
+
1032
+ static const char * cons_of_domain(Linkage linkage, char domain_type)
1033
+ {
1034
+ switch (domain_type) {
1035
+ case 'a':
1036
+ return "ADJP";
1037
+ case 'b':
1038
+ return "SBAR";
1039
+ case 'c':
1040
+ return "VP";
1041
+ case 'd':
1042
+ return "QP";
1043
+ case 'e':
1044
+ return "ADVP";
1045
+ case 'f':
1046
+ return "SBAR";
1047
+ case 'g':
1048
+ return "PP";
1049
+ case 'h':
1050
+ return "QP";
1051
+ case 'i':
1052
+ return "ADVP";
1053
+ case 'k':
1054
+ return "PRT";
1055
+ case 'n':
1056
+ return "NP";
1057
+ case 'p':
1058
+ return "PP";
1059
+ case 'q':
1060
+ return "SINV";
1061
+ case 's':
1062
+ return "S";
1063
+ case 't':
1064
+ return "VP";
1065
+ case 'u':
1066
+ return "ADJP";
1067
+ case 'v':
1068
+ return "VP";
1069
+ case 'y':
1070
+ return "NP";
1071
+ case 'z':
1072
+ return "VP";
1073
+ default:
1074
+ {
1075
+ err_ctxt ec;
1076
+ ec.sent = linkage->sent;
1077
+ err_msg(&ec, Error, "Error: Illegal domain: %c\n", domain_type);
1078
+ return "";
1079
+ }
1080
+ }
1081
+ }
1082
+
1083
+ static int read_constituents_from_domains(con_context_t *ctxt, Linkage linkage,
1084
+ int numcon_total, int s)
1085
+ {
1086
+ int d, c, leftlimit, l, leftmost, rightmost, w, c2, numcon_subl = 0, w2;
1087
+ List_o_links * dlink;
1088
+ int rootright, rootleft, adjustment_made;
1089
+ Sublinkage * subl;
1090
+ const char * name;
1091
+ Domain domain;
1092
+
1093
+ subl = &linkage->sublinkage[s];
1094
+
1095
+ for (d = 0, c = numcon_total; d < subl->pp_data.N_domains; d++, c++)
1096
+ {
1097
+ domain = subl->pp_data.domain_array[d];
1098
+ rootright = linkage_get_link_rword(linkage, domain.start_link);
1099
+ rootleft = linkage_get_link_lword(linkage, domain.start_link);
1100
+
1101
+ if ((domain.type=='c') ||
1102
+ (domain.type=='d') ||
1103
+ (domain.type=='e') ||
1104
+ (domain.type=='f') ||
1105
+ (domain.type=='g') ||
1106
+ (domain.type=='u') ||
1107
+ (domain.type=='y'))
1108
+ {
1109
+ leftlimit = 0;
1110
+ leftmost = linkage_get_link_lword(linkage, domain.start_link);
1111
+ rightmost = linkage_get_link_lword(linkage, domain.start_link);
1112
+ }
1113
+ else
1114
+ {
1115
+ leftlimit = linkage_get_link_lword(linkage, domain.start_link) + 1;
1116
+ leftmost = linkage_get_link_rword(linkage, domain.start_link);
1117
+ rightmost = linkage_get_link_rword(linkage, domain.start_link);
1118
+ }
1119
+
1120
+ /* Start by assigning both left and right limits to the
1121
+ * right word of the start link. This will always be contained
1122
+ * in the constituent. This will also handle the case
1123
+ * where the domain contains no links.
1124
+ */
1125
+ for (dlink = domain.lol; dlink != NULL; dlink = dlink->next)
1126
+ {
1127
+ l = dlink->link;
1128
+
1129
+ if ((linkage_get_link_lword(linkage, l) < leftmost) &&
1130
+ (linkage_get_link_lword(linkage, l) >= leftlimit))
1131
+ {
1132
+ leftmost = linkage_get_link_lword(linkage, l);
1133
+ }
1134
+
1135
+ if (linkage_get_link_rword(linkage, l) > rightmost)
1136
+ {
1137
+ rightmost = linkage_get_link_rword(linkage, l);
1138
+ }
1139
+ }
1140
+
1141
+ c--;
1142
+ c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost,
1143
+ cons_of_domain(linkage, domain.type));
1144
+
1145
+ if (domain.type == 'z')
1146
+ {
1147
+ c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
1148
+ }
1149
+ if (domain.type=='c')
1150
+ {
1151
+ c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
1152
+ }
1153
+ if ((post_process_match("Ce*", ctxt->constituent[c].start_link)==1) ||
1154
+ (post_process_match("Rn", ctxt->constituent[c].start_link)==1))
1155
+ {
1156
+ c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "SBAR");
1157
+ }
1158
+ if ((post_process_match("R*", ctxt->constituent[c].start_link)==1) ||
1159
+ (post_process_match("MX#r", ctxt->constituent[c].start_link)==1))
1160
+ {
1161
+ w = leftmost;
1162
+ if (strcmp(linkage->word[w], ",") == 0) w++;
1163
+ c = add_constituent(ctxt, c, linkage, domain, w, w, "WHNP");
1164
+ }
1165
+ if (post_process_match("Mj", ctxt->constituent[c].start_link) == 1)
1166
+ {
1167
+ w = leftmost;
1168
+ if (strcmp(linkage->word[w], ",") == 0) w++;
1169
+ c = add_constituent(ctxt, c, linkage, domain, w, w+1, "WHPP");
1170
+ c = add_constituent(ctxt, c, linkage, domain, w+1, w+1, "WHNP");
1171
+ }
1172
+ if ((post_process_match("Ss#d", ctxt->constituent[c].start_link)==1) ||
1173
+ (post_process_match("B#d", ctxt->constituent[c].start_link)==1))
1174
+ {
1175
+ c = add_constituent(ctxt, c, linkage, domain, rootleft, rootleft, "WHNP");
1176
+ c = add_constituent(ctxt, c, linkage, domain,
1177
+ rootleft, ctxt->constituent[c-1].right, "SBAR");
1178
+ }
1179
+ if (post_process_match("CP", ctxt->constituent[c].start_link)==1)
1180
+ {
1181
+ if (strcmp(linkage->word[leftmost], ",") == 0)
1182
+ ctxt->constituent[c].left++;
1183
+ c = add_constituent(ctxt, c, linkage, domain, 1, linkage->num_words-1, "S");
1184
+ }
1185
+ if ((post_process_match("MVs", ctxt->constituent[c].start_link)==1) ||
1186
+ (domain.type=='f'))
1187
+ {
1188
+ w = ctxt->constituent[c].left;
1189
+ if (strcmp(linkage->word[w], ",") == 0)
1190
+ w++;
1191
+ if (strcmp(linkage->word[w], "when") == 0)
1192
+ {
1193
+ c = add_constituent(ctxt, c, linkage, domain, w, w, "WHADVP");
1194
+ }
1195
+ }
1196
+ if (domain.type=='t')
1197
+ {
1198
+ c = add_constituent(ctxt, c, linkage, domain, leftmost, rightmost, "S");
1199
+ }
1200
+ if ((post_process_match("QI", ctxt->constituent[c].start_link) == 1) ||
1201
+ (post_process_match("Mr", ctxt->constituent[c].start_link) == 1) ||
1202
+ (post_process_match("MX#d", ctxt->constituent[c].start_link) == 1))
1203
+ {
1204
+ w = leftmost;
1205
+ if (strcmp(linkage->word[w], ",") == 0) w++;
1206
+ if (ctxt->wordtype[w] == NONE)
1207
+ name = "WHADVP";
1208
+ else if (ctxt->wordtype[w] == QTYPE)
1209
+ name = "WHNP";
1210
+ else if (ctxt->wordtype[w] == QDTYPE)
1211
+ name = "WHNP";
1212
+ else
1213
+ assert(0, "Unexpected word type");
1214
+ c = add_constituent(ctxt, c, linkage, domain, w, w, name);
1215
+
1216
+ if (ctxt->wordtype[w] == QDTYPE)
1217
+ {
1218
+ /* Now find the finite verb to the right, start an S */
1219
+ /* Limit w2 to sentence length. */
1220
+ // for( w2=w+1; w2 < ctxt->r_limit-1; w2++ )
1221
+ for (w2 = w+1; w2 < rightmost; w2++)
1222
+ if ((ctxt->wordtype[w2] == STYPE) || (ctxt->wordtype[w2] == PTYPE)) break;
1223
+
1224
+ /* Adjust the right boundary of previous constituent */
1225
+ ctxt->constituent[c].right = w2 - 1;
1226
+ c = add_constituent(ctxt, c, linkage, domain, w2, rightmost, "S");
1227
+ }
1228
+ }
1229
+
1230
+ if (ctxt->constituent[c].domain_type == '\0')
1231
+ {
1232
+ err_ctxt ec;
1233
+ ec.sent = linkage->sent;
1234
+ err_msg(&ec, Error, "Error: no domain type assigned to constituent\n");
1235
+ }
1236
+ if (ctxt->constituent[c].start_link == NULL)
1237
+ {
1238
+ err_ctxt ec;
1239
+ ec.sent = linkage->sent;
1240
+ err_msg(&ec, Error, "Error: no type assigned to constituent\n");
1241
+ }
1242
+ }
1243
+
1244
+ numcon_subl = c - numcon_total;
1245
+ /* numcon_subl = handle_islands(linkage, numcon_total, numcon_subl); */
1246
+
1247
+ if (verbosity >= 2)
1248
+ printf("Constituents added at first stage for subl %d:\n",
1249
+ linkage->current);
1250
+ for (c = numcon_total; c < numcon_total + numcon_subl; c++)
1251
+ {
1252
+ print_constituent(ctxt, linkage, c);
1253
+ }
1254
+
1255
+ /* Opener case - generates S around main clause.
1256
+ (This must be done first; the S generated will be needed for
1257
+ later cases.) */
1258
+ numcon_subl =
1259
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "S", "S", 5);
1260
+
1261
+ /* pp opener case */
1262
+ numcon_subl =
1263
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "S", "S", 6);
1264
+
1265
+ /* participle opener case */
1266
+ numcon_subl =
1267
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "S", "S", "S", 9);
1268
+
1269
+ /* Subject-phrase case; every main VP generates an S */
1270
+ numcon_subl =
1271
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "S", "NP", 1);
1272
+
1273
+ /* Relative clause case; an SBAR generates a complement NP */
1274
+ numcon_subl =
1275
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "SBAR", "NP", "NP", 3);
1276
+
1277
+ /* Participle modifier case */
1278
+ numcon_subl =
1279
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "VP", "NP", "NP", 8);
1280
+
1281
+ /* PP modifying NP */
1282
+ numcon_subl =
1283
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "PP", "NP", "NP", 8);
1284
+
1285
+ /* Appositive case */
1286
+ numcon_subl =
1287
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "NP", "NP", 4);
1288
+
1289
+ /* S-V inversion case; an NP generates a complement VP */
1290
+ numcon_subl =
1291
+ gen_comp(ctxt, linkage, numcon_total, numcon_subl, "NP", "SINV", "VP", 7);
1292
+
1293
+ adjust_subordinate_clauses(ctxt, linkage, numcon_total, numcon_subl);
1294
+ for (c = numcon_total; c < numcon_total + numcon_subl; c++)
1295
+ {
1296
+ if ((ctxt->constituent[c].domain_type=='p') &&
1297
+ (strcmp(linkage->word[ctxt->constituent[c].left], ",")==0))
1298
+ {
1299
+ ctxt->constituent[c].left++;
1300
+ }
1301
+ }
1302
+
1303
+ /* Make sure the constituents are nested. If two constituents
1304
+ * are not nested: whichever constituent has the furthest left
1305
+ * boundary, shift that boundary rightwards to the left boundary
1306
+ * of the other one.
1307
+ */
1308
+ while (1)
1309
+ {
1310
+ adjustment_made = 0;
1311
+ for (c = numcon_total; c < numcon_total + numcon_subl; c++)
1312
+ {
1313
+ for (c2 = numcon_total; c2 < numcon_total + numcon_subl; c2++)
1314
+ {
1315
+ if ((ctxt->constituent[c].left < ctxt->constituent[c2].left) &&
1316
+ (ctxt->constituent[c].right < ctxt->constituent[c2].right) &&
1317
+ (ctxt->constituent[c].right >= ctxt->constituent[c2].left))
1318
+ {
1319
+ /* We've found two overlapping constituents.
1320
+ If one is larger, except the smaller one
1321
+ includes an extra comma, adjust the smaller one
1322
+ to exclude the comma */
1323
+
1324
+ if ((strcmp(linkage->word[ctxt->constituent[c2].right], ",") == 0) ||
1325
+ (strcmp(linkage->word[ctxt->constituent[c2].right],
1326
+ "RIGHT-WALL") == 0))
1327
+ {
1328
+ if (verbosity >= 2)
1329
+ printf("Adjusting %d to fix comma overlap\n", c2);
1330
+ adjust_for_right_comma(ctxt, linkage, c2);
1331
+ adjustment_made = 1;
1332
+ }
1333
+ else if (strcmp(linkage->word[ctxt->constituent[c].left], ",") == 0)
1334
+ {
1335
+ if (verbosity >= 2)
1336
+ printf("Adjusting c %d to fix comma overlap\n", c);
1337
+ adjust_for_left_comma(ctxt, linkage, c);
1338
+ adjustment_made = 1;
1339
+ }
1340
+ else
1341
+ {
1342
+ if (verbosity >= 2)
1343
+ {
1344
+ err_ctxt ec;
1345
+ ec.sent = linkage->sent;
1346
+ err_msg(&ec, Warn,
1347
+ "Warning: the constituents aren't nested! "
1348
+ "Adjusting them. (%d, %d)\n", c, c2);
1349
+ }
1350
+ ctxt->constituent[c].left = ctxt->constituent[c2].left;
1351
+ }
1352
+ }
1353
+ }
1354
+ }
1355
+ if (adjustment_made == 0) break;
1356
+ }
1357
+
1358
+ #ifdef AUX_CODE_IS_DEAD
1359
+ /* The code here is ifdef-dead as it appears to be dead, as the computation it does
1360
+ * is immediately undone in the very next block.
1361
+ */
1362
+ /* This labels certain words as auxiliaries (such as forms of "be"
1363
+ * with passives, forms of "have" wth past participles,
1364
+ * "to" with infinitives). These words start VP's which include
1365
+ * them. In Treebank I, these don't get printed unless they're part of an
1366
+ * andlist, in which case they get labeled "X". (this is why we need to
1367
+ * label them as "aux".) In Treebank II, however, they seem to be treated
1368
+ * just like other verbs, so the "aux" stuff isn't needed.
1369
+ */
1370
+ for (c = numcon_total; c < numcon_total + numcon_subl; c++)
1371
+ {
1372
+ ctxt->constituent[c].subl = linkage->current;
1373
+ if (((ctxt->constituent[c].domain_type == 'v') &&
1374
+ (ctxt->wordtype[linkage_get_link_rword(linkage,
1375
+ ctxt->constituent[c].start_num)] == PTYPE))
1376
+ ||
1377
+ ((ctxt->constituent[c].domain_type == 't') &&
1378
+ (strcmp(ctxt->constituent[c].type, "VP") == 0)))
1379
+ {
1380
+ ctxt->constituent[c].aux = 1;
1381
+ }
1382
+ else
1383
+ {
1384
+ ctxt->constituent[c].aux = 0;
1385
+ }
1386
+ }
1387
+ #endif /* AUX_CODE_IS_DEAD */
1388
+
1389
+ if (MAXCONSTITUENTS <= numcon_total + numcon_subl)
1390
+ {
1391
+ err_ctxt ec;
1392
+ ec.sent = linkage->sent;
1393
+ err_msg(&ec, Error, "Error: Too many constituents (a2).\n");
1394
+ numcon_total = MAXCONSTITUENTS - numcon_subl;
1395
+ }
1396
+ for (c = numcon_total; c < numcon_total + numcon_subl; c++)
1397
+ {
1398
+ ctxt->constituent[c].subl = linkage->current;
1399
+ #ifdef AUX_CODE_IS_DEAD /* See comments at top */
1400
+ ctxt->constituent[c].aux = 0;
1401
+ #endif /* AUX_CODE_IS_DEAD */
1402
+ }
1403
+
1404
+ return numcon_subl;
1405
+ }
1406
+
1407
+ static char * exprint_constituent_structure(con_context_t *ctxt, Linkage linkage, int numcon_total)
1408
+ {
1409
+ int have_opened = 1;
1410
+ int c, w;
1411
+ int leftdone[MAXCONSTITUENTS];
1412
+ int rightdone[MAXCONSTITUENTS];
1413
+ int best, bestright, bestleft;
1414
+ Sentence sent;
1415
+ char s[100], * p;
1416
+ String * cs = string_new();
1417
+
1418
+ assert (numcon_total < MAXCONSTITUENTS, "Too many constituents (b)");
1419
+ sent = linkage_get_sentence(linkage);
1420
+
1421
+ for (c = 0; c < numcon_total; c++)
1422
+ {
1423
+ leftdone[c] = 0;
1424
+ rightdone[c] = 0;
1425
+ }
1426
+
1427
+ if (verbosity >= 2)
1428
+ printf("\n");
1429
+
1430
+ for (w = 1; w < linkage->num_words; w++)
1431
+ {
1432
+ /* Skip left wall; don't skip right wall, since it may
1433
+ have constituent boundaries */
1434
+
1435
+ while(1)
1436
+ {
1437
+ best = -1;
1438
+ bestright = -1;
1439
+ for (c = 0; c < numcon_total; c++)
1440
+ {
1441
+ if ((ctxt->constituent[c].left == w) &&
1442
+ (leftdone[c] == 0) && (ctxt->constituent[c].valid == 1) &&
1443
+ (ctxt->constituent[c].right >= bestright)) {
1444
+ best = c;
1445
+ bestright = ctxt->constituent[c].right;
1446
+ }
1447
+ }
1448
+ if (best == -1)
1449
+ break;
1450
+
1451
+ leftdone[best] = 1;
1452
+ /* have_open is a hack to avoid printing anything until
1453
+ * bracket is opened */
1454
+ if (w == 1) have_opened = 0;
1455
+ #ifdef AUX_CODE_IS_DEAD /* See comments at top */
1456
+ if (ctxt->constituent[best].aux == 1) continue;
1457
+ #endif /* AUX_CODE_IS_DEAD */
1458
+ have_opened = 1;
1459
+ append_string(cs, "%c%s ", OPEN_BRACKET, ctxt->constituent[best].type);
1460
+ }
1461
+
1462
+ /* Don't print out right wall */
1463
+ if (have_opened && (w < linkage->num_words - 1))
1464
+ {
1465
+ char *p;
1466
+ strcpy(s, sent->word[w].string);
1467
+
1468
+ /* Constituent processing will crash if the sentence contains
1469
+ * square brackets, so we have to do something ... replace
1470
+ * them with curly braces ... will have to do.
1471
+ */
1472
+ p = strchr(s, OPEN_BRACKET);
1473
+ while(p)
1474
+ {
1475
+ *p = '{';
1476
+ p = strchr(p, OPEN_BRACKET);
1477
+ }
1478
+
1479
+ p = strchr(s, CLOSE_BRACKET);
1480
+ while(p)
1481
+ {
1482
+ *p = '}';
1483
+ p = strchr(p, CLOSE_BRACKET);
1484
+ }
1485
+
1486
+ /* Now, if the first character of the word was
1487
+ originally uppercase, we put it back that way */
1488
+ if (sent->word[w].firstupper == 1)
1489
+ upcase_utf8_str(s, s, MAX_WORD);
1490
+ append_string(cs, "%s ", s);
1491
+ }
1492
+
1493
+ while(1)
1494
+ {
1495
+ best = -1;
1496
+ bestleft = -1;
1497
+ for(c = 0; c < numcon_total; c++)
1498
+ {
1499
+ if ((ctxt->constituent[c].right == w) &&
1500
+ (rightdone[c] == 0) && (ctxt->constituent[c].valid == 1) &&
1501
+ (ctxt->constituent[c].left > bestleft)) {
1502
+ best = c;
1503
+ bestleft = ctxt->constituent[c].left;
1504
+ }
1505
+ }
1506
+ if (best == -1)
1507
+ break;
1508
+ rightdone[best] = 1;
1509
+ #ifdef AUX_CODE_IS_DEAD /* See comments at top */
1510
+ if (ctxt->constituent[best].aux == 1)
1511
+ continue;
1512
+ #endif /* AUX_CODE_IS_DEAD */
1513
+ append_string(cs, "%s%c ", ctxt->constituent[best].type, CLOSE_BRACKET);
1514
+ }
1515
+ }
1516
+
1517
+ append_string(cs, "\n");
1518
+ p = string_copy(cs);
1519
+ string_delete(cs);
1520
+ return p;
1521
+ }
1522
+
1523
+ static char * do_print_flat_constituents(con_context_t *ctxt, Linkage linkage)
1524
+ {
1525
+ int num_words;
1526
+ Sentence sent;
1527
+ Postprocessor * pp;
1528
+ int s, numcon_total, numcon_subl, num_subl;
1529
+ char * q;
1530
+
1531
+ sent = linkage_get_sentence(linkage);
1532
+ ctxt->phrase_ss = string_set_create();
1533
+ pp = linkage->sent->dict->constituent_pp;
1534
+ numcon_total = 0;
1535
+
1536
+ count_words_used(ctxt, linkage);
1537
+
1538
+ num_subl = linkage->num_sublinkages;
1539
+ if (num_subl > MAXSUBL)
1540
+ {
1541
+ num_subl = MAXSUBL;
1542
+ if (verbosity >= 2)
1543
+ printf("Number of sublinkages exceeds maximum: only considering first %d sublinkages\n", MAXSUBL);
1544
+ }
1545
+
1546
+ if (linkage->unionized == 1 && num_subl > 1) num_subl--;
1547
+ for (s = 0; s < num_subl; s++)
1548
+ {
1549
+ linkage_set_current_sublinkage(linkage, s);
1550
+ linkage_post_process(linkage, pp);
1551
+ num_words = linkage_get_num_words(linkage);
1552
+ generate_misc_word_info(ctxt, linkage);
1553
+ numcon_subl = read_constituents_from_domains(ctxt, linkage, numcon_total, s);
1554
+ numcon_total = numcon_total + numcon_subl;
1555
+ if (MAXCONSTITUENTS <= numcon_total)
1556
+ {
1557
+ err_ctxt ec;
1558
+ ec.sent = linkage->sent;
1559
+ err_msg(&ec, Error, "Error: Too many constituents (c).\n");
1560
+ numcon_total = MAXCONSTITUENTS-1;
1561
+ break;
1562
+ }
1563
+ }
1564
+ numcon_total = merge_constituents(ctxt, linkage, numcon_total);
1565
+ if (MAXCONSTITUENTS <= numcon_total)
1566
+ {
1567
+ err_ctxt ec;
1568
+ ec.sent = linkage->sent;
1569
+ err_msg(&ec, Error, "Error: Too many constituents (d).\n");
1570
+ numcon_total = MAXCONSTITUENTS-1;
1571
+ }
1572
+ numcon_total = last_minute_fixes(ctxt, linkage, numcon_total);
1573
+ if (MAXCONSTITUENTS <= numcon_total)
1574
+ {
1575
+ err_ctxt ec;
1576
+ ec.sent = linkage->sent;
1577
+ err_msg(&ec, Error, "Error: Too many constituents (e).\n");
1578
+ numcon_total = MAXCONSTITUENTS-1;
1579
+ }
1580
+ q = exprint_constituent_structure(ctxt, linkage, numcon_total);
1581
+ string_set_delete(ctxt->phrase_ss);
1582
+ ctxt->phrase_ss = NULL;
1583
+ return q;
1584
+ }
1585
+
1586
+ static char * print_flat_constituents(Linkage linkage)
1587
+ {
1588
+ /* In principle, the ctxt could be allocated on stack, instead of
1589
+ * with malloc(). However, The java6 jvm (and MS Windows jvm's)
1590
+ * gives JNI clients only a small amount of stack space. Alloc'ing
1591
+ * this (rather large) structure on stack will blow up the JVM.
1592
+ * This was discovered only after much work. Bummer.
1593
+ */
1594
+ char * p;
1595
+ con_context_t *ctxt = (con_context_t *) malloc (sizeof(con_context_t));
1596
+ memset(ctxt, 0, sizeof(con_context_t));
1597
+ p = do_print_flat_constituents(ctxt, linkage);
1598
+ free(ctxt);
1599
+ return p;
1600
+ }
1601
+
1602
+ static CType token_type (char *token)
1603
+ {
1604
+ if ((token[0] == OPEN_BRACKET) && (strlen(token) > 1))
1605
+ return OPEN_TOK;
1606
+ if ((strlen(token) > 1) && (token[strlen(token) - 1] == CLOSE_BRACKET))
1607
+ return CLOSE_TOK;
1608
+ return WORD_TOK;
1609
+ }
1610
+
1611
+ static CNode * make_CNode(char *q)
1612
+ {
1613
+ CNode * cn;
1614
+ cn = (CNode *) exalloc(sizeof(CNode));
1615
+ cn->label = (char *) exalloc(sizeof(char)*(strlen(q)+1));
1616
+ strcpy(cn->label, q);
1617
+ cn->child = cn->next = (CNode *) NULL;
1618
+ cn->next = (CNode *) NULL;
1619
+ cn->start = cn->end = -1;
1620
+ return cn;
1621
+ }
1622
+
1623
+ static CNode * parse_string(CNode * n, char **saveptr)
1624
+ {
1625
+ char *q;
1626
+ CNode *m, *last_child=NULL;
1627
+
1628
+ while ((q = strtok_r(NULL, " ", saveptr))) {
1629
+ switch (token_type(q)) {
1630
+ case CLOSE_TOK :
1631
+ q[strlen(q)-1]='\0';
1632
+ assert(strcmp(q, n->label)==0,
1633
+ "Constituent tree: Labels do not match.");
1634
+ return n;
1635
+ break;
1636
+ case OPEN_TOK:
1637
+ m = make_CNode(q+1);
1638
+ m = parse_string(m, saveptr);
1639
+ break;
1640
+ case WORD_TOK:
1641
+ m = make_CNode(q);
1642
+ break;
1643
+ default:
1644
+ assert(0, "Constituent tree: Illegal token type");
1645
+ }
1646
+ if (n->child == NULL) {
1647
+ last_child = n->child = m;
1648
+ }
1649
+ else {
1650
+ last_child->next = m;
1651
+ last_child = m;
1652
+ }
1653
+ }
1654
+ assert(0, "Constituent tree: Constituent did not close");
1655
+ return NULL;
1656
+ }
1657
+
1658
+ static void print_tree(String * cs, int indent, CNode * n, int o1, int o2)
1659
+ {
1660
+ int i, child_offset;
1661
+ CNode * m;
1662
+
1663
+ if (n == NULL) return;
1664
+
1665
+ if (indent)
1666
+ for (i = 0; i < o1; ++i)
1667
+ append_string(cs, " ");
1668
+ append_string(cs, "(%s ", n->label);
1669
+ child_offset = o2 + strlen(n->label) + 2;
1670
+
1671
+ for (m = n->child; m != NULL; m = m->next)
1672
+ {
1673
+ if (m->child == NULL)
1674
+ {
1675
+ char * p;
1676
+ /* If the original string has left or right parens in it,
1677
+ * the printed string will be messed up by these ...
1678
+ * so replace them by curly braces. What else can one do?
1679
+ */
1680
+ p = strchr(m->label, '(');
1681
+ while(p)
1682
+ {
1683
+ *p = '{';
1684
+ p = strchr(p, '(');
1685
+ }
1686
+
1687
+ p = strchr(m->label, ')');
1688
+ while(p)
1689
+ {
1690
+ *p = '}';
1691
+ p = strchr(p, ')');
1692
+ }
1693
+
1694
+ append_string(cs, "%s", m->label);
1695
+ if ((m->next != NULL) && (m->next->child == NULL))
1696
+ append_string(cs, " ");
1697
+ }
1698
+ else
1699
+ {
1700
+ if (m != n->child)
1701
+ {
1702
+ if (indent) append_string(cs, "\n");
1703
+ else append_string(cs, " ");
1704
+ print_tree(cs, indent, m, child_offset, child_offset);
1705
+ }
1706
+ else
1707
+ {
1708
+ print_tree(cs, indent, m, 0, child_offset);
1709
+ }
1710
+ if ((m->next != NULL) && (m->next->child == NULL))
1711
+ {
1712
+ if (indent)
1713
+ {
1714
+ append_string(cs, "\n");
1715
+ for (i = 0; i < child_offset; ++i)
1716
+ append_string(cs, " ");
1717
+ }
1718
+ else append_string(cs, " ");
1719
+ }
1720
+ }
1721
+ }
1722
+ append_string(cs, ")");
1723
+ }
1724
+
1725
+ static int assign_spans(CNode * n, int start) {
1726
+ int num_words=0;
1727
+ CNode * m=NULL;
1728
+ if (n==NULL) return 0;
1729
+ n->start = start;
1730
+ if (n->child == NULL) {
1731
+ n->end = start;
1732
+ return 1;
1733
+ }
1734
+ else {
1735
+ for (m=n->child; m!=NULL; m=m->next) {
1736
+ num_words += assign_spans(m, start+num_words);
1737
+ }
1738
+ n->end = start+num_words-1;
1739
+ }
1740
+ return num_words;
1741
+ }
1742
+
1743
+ CNode * linkage_constituent_tree(Linkage linkage)
1744
+ {
1745
+ char *p, *q, *saveptr;
1746
+ int len;
1747
+ CNode * root;
1748
+
1749
+ p = print_flat_constituents(linkage);
1750
+
1751
+ len = strlen(p);
1752
+ q = strtok_r(p, " ", &saveptr);
1753
+ assert(token_type(q) == OPEN_TOK, "Illegal beginning of string");
1754
+ root = make_CNode(q+1);
1755
+ root = parse_string(root, &saveptr);
1756
+ assign_spans(root, 0);
1757
+ exfree(p, sizeof(char)*(len+1));
1758
+ return root;
1759
+ }
1760
+
1761
+ void linkage_free_constituent_tree(CNode * n)
1762
+ {
1763
+ CNode *m, *x;
1764
+ for (m=n->child; m!=NULL; m=x) {
1765
+ x=m->next;
1766
+ linkage_free_constituent_tree(m);
1767
+ }
1768
+ exfree(n->label, sizeof(char)*(strlen(n->label)+1));
1769
+ exfree(n, sizeof(CNode));
1770
+ }
1771
+
1772
+ /**
1773
+ * Print out the constituent tree.
1774
+ * mode 1: treebank-style constituent tree
1775
+ * mode 2: flat, bracketed tree [A like [B this B] A]
1776
+ * mode 3: flat, treebank-style tree (A like (B this) )
1777
+ */
1778
+ char * linkage_print_constituent_tree(Linkage linkage, int mode)
1779
+ {
1780
+ String * cs;
1781
+ CNode * root;
1782
+ char * p;
1783
+
1784
+ if ((mode == 0) || (linkage->sent->dict->constituent_pp == NULL))
1785
+ {
1786
+ return NULL;
1787
+ }
1788
+ else if (mode == 1 || mode == 3)
1789
+ {
1790
+ cs = string_new();
1791
+ root = linkage_constituent_tree(linkage);
1792
+ print_tree(cs, (mode==1), root, 0, 0);
1793
+ linkage_free_constituent_tree(root);
1794
+ append_string(cs, "\n");
1795
+ p = string_copy(cs);
1796
+ string_delete(cs);
1797
+ return p;
1798
+ }
1799
+ else if (mode == 2)
1800
+ {
1801
+ return print_flat_constituents(linkage);
1802
+ }
1803
+ assert(0, "Illegal mode in linkage_print_constituent_tree");
1804
+ return NULL;
1805
+ }
1806
+
1807
/**
 * Release a string returned by linkage_print_constituent_tree().
 *
 * NULL is accepted and ignored: linkage_print_constituent_tree() returns
 * NULL for mode 0 or when no constituent post-processor is available,
 * and callers should be able to pass that result straight back here.
 */
void linkage_free_constituent_tree_str(char * s)
{
	if (s == NULL) return;
	exfree(s, strlen(s)+1);
}
1811
+
1812
+ const char * linkage_constituent_node_get_label(const CNode *n)
1813
+ {
1814
+ return n->label;
1815
+ }
1816
+
1817
+
1818
+ CNode * linkage_constituent_node_get_child(const CNode *n)
1819
+ {
1820
+ return n->child;
1821
+ }
1822
+
1823
+ CNode * linkage_constituent_node_get_next(const CNode *n)
1824
+ {
1825
+ return n->next;
1826
+ }
1827
+
1828
+ int linkage_constituent_node_get_start(const CNode *n)
1829
+ {
1830
+ return n->start;
1831
+ }
1832
+
1833
+ int linkage_constituent_node_get_end(const CNode *n)
1834
+ {
1835
+ return n->end;
1836
+ }