grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344)
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
data/ext/link_grammar/link-grammar/analyze-linkage.c
@@ -0,0 +1,1317 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+
15
+ #include <stdarg.h>
16
+ #include "api.h"
17
+
18
+ /**
19
+ * The functions in this file do several things: (1) take a linkage
20
+ * involving fat links and expand it into a sequence of linkages
21
+ * (involving a subset of the given words), one for each way of
22
+ * eliminating the conjunctions. (2) determine if a linkage involving
23
+ * fat links has a structural violation. (3) make sure each of the expanded
24
+ * linkages has a consistent post-processing behavior. (4) compute the
25
+ * cost of the linkage.
26
+ */
27
+
28
+ typedef struct patch_element_struct Patch_element;
29
+ struct patch_element_struct
30
+ {
31
+ char used; /* TRUE if this link is used, else FALSE */
32
+ char changed;/* TRUE if this link changed, else FALSE */
33
+ int newl; /* the new value of the left end */
34
+ int newr; /* the new value of the right end */
35
+ };
36
+
37
+ struct analyze_context_s
38
+ {
39
+ List_o_links *word_links[MAX_SENTENCE]; /* ptr to l.o.l. out of word */
40
+ int structure_violation;
41
+
42
+ int dfs_root_word[MAX_SENTENCE]; /* for the depth-first search */
43
+ int dfs_height[MAX_SENTENCE]; /* to determine the order to do the root word dfs */
44
+ int height_perm[MAX_SENTENCE]; /* permute the vertices from highest to lowest */
45
+
46
+ /* The following are all for computing the cost of and lists */
47
+ int visited[MAX_SENTENCE];
48
+ int and_element_sizes[MAX_SENTENCE];
49
+ int and_element[MAX_SENTENCE];
50
+ int N_and_elements;
51
+ int outside_word[MAX_SENTENCE];
52
+ int N_outside_words;
53
+ Patch_element patch_array[MAX_LINKS];
54
+ };
55
+
56
+ typedef struct CON_node_struct CON_node;
57
+ typedef struct CON_list_struct CON_list;
58
+ typedef struct DIS_list_struct DIS_list;
59
+ typedef struct Links_to_patch_struct Links_to_patch;
60
+
61
+ struct DIS_node_struct
62
+ {
63
+ CON_list * cl; /* the list of children */
64
+ List_o_links * lol;/* the links that comprise this region of the graph */
65
+ int word; /* the word defining this node */
66
+ };
67
+
68
+ struct CON_node_struct
69
+ {
70
+ DIS_list * dl; /* the list of children */
71
+ DIS_list * current;/* defines the current child */
72
+ int word; /* the word defining this node */
73
+ };
74
+
75
+ struct DIS_list_struct
76
+ {
77
+ DIS_list * next;
78
+ DIS_node * dn;
79
+ };
80
+
81
+ struct CON_list_struct
82
+ {
83
+ CON_list * next;
84
+ CON_node * cn;
85
+ };
86
+
87
+ struct Links_to_patch_struct
88
+ {
89
+ Links_to_patch * next;
90
+ int link;
91
+ char dir; /* this is 'r' or 'l' depending on which end of the link
92
+ is to be patched. */
93
+ };
94
+
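/* Editor's note, not part of the packaged source: a quick sketch of the
 * DIS_CON tree these structures form.  DIS nodes and CON nodes alternate:
 * a DIS node's children (cl) are CON nodes, one per conjunction word
 * reachable from it through thin links, and a CON node's children (dl)
 * are DIS nodes, one per and-list element hanging off that conjunction
 * through fat links.  The 'current' pointer in each CON node selects
 * which of its DIS children is active, so one assignment of all the
 * 'current' pointers picks out exactly one expanded sublinkage. */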
95
+ void zero_sublinkage(Sublinkage *s)
96
+ {
97
+ int i;
98
+ s->pp_info = NULL;
99
+ s->violation = NULL;
100
+ for (i=0; i<s->num_links; i++) s->link[i] = NULL;
101
+
102
+ memset(&s->pp_data, 0, sizeof(PP_data));
103
+ }
104
+
105
+ static Sublinkage * x_create_sublinkage(Parse_info pi)
106
+ {
107
+ Sublinkage *s = (Sublinkage *) xalloc (sizeof(Sublinkage));
108
+ s->link = (Link **) xalloc(MAX_LINKS*sizeof(Link *));
109
+ s->num_links = MAX_LINKS;
110
+
111
+ zero_sublinkage(s);
112
+
113
+ s->num_links = pi->N_links;
114
+ assert(pi->N_links < MAX_LINKS, "Too many links");
115
+ return s;
116
+ }
117
+
118
+ static Sublinkage * ex_create_sublinkage(Parse_info pi)
119
+ {
120
+ Sublinkage *s = (Sublinkage *) exalloc (sizeof(Sublinkage));
121
+ s->link = (Link **) exalloc(pi->N_links*sizeof(Link *));
122
+ s->num_links = pi->N_links;
123
+
124
+ zero_sublinkage(s);
125
+
126
+ assert(pi->N_links < MAX_LINKS, "Too many links");
127
+ return s;
128
+ }
129
+
130
+ static void free_sublinkage(Sublinkage *s)
131
+ {
132
+ int i;
133
+ for (i=0; i<MAX_LINKS; i++) {
134
+ if (s->link[i]!=NULL) exfree_link(s->link[i]);
135
+ }
136
+ xfree(s->link, MAX_LINKS*sizeof(Link));
137
+ xfree(s, sizeof(Sublinkage));
138
+ }
139
+
140
+ static void replace_link_name(Link *l, const char *s)
141
+ {
142
+ /* XXX can get some perf improvement by avoiding strlen! */
143
+ char * t;
144
+ exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
145
+ t = (char *) exalloc(sizeof(char)*(strlen(s)+1));
146
+ strcpy(t, s);
147
+ l->name = t;
148
+ }
149
+
150
+ static void copy_full_link(Link **dest, Link *src)
151
+ {
152
+ if (*dest != NULL) exfree_link(*dest);
153
+ *dest = excopy_link(src);
154
+ }
155
+
156
+ /* end new code 9/97 ALB */
157
+
158
+
159
+ /**
160
+ * Constructs a graph in the wordlinks array based on the contents of
161
+ * the global link_array. Makes the wordlinks array point to a list of
162
+ * words neighboring each word (actually a list of links). This is a
163
+ * directed graph, constructed for dealing with "and". For a link in
164
+ * which the priorities are UP or DOWN_priority, the edge goes from the
165
+ * one labeled DOWN to the one labeled UP.
166
+ * Don't generate links edges for the bogus comma connectors.
167
+ */
168
+ static void build_digraph(analyze_context_t *actx, Parse_info pi)
169
+ {
170
+ int i, link, N_fat;
171
+ Link *lp;
172
+ List_o_links * lol;
173
+ N_fat = 0;
174
+
175
+ for (i = 0; i < pi->N_words; i++)
176
+ {
177
+ actx->word_links[i] = NULL;
178
+ }
179
+
180
+ for (link = 0; link < pi->N_links; link++)
181
+ {
182
+ lp = &(pi->link_array[link]);
183
+ i = lp->lc->label;
184
+ if (i < NORMAL_LABEL) { /* one of those special links for either-or, etc */
185
+ continue;
186
+ }
187
+
188
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
189
+ lol->next = actx->word_links[lp->l];
190
+ actx->word_links[lp->l] = lol;
191
+ lol->link = link;
192
+ lol->word = lp->r;
193
+ i = lp->lc->priority;
194
+ if (i == THIN_priority) {
195
+ lol->dir = 0;
196
+ } else if (i == DOWN_priority) {
197
+ lol->dir = 1;
198
+ } else {
199
+ lol->dir = -1;
200
+ }
201
+
202
+ lol = (List_o_links *) xalloc(sizeof(List_o_links));
203
+ lol->next = actx->word_links[lp->r];
204
+ actx->word_links[lp->r] = lol;
205
+ lol->link = link;
206
+ lol->word = lp->l;
207
+ i = lp->rc->priority;
208
+ if (i == THIN_priority) {
209
+ lol->dir = 0;
210
+ } else if (i == DOWN_priority) {
211
+ lol->dir = 1;
212
+ } else {
213
+ lol->dir = -1;
214
+ }
215
+ }
216
+ }
217
+
218
+ /**
219
+ * Returns TRUE if there is at least one fat link pointing out of this word.
220
+ */
221
+ static int is_CON_word(int w, List_o_links **wordlinks)
222
+ {
223
+ List_o_links * lol;
224
+ for (lol = wordlinks[w]; lol != NULL; lol = lol->next)
225
+ {
226
+ if (lol->dir == 1) return TRUE;
227
+ }
228
+ return FALSE;
229
+ }
230
+
231
+ static DIS_node * build_DIS_node(analyze_context_t*, int);
232
+
233
+ /**
234
+ * This word is a CON word (has fat links down). Build the tree for it.
235
+ */
236
+ static CON_node * build_CON_node(analyze_context_t *actx, int w)
237
+ {
238
+ List_o_links * lol;
239
+ CON_node * a;
240
+ DIS_list * d, *dx;
241
+ d = NULL;
242
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
243
+ {
244
+ if (lol->dir == 1)
245
+ {
246
+ dx = (DIS_list *) xalloc (sizeof (DIS_list));
247
+ dx->next = d;
248
+ d = dx;
249
+ d->dn = build_DIS_node(actx, lol->word);
250
+ }
251
+ }
252
+ a = (CON_node *) xalloc(sizeof (CON_node));
253
+ a->dl = a->current = d;
254
+ a->word = w;
255
+ return a;
256
+ }
257
+
258
+ /**
259
+ * Does a depth-first-search starting from w. Puts on the front of the
260
+ * list pointed to by c all of the CON nodes it finds, and returns the
261
+ * result. Also construct the list of all edges reached as part of this
262
+ * DIS_node search and append it to the lol list of start_dn.
263
+ *
264
+ * Both of the structure violations actually occur, and represent
265
+ * linkages that have improper structure. Fortunately, they
266
+ * seem to be rather rare.
267
+ */
268
+ static CON_list * c_dfs(analyze_context_t *actx,
269
+ int w, DIS_node * start_dn, CON_list * c)
270
+ {
271
+ CON_list *cx;
272
+ List_o_links * lol, *lolx;
273
+ if (actx->dfs_root_word[w] != -1)
274
+ {
275
+ if (actx->dfs_root_word[w] != start_dn->word)
276
+ {
277
+ actx->structure_violation = TRUE;
278
+ }
279
+ return c;
280
+ }
281
+ actx->dfs_root_word[w] = start_dn->word;
282
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
283
+ {
284
+ if (lol->dir < 0) /* a backwards link */
285
+ {
286
+ if (actx->dfs_root_word[lol->word] == -1)
287
+ {
288
+ actx->structure_violation = TRUE;
289
+ }
290
+ }
291
+ else if (lol->dir == 0)
292
+ {
293
+ /* Make a copy of the link */
294
+ lolx = (List_o_links *) xalloc(sizeof(List_o_links));
295
+ lolx->word = lol->word;
296
+ lolx->dir = lol->dir;
297
+ lolx->link = lol->link;
298
+
299
+ /* Chain it into place */
300
+ lolx->next = start_dn->lol;
301
+ start_dn->lol = lolx;
302
+ c = c_dfs(actx, lol->word, start_dn, c);
303
+ }
304
+ }
305
+
306
+ /* if the current node is CON, put it first */
307
+ if (is_CON_word(w, actx->word_links))
308
+ {
309
+ cx = (CON_list *) xalloc(sizeof(CON_list));
310
+ cx->next = c;
311
+ c = cx;
312
+ c->cn = build_CON_node(actx, w);
313
+ }
314
+ return c;
315
+ }
316
+
317
+ /**
318
+ * This node is connected to its parent via a fat link. Search the
319
+ * region reachable via thin links, and put all reachable nodes with fat
320
+ * links out of them in its list of children.
321
+ */
322
+ static DIS_node * build_DIS_node(analyze_context_t *actx,
323
+ int w)
324
+ {
325
+ DIS_node * dn;
326
+ dn = (DIS_node *) xalloc(sizeof (DIS_node));
327
+ dn->word = w; /* must do this before dfs so it knows the start word */
328
+ dn->lol = NULL;
329
+ dn->cl = c_dfs(actx, w, dn, NULL);
330
+ return dn;
331
+ }
332
+
333
+ static void height_dfs(analyze_context_t *actx, int w, int height)
334
+ {
335
+ List_o_links * lol;
336
+ if (actx->dfs_height[w] != 0) return;
337
+
338
+ actx->dfs_height[w] = height;
339
+
340
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
341
+ {
342
+ /* The dir is 1 for a down link. */
343
+ height_dfs(actx, lol->word, height - lol->dir);
344
+ }
345
+ }
346
+
347
+ /**
348
+ * Simple insertion sort; should be plenty fast enough, since sentences
349
+ * are almost always shorter than 30 words or so. In fact, this is
350
+ * almost surely faster than qsort for such small arrays.
351
+ */
352
+ static void insort (analyze_context_t *actx, int nwords)
353
+ {
354
+ int i, j;
355
+ for (i=1; i<nwords; i++)
356
+ {
357
+ int heig = actx->dfs_height[i];
358
+ int perm = actx->height_perm[i];
359
+ j = i;
360
+ while (j>0 && (heig > actx->dfs_height[j-1]))
361
+ {
362
+ actx->dfs_height[j] = actx->dfs_height[j-1];
363
+ actx->height_perm[j] = actx->height_perm[j-1];
364
+ j--;
365
+ }
366
+ actx->dfs_height[j] = heig;
367
+ actx->height_perm[j] = perm;
368
+ }
369
+ }
370
+
371
+ static DIS_node * build_DIS_CON_tree(analyze_context_t *actx, Parse_info pi)
372
+ {
373
+ int xw, w;
374
+ DIS_node * dnroot, * dn;
375
+ CON_list * child, * xchild;
376
+ List_o_links * lol, * xlol;
377
+
378
+ /* The algorithm used here to build the DIS_CON tree depends on
379
+ * the search percolating down from the "top" of the tree. The
380
+ * original version of this started its search at the wall. This
381
+ * was fine because doing a DFS from the wall explores the tree in
382
+ * the right order.
383
+ *
384
+ * However, in order to handle null links correctly, a more careful
385
+ * ordering process must be used to explore the tree. We use
386
+ * dfs_height[] for this, and sort in height order.
387
+ *
388
+ * XXX Is the sort order correct here? This is not obvious; I think
389
+ * we want highest to lowest ... XXX is the height being calculated
390
+ * correctly? Looks weird to me ... XXX
391
+ */
392
+
393
+ for (w=0; w < pi->N_words; w++) actx->dfs_height[w] = 0;
394
+ for (w=0; w < pi->N_words; w++) height_dfs(actx, w, MAX_SENTENCE);
395
+
396
+ for (w=0; w < pi->N_words; w++) actx->height_perm[w] = w;
397
+
398
+ /* Sort the heights, keeping only the permuted order. */
399
+ insort (actx, pi->N_words);
400
+
401
+ for (w=0; w<pi->N_words; w++) actx->dfs_root_word[w] = -1;
402
+
403
+ dnroot = NULL;
404
+ for (xw = 0; xw < pi->N_words; xw++)
405
+ {
406
+ w = actx->height_perm[xw];
407
+ if (actx->dfs_root_word[w] == -1)
408
+ {
409
+ dn = build_DIS_node(actx, w);
410
+ if (dnroot == NULL)
411
+ {
412
+ dnroot = dn;
413
+ }
414
+ else
415
+ {
416
+ for (child = dn->cl; child != NULL; child = xchild)
417
+ {
418
+ xchild = child->next;
419
+ child->next = dnroot->cl;
420
+ dnroot->cl = child;
421
+ }
422
+ for (lol = dn->lol; lol != NULL; lol = xlol)
423
+ {
424
+ xlol = lol->next;
425
+ lol->next = dnroot->lol;
426
+ dnroot->lol = lol;
427
+ }
428
+ xfree((void *) dn, sizeof(DIS_node));
429
+ }
430
+ }
431
+ }
432
+ return dnroot;
433
+ }
434
+
435
+ static int advance_CON(CON_node *);
436
+
437
+ /**
438
+ * Cyclically advance the current state of this DIS node.
439
+ * If it's now at the beginning of its cycle, return FALSE;
440
+ * otherwise return TRUE. Together with the advance_CON()
441
+ * function, this can be used to iterate over the entire
442
+ * DIS_CON tree.
443
+ */
444
+ static int advance_DIS(DIS_node * dn)
445
+ {
446
+ CON_list * cl;
447
+ for (cl = dn->cl; cl != NULL; cl = cl->next)
448
+ {
449
+ if (advance_CON(cl->cn)) return TRUE;
450
+ }
451
+ return FALSE;
452
+ }
453
+
454
+ /**
455
+ * Cyclically advance the current state of this CON node.
456
+ * If it's now at the beginning of its cycle return FALSE,
457
+ * otherwise return TRUE. Together with the advance_CON()
458
+ * function, this can be used to iterate over the entire
459
+ * DIS_CON tree.
460
+ */
461
+ static int advance_CON(CON_node * cn)
462
+ {
463
+ if (advance_DIS(cn->current->dn))
464
+ {
465
+ return TRUE;
466
+ }
467
+ else
468
+ {
469
+ if (cn->current->next == NULL)
470
+ {
471
+ cn->current = cn->dl;
472
+ return FALSE;
473
+ }
474
+ else
475
+ {
476
+ cn->current = cn->current->next;
477
+ return TRUE;
478
+ }
479
+ }
480
+ }
481
+
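/* Editor's note, not part of the packaged source: advance_DIS() and
 * advance_CON() together behave like an odometer over the DIS_CON tree.
 * The iteration idiom used later in this file (analyze_fat_linkage,
 * extract_fat_linkage) is:
 *
 *     for (;;) {
 *         ... process the sublinkage selected by the current state ...
 *         if (!advance_DIS(d_root)) break;
 *     }
 *
 * advance_DIS() returns FALSE only when every CON node has cycled back
 * to its first child, i.e. after all combinations have been visited. */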
482
+ static void fill_patch_array_CON(analyze_context_t *, CON_node *, Links_to_patch *);
483
+
484
+ /**
485
+ * Patches up appropriate links in the patch_array for this DIS_node
486
+ * and this patch list.
487
+ */
488
+ static void fill_patch_array_DIS(analyze_context_t *actx,
489
+ DIS_node * dn, Links_to_patch * ltp)
490
+ {
491
+ CON_list * cl;
492
+ List_o_links * lol;
493
+ Links_to_patch * ltpx;
494
+
495
+ for (lol = dn->lol; lol != NULL; lol = lol->next)
496
+ {
497
+ actx->patch_array[lol->link].used = TRUE;
498
+ }
499
+
500
+ if ((dn->cl == NULL) || (dn->cl->cn->word != dn->word))
501
+ {
502
+ for (; ltp != NULL; ltp = ltpx)
503
+ {
504
+ ltpx = ltp->next;
505
+ actx->patch_array[ltp->link].changed = TRUE;
506
+ if (ltp->dir == 'l')
507
+ {
508
+ actx->patch_array[ltp->link].newl = dn->word;
509
+ }
510
+ else
511
+ {
512
+ actx->patch_array[ltp->link].newr = dn->word;
513
+ }
514
+ xfree((void *) ltp, sizeof(Links_to_patch));
515
+ }
516
+ }
517
+
518
+ /* ltp != NULL at this point means that dn has a child which is a
519
+ CON node for the same word */
520
+ for (cl = dn->cl; cl != NULL; cl = cl->next)
521
+ {
522
+ fill_patch_array_CON(actx, cl->cn, ltp);
523
+ ltp = NULL;
524
+ }
525
+ }
526
+
527
+ static void fill_patch_array_CON(analyze_context_t *actx,
528
+ CON_node * cn, Links_to_patch * ltp)
529
+ {
530
+ List_o_links * lol;
531
+ Links_to_patch *ltpx;
532
+
533
+ for (lol = actx->word_links[cn->word]; lol != NULL; lol = lol->next)
534
+ {
535
+ if (lol->dir == 0)
536
+ {
537
+ ltpx = (Links_to_patch *) xalloc(sizeof(Links_to_patch));
538
+ ltpx->next = ltp;
539
+ ltp = ltpx;
540
+ ltp->link = lol->link;
541
+ if (lol->word > cn->word) {
542
+ ltp->dir = 'l';
543
+ } else {
544
+ ltp->dir = 'r';
545
+ }
546
+ }
547
+ }
548
+ fill_patch_array_DIS(actx, cn->current->dn, ltp);
549
+ }
550
+
551
+ static void free_digraph(analyze_context_t *actx, Parse_info pi)
552
+ {
553
+ List_o_links * lol, *lolx;
554
+ int i;
555
+ for (i = 0; i < pi->N_words; i++)
556
+ {
557
+ for (lol = actx->word_links[i]; lol != NULL; lol = lolx)
558
+ {
559
+ lolx = lol->next;
560
+ xfree((void *) lol, sizeof(List_o_links));
561
+ }
562
+ }
563
+ }
564
+
565
+ static void free_CON_tree(CON_node *);
566
+
567
+ void free_DIS_tree(DIS_node * dn)
568
+ {
569
+ List_o_links * lol, *lolx;
570
+ CON_list *cl, *clx;
571
+ for (lol = dn->lol; lol != NULL; lol = lolx)
572
+ {
573
+ lolx = lol->next;
574
+ xfree((void *) lol, sizeof(List_o_links));
575
+ }
576
+ for (cl = dn->cl; cl != NULL; cl = clx)
577
+ {
578
+ clx = cl->next;
579
+ free_CON_tree(cl->cn);
580
+ xfree((void *) cl, sizeof(CON_list));
581
+ }
582
+ xfree((void *) dn, sizeof(DIS_node));
583
+ }
584
+
585
+ static void free_CON_tree(CON_node * cn)
586
+ {
587
+ DIS_list *dl, *dlx;
588
+ for (dl = cn->dl; dl != NULL; dl = dlx)
589
+ {
590
+ dlx = dl->next;
591
+ free_DIS_tree(dl->dn);
592
+ xfree((void *) dl, sizeof(DIS_list));
593
+ }
594
+ xfree((void *) cn, sizeof(CON_node));
595
+ }
596
+
597
+ /** scope out this and element */
598
+ static void and_dfs_full(analyze_context_t *actx, int w)
599
+ {
600
+ List_o_links *lol;
601
+ if (actx->visited[w]) return;
602
+ actx->visited[w] = TRUE;
603
+ actx->and_element_sizes[actx->N_and_elements]++;
604
+
605
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
606
+ {
607
+ if (lol->dir >= 0)
608
+ {
609
+ and_dfs_full(actx, lol->word);
610
+ }
611
+ }
612
+ }
613
+
614
+ /** get down the tree past all the commas */
615
+ static void and_dfs_commas(analyze_context_t *actx, Sentence sent, int w)
616
+ {
617
+ List_o_links *lol;
618
+ if (actx->visited[w]) return;
619
+
620
+ actx->visited[w] = TRUE;
621
+
622
+ for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
623
+ {
624
+ /* we only consider UP or DOWN priority links here */
625
+ if (lol->dir == 1)
626
+ {
627
+ if (strcmp(sent->word[lol->word].string, ",") == 0)
628
+ {
629
+ /* pointing to a comma */
630
+ and_dfs_commas(actx, sent, lol->word);
631
+ }
632
+ else
633
+ {
634
+ actx->and_element[actx->N_and_elements] = lol->word;
635
+ and_dfs_full(actx, lol->word);
636
+ actx->N_and_elements++;
637
+ }
638
+ }
639
+ if (lol->dir == 0)
640
+ {
641
+ actx->outside_word[actx->N_outside_words] = lol->word;
642
+ actx->N_outside_words++;
643
+ }
644
+ }
645
+ }
646
+
647
+ /**
648
+ * This function computes the "and cost", resulting from inequalities
649
+ * in the length of and-list elements. It also computes other
650
+ * information used to construct the "andlist" structure of linkage_info.
651
+ */
652
+ static Andlist * build_andlist(analyze_context_t *actx, Sentence sent)
653
+ {
654
+ int w, i, min, max, j, cost;
655
+ char * s;
656
+ Andlist * new_andlist, * old_andlist;
657
+ Parse_info pi = sent->parse_info;
658
+
659
+ old_andlist = NULL;
660
+ cost = 0;
661
+
662
+ for(w = 0; w<pi->N_words; w++)
663
+ {
664
+ s = sent->word[w].string;
665
+ if (sent->is_conjunction[w])
666
+ {
667
+ actx->N_and_elements = 0;
668
+ actx->N_outside_words = 0;
669
+ for(i=0; i<pi->N_words; i++)
670
+ {
671
+ actx->visited[i] = FALSE;
672
+ actx->and_element_sizes[i] = 0;
673
+ }
674
+ if (sent->dict->left_wall_defined)
675
+ actx->visited[0] = TRUE;
676
+
677
+ and_dfs_commas(actx, sent, w);
678
+ if (actx->N_and_elements == 0) continue;
679
+
680
+ new_andlist = (Andlist *) xalloc(sizeof(Andlist));
681
+ new_andlist->num_elements = actx->N_and_elements;
682
+ new_andlist->num_outside_words = actx->N_outside_words;
683
+
684
+ for (i=0; i < actx->N_and_elements; i++)
685
+ {
686
+ new_andlist->element[i] = actx->and_element[i];
687
+ }
688
+ for (i=0; i < actx->N_outside_words; i++)
689
+ {
690
+ new_andlist->outside_word[i] = actx->outside_word[i];
691
+ }
692
+ new_andlist->conjunction = w;
693
+ new_andlist->next = old_andlist;
694
+ old_andlist = new_andlist;
695
+
696
+ if (actx->N_and_elements > 0)
697
+ {
698
+ min = MAX_SENTENCE;
699
+ max = 0;
700
+ for (i=0; i < actx->N_and_elements; i++)
701
+ {
702
+ j = actx->and_element_sizes[i];
703
+ if (j < min) min = j;
704
+ if (j > max) max = j;
705
+ }
706
+ cost += max-min;
707
+ }
708
+ }
709
+ }
710
+ if (old_andlist) old_andlist->cost = cost;
711
+ return old_andlist;
712
+ }
713
+
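/* Editor's note, not part of the packaged source: the and-cost computed
 * above penalizes unbalanced conjunctions.  For each conjunction it adds
 * (max - min) over the word counts of its and-elements, so joining a
 * 4-word element with a 2-word element adds 2, while perfectly balanced
 * elements add nothing. */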
714
+ /**
715
+ * This function defines the cost of a link as a function of its length.
716
+ */
717
+ static inline int cost_for_length(int length)
718
+ {
719
+ return length-1;
720
+ }
721
+
722
+ /**
723
+ * Computes the cost of the current parse of the current sentence,
724
+ * due to the length of the links.
725
+ */
726
+ static int link_cost(Parse_info pi)
727
+ {
728
+ int lcost, i;
729
+ lcost = 0;
730
+ for (i = 0; i < pi->N_links; i++)
731
+ {
732
+ lcost += cost_for_length(pi->link_array[i].r - pi->link_array[i].l);
733
+ }
734
+ return lcost;
735
+ }
736
+
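/* Editor's note, not part of the packaged source: since
 * cost_for_length(len) is simply len - 1, a link between adjacent words
 * (r - l == 1) is free, and e.g. a link from word 2 to word 5 adds
 * 3 - 1 = 2 to link_cost(). */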
737
+ static int null_cost(Parse_info pi)
738
+ {
739
+ /* computes the number of null links in the linkage */
740
+ /* No one seems to care about this -- ALB */
741
+ return 0;
742
+ }
743
+
744
+ static int unused_word_cost(Parse_info pi)
745
+ {
746
+ int lcost, i;
747
+ lcost = 0;
748
+ for (i = 0; i < pi->N_words; i++)
749
+ lcost += (pi->chosen_disjuncts[i] == NULL);
750
+ return lcost;
751
+ }
752
+
753
+ /**
754
+ * Computes the cost of the current parse of the current sentence
755
+ * due to the cost of the chosen disjuncts.
756
+ */
757
+ static float disjunct_cost(Parse_info pi)
758
+ {
759
+ int i;
760
+ float lcost;
761
+ lcost = 0.0;
762
+ for (i = 0; i < pi->N_words; i++)
763
+ {
764
+ if (pi->chosen_disjuncts[i] != NULL)
765
+ lcost += pi->chosen_disjuncts[i]->cost;
766
+ }
767
+ return lcost;
768
+ }
769
+
770
+ /**
771
+ * Returns TRUE if string s represents a strictly smaller match set
772
+ * than does t. An almost identical function appears in and.c.
773
+ * The difference is that here we don't require s and t to be the
774
+ * same length.
775
+ */
776
+ static int strictly_smaller_name(const char * s, const char * t)
777
+ {
778
+ int strictness, ss, tt;
779
+ strictness = 0;
780
+ while ((*s!='\0') || (*t!='\0'))
781
+ {
782
+ if (*s == '\0') {
783
+ ss = '*';
784
+ } else {
785
+ ss = *s;
786
+ s++;
787
+ }
788
+ if (*t == '\0') {
789
+ tt = '*';
790
+ } else {
791
+ tt = *t;
792
+ t++;
793
+ }
794
+ if (ss == tt) continue;
795
+ if ((tt == '*') || (ss == '^')) {
796
+ strictness++;
797
+ } else {
798
+ return FALSE;
799
+ }
800
+ }
801
+ return (strictness > 0);
802
+ }
803
+
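/* Editor's note, not part of the packaged source: as a concrete example,
 * strictly_smaller_name("Ss", "S") is TRUE -- the missing position of "S"
 * is padded with '*', and the explicit 's' subscript restricts the match
 * set -- while the reverse call, strictly_smaller_name("S", "Ss"),
 * returns FALSE. */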
804
+ /**
805
+ * The name of the link is set to be the GCD of the names of
806
+ * its two endpoints. Must be called after each extract_links(),
807
+ * etc. since that call issues a brand-new set of links into
808
+ * parse_info.
809
+ */
810
+ static void compute_link_names(Sentence sent)
811
+ {
812
+ int i;
813
+ Parse_info pi = sent->parse_info;
814
+
815
+ for (i = 0; i < pi->N_links; i++)
816
+ {
817
+ pi->link_array[i].name = intersect_strings(sent,
818
+ connector_get_string(pi->link_array[i].lc),
819
+ connector_get_string(pi->link_array[i].rc));
820
+ }
821
+ }
822
+
823
+ /**
824
+ * This fills in the sublinkage->link[].name field. We assume that
825
+ * link_array[].name have already been filled in. As above, in the
826
+ * standard case, the name is just the GCD of the two end points.
827
+ * If pluralization has occurred, then we want to use the name
828
+ * already in link_array[].name. We detect this in two ways.
829
+ * If the endpoints don't match, then we know pluralization
830
+ * has occurred. If they do, but the name in link_array[].name
831
+ * is *less* restrictive, then pluralization must have occurred.
832
+ */
833
+ static void compute_pp_link_names(Sentence sent, Sublinkage *sublinkage)
834
+ {
835
+ int i;
836
+ const char * s;
837
+ Parse_info pi = sent->parse_info;
838
+
839
+ for (i = 0; i < pi->N_links; i++)
840
+ {
841
+ if (sublinkage->link[i]->l == -1) continue;
842
+ /* NULL's here are quite unexpected -- I think there's a bug
843
+ * elsewhere in the code. But for now, punt. Here's a sentence
844
+ * that triggers a NULL -- "His convalescence was relatively brief
845
+ * and he was able to return and fight at The Wilderness,
846
+ * Spotsylvania and Cold Harbor."
847
+ */
848
+ if (NULL == sublinkage->link[i]->lc) continue;
849
+ if (NULL == sublinkage->link[i]->rc) continue;
850
+ if (!x_match(sent, sublinkage->link[i]->lc, sublinkage->link[i]->rc))
851
+ {
852
+ replace_link_name(sublinkage->link[i], pi->link_array[i].name);
853
+ }
854
+ else
855
+ {
856
+ s = intersect_strings(sent,
857
+ connector_get_string(sublinkage->link[i]->lc),
858
+ connector_get_string(sublinkage->link[i]->rc));
859
+
860
+ if (strictly_smaller_name(s, pi->link_array[i].name))
861
+ replace_link_name(sublinkage->link[i], pi->link_array[i].name);
862
+ else
863
+ replace_link_name(sublinkage->link[i], s);
864
+ }
865
+ }
866
+ }
867
+
868
+ /********************** exported functions *****************************/
869
+
870
+ void init_analyze(Sentence s)
871
+ {
872
+ analyze_context_t *actx = s->analyze_ctxt;
873
+
874
+ if (NULL == actx)
875
+ {
876
+ actx = (analyze_context_t *) malloc (sizeof(analyze_context_t));
877
+ s->analyze_ctxt = actx;
878
+ }
879
+
880
+ actx->structure_violation = FALSE;
881
+ }
882
+
883
+ void free_analyze(Sentence s)
884
+ {
885
+ if (s->analyze_ctxt != NULL) free(s->analyze_ctxt);
886
+ s->analyze_ctxt = NULL;
887
+ }
888
+
889
+ /**
890
+ * This uses link_array. It enumerates and post-processes
891
+ * all the linkages represented by this one. We know this contains
892
+ * at least one fat link.
893
+ */
894
+ Linkage_info analyze_fat_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
895
+ {
896
+ int i;
897
+ Linkage_info li;
898
+ DIS_node *d_root;
899
+ PP_node *pp;
900
+ Postprocessor *postprocessor;
901
+ Sublinkage *sublinkage;
902
+ Parse_info pi = sent->parse_info;
903
+ PP_node accum; /* for domain ancestry check */
904
+ D_type_list * dtl0, * dtl1; /* for domain ancestry check */
905
+
906
+ analyze_context_t *actx = sent->analyze_ctxt;
907
+
908
+ sublinkage = x_create_sublinkage(pi);
909
+ postprocessor = sent->dict->postprocessor;
910
+ build_digraph(actx, pi);
911
+ actx->structure_violation = FALSE;
912
+ d_root = build_DIS_CON_tree(actx, pi); /* may set structure_violation to TRUE */
913
+
914
+ memset(&li, 0, sizeof(li));
915
+ li.N_violations = 0;
916
+ li.improper_fat_linkage = actx->structure_violation;
917
+ li.inconsistent_domains = FALSE;
918
+ li.unused_word_cost = unused_word_cost(sent->parse_info);
919
+ if (opts->use_sat_solver)
920
+ {
921
+ li.disjunct_cost = 0.0;
922
+ }
923
+ else
924
+ {
925
+ li.disjunct_cost = disjunct_cost(pi);
926
+ }
927
+ li.null_cost = null_cost(pi);
928
+ li.link_cost = link_cost(pi);
929
+ li.corpus_cost = -1.0f;
930
+ li.and_cost = 0;
931
+ li.andlist = NULL;
932
+
933
+ if (actx->structure_violation)
934
+ {
935
+ li.N_violations++;
936
+ free_sublinkage(sublinkage);
937
+ free_digraph(actx, pi);
938
+ free_DIS_tree(d_root);
939
+ for (i = 0; i < pi->N_links; i++)
940
+ {
941
+ pi->link_array[i].name = "";
942
+ }
943
+ return li;
944
+ }
945
+
946
+ if (analyze_pass == PP_SECOND_PASS)
947
+ {
948
+ li.andlist = build_andlist(actx, sent);
949
+ li.and_cost = 0;
950
+ if (li.andlist) li.and_cost = li.andlist->cost;
951
+ }
952
+ else li.and_cost = 0;
953
+
954
+ compute_link_names(sent);
955
+
956
+ for (i=0; i<pi->N_links; i++) accum.d_type_array[i] = NULL;
957
+
958
+ /* loop through all the sub linkages */
959
+ for (;;)
960
+ {
961
+ for (i=0; i<pi->N_links; i++)
962
+ {
963
+ actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
964
+ actx->patch_array[i].newl = pi->link_array[i].l;
965
+ actx->patch_array[i].newr = pi->link_array[i].r;
966
+ copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
967
+ }
968
+ fill_patch_array_DIS(actx, d_root, NULL);
969
+
970
+ for (i=0; i<pi->N_links; i++)
971
+ {
972
+ if (actx->patch_array[i].changed || actx->patch_array[i].used)
973
+ {
974
+ sublinkage->link[i]->l = actx->patch_array[i].newl;
975
+ sublinkage->link[i]->r = actx->patch_array[i].newr;
976
+ }
977
+ else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
978
+ (actx->dfs_root_word[pi->link_array[i].r] != -1))
979
+ {
980
+ sublinkage->link[i]->l = -1;
981
+ }
982
+ }
983
+
984
+ if (0 == opts->use_sat_solver)
985
+ {
986
+ compute_pp_link_array_connectors(sent, sublinkage);
987
+ compute_pp_link_names(sent, sublinkage);
988
+ }
989
+
990
+ /* 'analyze_pass' logic added ALB 1/97 */
991
+ if (analyze_pass==PP_FIRST_PASS) {
992
+ post_process_scan_linkage(postprocessor,opts,sent,sublinkage);
993
+ if (!advance_DIS(d_root)) break;
994
+ else continue;
995
+ }
996
+
997
+ pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);
998
+
999
+ if (pp==NULL) {
1000
+ if (postprocessor != NULL) li.N_violations = 1;
1001
+ }
1002
+ else if (pp->violation == NULL) {
1003
+ /* the purpose of this stuff is to make sure the domain
1004
+ ancestry for a link in each of its sublinkages is consistent. */
1005
+
1006
+ for (i=0; i<pi->N_links; i++) {
1007
+ if (sublinkage->link[i]->l == -1) continue;
1008
+ if (accum.d_type_array[i] == NULL) {
1009
+ accum.d_type_array[i] = copy_d_type(pp->d_type_array[i]);
1010
+ } else {
1011
+ dtl0 = pp->d_type_array[i];
1012
+ dtl1 = accum.d_type_array[i];
1013
+ while((dtl0 != NULL) && (dtl1 != NULL) && (dtl0->type == dtl1->type)) {
1014
+ dtl0 = dtl0->next;
1015
+ dtl1 = dtl1->next;
1016
+ }
1017
+ if ((dtl0 != NULL) || (dtl1 != NULL)) break;
1018
+ }
1019
+ }
1020
+ if (i != pi->N_links) {
1021
+ li.N_violations++;
1022
+ li.inconsistent_domains = TRUE;
1023
+ }
1024
+ }
1025
+ else if (pp->violation!=NULL) {
1026
+ li.N_violations++;
1027
+ }
1028
+
1029
+ if (!advance_DIS(d_root)) break;
1030
+ }
1031
+
1032
+ for (i=0; i<pi->N_links; ++i) {
1033
+ free_d_type(accum.d_type_array[i]);
1034
+ }
1035
+
1036
+ /* if (display_on && (li.N_violations != 0) &&
1037
+ (verbosity > 3) && should_print_messages)
1038
+ printf("P.P. violation in one part of conjunction.\n"); */
1039
+ free_sublinkage(sublinkage);
1040
+ free_digraph(actx, pi);
1041
+ free_DIS_tree(d_root);
1042
+ return li;
1043
+ }
1044
+
1045
+ /**
1046
+ * This uses link_array. It post-processes
1047
+ * this linkage, and prints the appropriate thing. There are no fat
1048
+ * links in it.
1049
+ */
1050
+ Linkage_info analyze_thin_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
1051
+ {
1052
+ int i;
1053
+ Linkage_info li;
1054
+ PP_node * pp;
1055
+ Postprocessor * postprocessor;
1056
+ Sublinkage *sublinkage;
1057
+ Parse_info pi = sent->parse_info;
1058
+ analyze_context_t *actx = sent->analyze_ctxt;
1059
+
1060
+ sublinkage = x_create_sublinkage(pi);
1061
+ postprocessor = sent->dict->postprocessor;
1062
+
1063
+ compute_link_names(sent);
1064
+ for (i=0; i<pi->N_links; i++)
1065
+ {
1066
+ copy_full_link(&(sublinkage->link[i]), &(pi->link_array[i]));
1067
+ }
1068
+
1069
+ if (analyze_pass == PP_FIRST_PASS)
1070
+ {
1071
+ post_process_scan_linkage(postprocessor, opts, sent, sublinkage);
1072
+ free_sublinkage(sublinkage);
1073
+ memset(&li, 0, sizeof(li));
1074
+ return li;
1075
+ }
1076
+
1077
+ build_digraph(actx, pi);
1078
+
1079
+ /* The code below can be used to generate the "islands" array.
1080
+ * For this to work, however, you have to call "build_digraph"
1081
+ * first (as in analyze_fat_linkage). and then "free_digraph".
1082
+ */
1083
+ pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);
1084
+
1085
+ memset(&li, 0, sizeof(li));
1086
+ li.N_violations = 0;
1087
+ li.and_cost = 0;
1088
+ li.unused_word_cost = unused_word_cost(sent->parse_info);
1089
+ li.improper_fat_linkage = FALSE;
1090
+ li.inconsistent_domains = FALSE;
1091
+ if (opts->use_sat_solver)
1092
+ {
1093
+ li.disjunct_cost = 0.0;
1094
+ }
1095
+ else
1096
+ {
1097
+ li.disjunct_cost = disjunct_cost(pi);
1098
+ }
1099
+ li.null_cost = null_cost(pi);
1100
+ li.link_cost = link_cost(pi);
1101
+ li.corpus_cost = -1.0f;
1102
+ li.andlist = NULL;
1103
+
1104
+ if (pp == NULL)
1105
+ {
1106
+ if (postprocessor != NULL) li.N_violations = 1;
1107
+ }
1108
+ else if (pp->violation != NULL)
1109
+ {
1110
+ li.N_violations++;
1111
+ }
1112
+
1113
+ free_sublinkage(sublinkage);
1114
+ free_digraph(actx, pi);
1115
+ return li;
1116
+ }
1117
+
1118
+ void extract_thin_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
1119
+ {
1120
+ int i;
1121
+ Parse_info pi = sent->parse_info;
1122
+
1123
+ linkage->num_sublinkages = 1;
1124
+ linkage->sublinkage = ex_create_sublinkage(pi);
1125
+
1126
+ compute_link_names(sent);
1127
+ for (i=0; i<pi->N_links; ++i)
1128
+ {
1129
+ linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
1130
+ }
1131
+ }
1132
+
1133
+ #ifdef DBG
1134
+ static void prt_lol(Sentence sent , List_o_links *lol)
1135
+ {
1136
+ /* It appears that the list of links is always even in length:
1137
+ * The head word first, followed by a modifier.
1138
+ */
1139
+ while (lol)
1140
+ {
1141
+ // printf ("%d ", lol->link);
1142
+ printf ("%s ", sent->word[lol->word].string);
1143
+ lol = lol->next;
1144
+ }
1145
+ }
1146
+
1147
+ static void prt_con_list(Sentence, CON_list *);
1148
+ static void prt_dis_list(Sentence sent, DIS_list *dis)
1149
+ {
1150
+ while(dis)
1151
+ {
1152
+ /* There are three possibilities:
1153
+ * Either there's another conjunction (and we should print it)
1154
+ * Or there's a head word, with its modifiers in its list-o-links,
1155
+ * Or there's just the bare, naked word by itself.
1156
+ */
1157
+ if (dis->dn->cl)
1158
+ {
1159
+ prt_con_list(sent, dis->dn->cl);
1160
+ }
1161
+ else if (dis->dn->lol)
1162
+ {
1163
+ printf("[");
1164
+ prt_lol(sent, dis->dn->lol);
1165
+ printf("]");
1166
+ }
1167
+ else
1168
+ {
1169
+ int wd = dis->dn->word;
1170
+ printf("%s ", sent->word[wd].string);
1171
+ }
1172
+ dis = dis->next;
1173
+ }
1174
+ }
1175
+
1176
+ static void prt_con_list(Sentence sent, CON_list *con)
1177
+ {
1178
+ while(con)
1179
+ {
1180
+ int wd = con->cn->word;
1181
+ printf("(%s ", sent->word[wd].string);
1182
+ prt_dis_list(sent, con->cn->dl);
1183
+ printf(") ");
1184
+ con = con->next;
1185
+ }
1186
+ }
1187
+ static void prt_dis_con_tree(Sentence sent, DIS_node *dis)
1188
+ {
1189
+ prt_con_list(sent, dis->cl);
1190
+ printf ("\n");
1191
+ }
1192
+ #else
1193
+ static inline void prt_dis_con_tree(Sentence sent, DIS_node *dis) {}
1194
+ #endif
1195
+
1196
+ /**
1197
+ * This procedure mimics analyze_fat_linkage in order to
1198
+ * extract the sublinkages and copy them to the Linkage
1199
+ * data structure passed in.
1200
+ */
1201
+ void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
1202
+ {
1203
+ int i, j, N_thin_links;
1204
+ DIS_node *d_root;
1205
+ int num_sublinkages;
1206
+ Sublinkage * sublinkage;
1207
+ Parse_info pi = sent->parse_info;
1208
+
1209
+ analyze_context_t *actx = sent->analyze_ctxt;
1210
+
1211
+ build_digraph(actx, pi);
1212
+ actx->structure_violation = FALSE;
1213
+ d_root = build_DIS_CON_tree(actx, pi);
1214
+
1215
+ if (actx->structure_violation)
1216
+ {
1217
+ compute_link_names(sent);
1218
+ linkage->num_sublinkages=1;
1219
+ linkage->sublinkage = ex_create_sublinkage(pi);
1220
+
1221
+ /* This will have fat links! */
1222
+ for (i=0; i<pi->N_links; ++i)
1223
+ {
1224
+ linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
1225
+ }
1226
+
1227
+ free_digraph(actx, pi);
1228
+ free_DIS_tree(d_root);
1229
+ return;
1230
+ }
1231
+
1232
+ /* first get number of sublinkages and allocate space */
1233
+ num_sublinkages = 0;
1234
+ for (;;) {
1235
+ num_sublinkages++;
1236
+ if (!advance_DIS(d_root)) break;
1237
+ }
1238
+
1239
+ linkage->num_sublinkages = num_sublinkages;
1240
+ linkage->sublinkage =
1241
+ (Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
1242
+ for (i=0; i<num_sublinkages; ++i) {
1243
+ linkage->sublinkage[i].link = NULL;
1244
+ linkage->sublinkage[i].pp_info = NULL;
1245
+ linkage->sublinkage[i].violation = NULL;
1246
+ }
1247
+
1248
+ /* now fill out the sublinkage arrays */
1249
+ compute_link_names(sent);
1250
+
1251
+ sublinkage = x_create_sublinkage(pi);
1252
+ num_sublinkages = 0;
1253
+ for (;;)
1254
+ {
1255
+ for (i = 0; i < pi->N_links; i++)
1256
+ {
1257
+ actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
1258
+ actx->patch_array[i].newl = pi->link_array[i].l;
1259
+ actx->patch_array[i].newr = pi->link_array[i].r;
1260
+ copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
1261
+ }
1262
+ fill_patch_array_DIS(actx, d_root, NULL);
1263
+
1264
+ for (i = 0; i < pi->N_links; i++)
1265
+ {
1266
+ if (actx->patch_array[i].changed || actx->patch_array[i].used)
1267
+ {
1268
+ sublinkage->link[i]->l = actx->patch_array[i].newl;
1269
+ sublinkage->link[i]->r = actx->patch_array[i].newr;
1270
+ }
1271
+ else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
1272
+ (actx->dfs_root_word[pi->link_array[i].r] != -1))
1273
+ {
1274
+ sublinkage->link[i]->l = -1;
1275
+ }
1276
+ }
1277
+
1278
+ if (0 == opts->use_sat_solver)
1279
+ {
1280
+ compute_pp_link_array_connectors(sent, sublinkage);
1281
+ compute_pp_link_names(sent, sublinkage);
1282
+ }
1283
+
1284
+ /* Don't copy the fat links into the linkage */
1285
+ N_thin_links = 0;
1286
+ for (i = 0; i < pi->N_links; ++i)
1287
+ {
1288
+ if (sublinkage->link[i]->l == -1) continue;
1289
+ N_thin_links++;
1290
+ }
1291
+
1292
+ linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
1293
+ linkage->sublinkage[num_sublinkages].link =
1294
+ (Link **) exalloc(sizeof(Link *)*N_thin_links);
1295
+ linkage->sublinkage[num_sublinkages].pp_info = NULL;
1296
+ linkage->sublinkage[num_sublinkages].violation = NULL;
1297
+
1298
+ for (i = 0, j = 0; i < pi->N_links; ++i)
1299
+ {
1300
+ if (sublinkage->link[i]->l == -1) continue;
1301
+ linkage->sublinkage[num_sublinkages].link[j++] =
1302
+ excopy_link(sublinkage->link[i]);
1303
+ }
1304
+
1305
+ num_sublinkages++;
1306
+ if (!advance_DIS(d_root)) break;
1307
+ }
1308
+
1309
+ free_sublinkage(sublinkage);
1310
+ free_digraph(actx, pi);
1311
+ if (linkage->dis_con_tree)
1312
+ free_DIS_tree(linkage->dis_con_tree);
1313
+ linkage->dis_con_tree = d_root;
1314
+
1315
+ prt_dis_con_tree(sent, d_root);
1316
+ }
1317
+