grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,13 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2008, 2009 Linas Vepstas */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ void lg_compute_disjunct_strings(Sentence, Linkage_info *);
@@ -0,0 +1,92 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include <stdio.h>
15
+ #include <stdlib.h>
16
+ #include <string.h>
17
+ #include <stdarg.h>
18
+
19
+ #ifdef USE_PTHREADS
20
+ #include <pthread.h>
21
+ #endif
22
+
23
+ #include "error.h"
24
+ #include "structures.h"
25
+ #include "api-structures.h"
26
+
27
+ #ifdef _MSC_VER
28
+ #define DLLEXPORT __declspec(dllexport)
29
+ #else
30
+ #define DLLEXPORT
31
+ #endif
32
+
33
+ /* ============================================================ */
34
+
35
+ static void verr_msg(err_ctxt *ec, severity sev, const char *fmt, va_list args)
36
+ {
37
+ fprintf(stderr, "link-grammar: ");
38
+ vfprintf(stderr, fmt, args);
39
+
40
+ if ((Info != sev) && ec->sent != NULL)
41
+ {
42
+ int i;
43
+ fprintf(stderr, "\tFailing sentence was:\n\t");
44
+ for (i=0; i<ec->sent->length; i++)
45
+ {
46
+ fprintf(stderr, "%s ", ec->sent->word[i].string);
47
+ }
48
+ fprintf(stderr, "\n");
49
+ }
50
+ }
51
+
52
+ void err_msg(err_ctxt *ec, severity sev, const char *fmt, ...)
53
+ {
54
+ va_list args;
55
+ va_start(args, fmt);
56
+ verr_msg(ec, sev, fmt, args);
57
+ va_end(args);
58
+ }
59
+
60
+ void prt_error(const char *fmt, ...)
61
+ {
62
+ severity sev;
63
+ err_ctxt ec;
64
+ va_list args;
65
+
66
+ sev = Error;
67
+ if (0 == strncmp(fmt, "Fatal", 5)) sev = Fatal;
68
+ if (0 == strncmp(fmt, "Error:", 6)) sev = Error;
69
+ if (0 == strncmp(fmt, "Warn", 4)) sev = Warn;
70
+ if (0 == strncmp(fmt, "Info:", 5)) sev = Info;
71
+
72
+ ec.sent = NULL;
73
+ va_start(args, fmt);
74
+ verr_msg(&ec, sev, fmt, args);
75
+ va_end(args);
76
+ }
77
+
78
+ /* ============================================================ */
79
+ /* These are deprecated, obsolete, and unused, but are still here
80
+ * because these are exported in the public API. Do not use these.
81
+ */
82
+ DLLEXPORT int lperrno = 0;
83
+ DLLEXPORT char lperrmsg[1];
84
+
85
+ extern void lperror_clear(void);
86
+ void lperror_clear(void)
87
+ {
88
+ lperrmsg[0] = 0x0;
89
+ lperrno = 0;
90
+ }
91
+
92
+ /* ============================================================ */
@@ -0,0 +1,35 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ #ifndef _LINK_GRAMMAR_ERROR_H_
14
+ #define _LINK_GRAMMAR_ERROR_H_
15
+
16
+ #include "link-includes.h"
17
+
18
+ typedef struct
19
+ {
20
+ Sentence sent;
21
+ } err_ctxt;
22
+
23
/**
 * Severity levels accepted by err_msg(). Values start at 1 and increase
 * in declaration order.
 */
typedef enum {
	Fatal = 1,
	Error = 2,
	Warn  = 3,
	Info  = 4,
	Debug = 5
} severity;
31
+
32
+ void err_msg(err_ctxt *, severity, const char *fmt, ...) GNUC_PRINTF(3,4);
33
+
34
+ #endif
35
+
@@ -0,0 +1,67 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+ /*
13
+ * expand.c
14
+ *
15
+ * Enlarge the range of possible disjuncts to consider while parsing.
16
+ */
17
+
18
+ #include "api-structures.h"
19
+ #include "expand.h"
20
+ #include "disjunct-utils.h"
21
+ #include "word-utils.h"
22
+ #include "corpus/cluster.h"
23
+
24
+ /* ========================================================= */
25
+
26
+ static Disjunct * build_expansion_disjuncts(Cluster *clu, X_node *x)
27
+ {
28
+ Disjunct *dj;
29
+ dj = lg_cluster_get_disjuncts(clu, x->string);
30
+ if (dj) printf("Expanded %s \n", x->string);
31
+ return dj;
32
+ }
33
+
34
+ /**
35
+ * Increase the number of disjuncts associated to each word in the
36
+ * sentence by working with word-clusters. Return true if the number
37
+ * of disjuncts were expanded, else return false.
38
+ */
39
+ int lg_expand_disjunct_list(Sentence sent)
40
+ {
41
+ int w;
42
+
43
+ Cluster *clu = lg_cluster_new();
44
+
45
+ int expanded = FALSE;
46
+ for (w = 0; w < sent->length; w++)
47
+ {
48
+ X_node * x;
49
+ Disjunct * d = sent->word[w].d;
50
+ for (x = sent->word[w].x; x != NULL; x = x->next)
51
+ {
52
+ Disjunct *dx = build_expansion_disjuncts(clu, x);
53
+ if (dx)
54
+ {
55
+ int cnt = count_disjuncts(d);
56
+ d = catenate_disjuncts(dx, d);
57
+ d = eliminate_duplicate_disjuncts(d);
58
+ if (cnt < count_disjuncts(d)) expanded = TRUE;
59
+ }
60
+ }
61
+ sent->word[w].d = d;
62
+ }
63
+ lg_cluster_delete(clu);
64
+
65
+ return expanded;
66
+ }
67
+
@@ -0,0 +1,13 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ int lg_expand_disjunct_list(Sentence sent);
@@ -0,0 +1,22 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ /* verbosity global is held in utilities.c */
15
+ extern int verbosity; /* the verbosity level for error messages */
16
+
17
+ /* size of random table for computing the
18
+ hash functions. must be a power of 2 */
19
+ #define RTSIZE 256
20
+
21
+ extern unsigned int randtable[RTSIZE]; /* random table for hashing */
22
+
@@ -0,0 +1,625 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* Copyright (c) 2010 Linas Vepstas */
5
+ /* All rights reserved */
6
+ /* */
7
+ /* Use of the link grammar parsing system is subject to the terms of the */
8
+ /* license set forth in the LICENSE file included with this software, */
9
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
10
+ /* This license allows free redistribution and use in source and binary */
11
+ /* forms, with or without modification, subject to certain conditions. */
12
+ /* */
13
+ /*************************************************************************/
14
+
15
+ #include "api.h"
16
+
17
+ /**
18
+ * The first thing we do is we build a data structure to represent the
19
+ * result of the entire parse search. There will be a set of nodes
20
+ * built for each call to the count() function that returned a non-zero
21
+ * value, AND which is part of a valid linkage. Each of these nodes
22
+ * represents a valid continuation, and contains pointers to two other
23
+ * sets (one for the left continuation and one for the right
24
+ * continuation).
25
+ */
26
+
27
+ static Parse_set * dummy_set(void)
28
+ {
29
+ static Parse_set ds;
30
+ ds.first = ds.current = NULL;
31
+ ds.count = 1;
32
+ return &ds;
33
+ }
34
+
35
+ /** Returns an empty set of parses */
36
+ static Parse_set * empty_set(void)
37
+ {
38
+ Parse_set *s;
39
+ s = (Parse_set *) xalloc(sizeof(Parse_set));
40
+ s->first = s->current = NULL;
41
+ s->count = 0;
42
+ return s;
43
+ }
44
+
45
+ static void free_set(Parse_set *s)
46
+ {
47
+ Parse_choice *p, *xp;
48
+ if (s == NULL) return;
49
+ for (p=s->first; p != NULL; p = xp) {
50
+ xp = p->next;
51
+ xfree((void *)p, sizeof(*p));
52
+ }
53
+ xfree((void *)s, sizeof(*s));
54
+ }
55
+
56
+ static Parse_choice * make_choice(Parse_set *lset, int llw, int lrw, Connector * llc, Connector * lrc,
57
+ Parse_set *rset, int rlw, int rrw, Connector * rlc, Connector * rrc,
58
+ Disjunct *ld, Disjunct *md, Disjunct *rd)
59
+ {
60
+ Parse_choice *pc;
61
+ pc = (Parse_choice *) xalloc(sizeof(*pc));
62
+ pc->next = NULL;
63
+ pc->set[0] = lset;
64
+ pc->link[0].l = llw;
65
+ pc->link[0].r = lrw;
66
+ pc->link[0].lc = llc;
67
+ pc->link[0].rc = lrc;
68
+ pc->set[1] = rset;
69
+ pc->link[1].l = rlw;
70
+ pc->link[1].r = rrw;
71
+ pc->link[1].lc = rlc;
72
+ pc->link[1].rc = rrc;
73
+ pc->ld = ld;
74
+ pc->md = md;
75
+ pc->rd = rd;
76
+ return pc;
77
+ }
78
+
79
+ /**
80
+ * Put this parse_choice into a given set. The current pointer is always
81
+ * left pointing to the end of the list.
82
+ */
83
+ static void put_choice_in_set(Parse_set *s, Parse_choice *pc)
84
+ {
85
+ if (s->first == NULL)
86
+ {
87
+ s->first = pc;
88
+ }
89
+ else
90
+ {
91
+ s->current->next = pc;
92
+ }
93
+ s->current = pc;
94
+ pc->next = NULL;
95
+ }
96
+
97
+ /**
98
+ * Allocate the parse info struct
99
+ *
100
+ * A piecewise exponential function determines the size of the hash
101
+ * table. Probably should make use of the actual number of disjuncts,
102
+ * rather than just the number of words.
103
+ */
104
+ Parse_info parse_info_new(int nwords)
105
+ {
106
+ int log2_table_size;
107
+ Parse_info pi;
108
+
109
+ pi = (Parse_info) xalloc(sizeof(struct Parse_info_struct));
110
+ memset(pi, 0, sizeof(struct Parse_info_struct));
111
+ pi->N_words = nwords;
112
+ pi->parse_set = NULL;
113
+
114
+ pi->chosen_disjuncts = (Disjunct **) xalloc(nwords * sizeof(Disjunct *));
115
+ memset(pi->chosen_disjuncts, 0, nwords * sizeof(Disjunct *));
116
+
117
+ pi->image_array = (Image_node **) xalloc(nwords * sizeof(Image_node *));
118
+ memset(pi->image_array, 0, nwords * sizeof(Image_node *));
119
+
120
+ pi->has_fat_down = (char *) xalloc(nwords * sizeof(Boolean));
121
+ memset(pi->has_fat_down, 0, nwords * sizeof(Boolean));
122
+
123
+ /* Alloc the x_table */
124
+ if (nwords >= 10) {
125
+ log2_table_size = 14;
126
+ } else if (nwords >= 4) {
127
+ log2_table_size = nwords;
128
+ } else {
129
+ log2_table_size = 4;
130
+ }
131
+ pi->log2_x_table_size = log2_table_size;
132
+ pi->x_table_size = (1 << log2_table_size);
133
+
134
+ /*printf("Allocating x_table of size %d\n", x_table_size);*/
135
+ pi->x_table = (X_table_connector**) xalloc(pi->x_table_size * sizeof(X_table_connector*));
136
+ memset(pi->x_table, 0, pi->x_table_size * sizeof(X_table_connector*));
137
+
138
+ return pi;
139
+ }
140
+
141
+ /**
142
+ * This is the function that should be used to free the set structure. Since
143
+ * it's a dag, a recursive free function won't work. Every time we create
144
+ * a set element, we put it in the hash table, so this is OK.
145
+ */
146
+ void free_parse_info(Parse_info pi)
147
+ {
148
+ int i, len;
149
+ X_table_connector *t, *x;
150
+
151
+ len = pi->N_words;
152
+ xfree(pi->chosen_disjuncts, len * sizeof(Disjunct *));
153
+ xfree(pi->image_array, len * sizeof(Image_node*));
154
+ xfree(pi->has_fat_down, len * sizeof(Boolean));
155
+
156
+ for (i=0; i<pi->x_table_size; i++)
157
+ {
158
+ for(t = pi->x_table[i]; t!= NULL; t=x)
159
+ {
160
+ x = t->next;
161
+ free_set(t->set);
162
+ xfree((void *) t, sizeof(X_table_connector));
163
+ }
164
+ }
165
+ pi->parse_set = NULL;
166
+
167
+ /*printf("Freeing x_table of size %d\n", x_table_size);*/
168
+ xfree((void *) pi->x_table, pi->x_table_size * sizeof(X_table_connector*));
169
+ pi->x_table_size = 0;
170
+ pi->x_table = NULL;
171
+
172
+ xfree((void *) pi, sizeof(struct Parse_info_struct));
173
+ }
174
+
175
+ /**
176
+ * Returns the pointer to this info, NULL if not there.
177
+ */
178
+ static X_table_connector * x_table_pointer(int lw, int rw, Connector *le, Connector *re,
179
+ int cost, Parse_info pi)
180
+ {
181
+ X_table_connector *t;
182
+ t = pi->x_table[pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost)];
183
+ for (; t != NULL; t = t->next) {
184
+ if ((t->lw == lw) && (t->rw == rw) && (t->le == le) && (t->re == re) && (t->cost == cost)) return t;
185
+ }
186
+ return NULL;
187
+ }
188
+
189
#if DEAD_CODE
/**
 * Returns the parse set stored in the x_table for the quintuple
 * (lw, rw, le, re, cost), or NULL if there is no such entry.
 *
 * Note: this function is compiled only when DEAD_CODE is non-zero.
 * It previously returned the integer -1 for a miss, which is not a
 * valid value for a Parse_set pointer; a miss is now reported as NULL.
 */
Parse_set * x_table_lookup(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_info pi)
{
	X_table_connector *t = x_table_pointer(lw, rw, le, re, cost, pi);

	if (t == NULL) return NULL;
	return t->set;
}
#endif
198
+
199
+ /**
200
+ * Stores the value in the x_table. Assumes it's not already there.
201
+ */
202
+ static X_table_connector * x_table_store(int lw, int rw, Connector *le, Connector *re,
203
+ int cost, Parse_set * set, Parse_info pi)
204
+ {
205
+ X_table_connector *t, *n;
206
+ int h;
207
+
208
+ n = (X_table_connector *) xalloc(sizeof(X_table_connector));
209
+ n->set = set;
210
+ n->lw = lw; n->rw = rw; n->le = le; n->re = re; n->cost = cost;
211
+ h = pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost);
212
+ t = pi->x_table[h];
213
+ n->next = t;
214
+ pi->x_table[h] = n;
215
+ return n;
216
+ }
217
+
218
#ifdef UNUSED_FUNCTION
/**
 * Replace the set held by an existing x_table entry. Unlike
 * x_table_store(), the entry for (lw, rw, le, re, cost) must already
 * be present; this is asserted.
 */
static void x_table_update(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_set * set, Parse_info pi)
{
	X_table_connector *entry;

	entry = x_table_pointer(lw, rw, le, re, cost, pi);
	assert(entry != NULL, "This entry is supposed to be in the x_table.");
	entry->set = set;
}
#endif
228
+
229
+
230
+ /**
231
+ * returns NULL if there are no ways to parse, or returns a pointer
232
+ * to a set structure representing all the ways to parse.
233
+ *
234
+ * This code is similar to code in count.c
235
+ * (grep for end_word in these files).
236
+ */
237
+ static Parse_set * parse_set(Sentence sent,
238
+ Disjunct *ld, Disjunct *rd, int lw, int rw,
239
+ Connector *le, Connector *re, int cost,
240
+ int islands_ok, Parse_info pi)
241
+ {
242
+ Disjunct * d, * dis;
243
+ int start_word, end_word, w;
244
+ int lcost, rcost, Lmatch, Rmatch;
245
+ int i, j;
246
+ Parse_set *ls[4], *rs[4], *lset, *rset;
247
+ Parse_choice * a_choice;
248
+
249
+ Match_node * m, *m1;
250
+ X_table_connector *xt;
251
+ s64 count;
252
+
253
+ assert(cost >= 0, "parse_set() called with cost < 0.");
254
+
255
+ count = table_lookup(sent, lw, rw, le, re, cost);
256
+
257
+ /*
258
+ assert(count >= 0, "parse_set() called on params that were not in the table.");
259
+ Actually, we can't assert this, because of the pseudocount technique that's
260
+ used in count(). It's not the case that every call to parse_set() has already
261
+ been put into the table.
262
+ */
263
+
264
+ if ((count == 0) || (count == -1)) return NULL;
265
+
266
+ xt = x_table_pointer(lw, rw, le, re, cost, pi);
267
+
268
+ if (xt != NULL) return xt->set; /* we've already computed it */
269
+
270
+ /* Start it out with the empty set of options. */
271
+ /* This entry must be updated before we return. */
272
+ xt = x_table_store(lw, rw, le, re, cost, empty_set(), pi);
273
+
274
+ xt->set->count = count; /* the count we already computed */
275
+ /* this count is non-zero */
276
+
277
+ if (rw == 1 + lw) return xt->set;
278
+
279
+ if ((le == NULL) && (re == NULL))
280
+ {
281
+ if (!islands_ok && (lw != -1)) return xt->set;
282
+
283
+ if (cost == 0) return xt->set;
284
+
285
+ w = lw + 1;
286
+ for (dis = sent->word[w].d; dis != NULL; dis = dis->next)
287
+ {
288
+ if (dis->left == NULL)
289
+ {
290
+ rs[0] = parse_set(sent, dis, NULL, w, rw, dis->right,
291
+ NULL, cost-1, islands_ok, pi);
292
+ if (rs[0] == NULL) continue;
293
+ a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
294
+ rs[0], w, rw, NULL, NULL,
295
+ NULL, NULL, NULL);
296
+ put_choice_in_set(xt->set, a_choice);
297
+ }
298
+ }
299
+ rs[0] = parse_set(sent, NULL, NULL, w, rw, NULL, NULL,
300
+ cost-1, islands_ok, pi);
301
+ if (rs[0] != NULL)
302
+ {
303
+ a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
304
+ rs[0], w, rw, NULL, NULL,
305
+ NULL, NULL, NULL);
306
+ put_choice_in_set(xt->set, a_choice);
307
+ }
308
+ return xt->set;
309
+ }
310
+
311
+ if (le == NULL)
312
+ {
313
+ start_word = lw + 1;
314
+ }
315
+ else
316
+ {
317
+ start_word = le->word;
318
+ }
319
+
320
+ if (re == NULL)
321
+ {
322
+ end_word = rw;
323
+ }
324
+ else
325
+ {
326
+ end_word = re->word + 1;
327
+ }
328
+
329
+ for (w = start_word; w < end_word; w++)
330
+ {
331
+ m1 = m = form_match_list(sent, w, le, lw, re, rw);
332
+ for (; m!=NULL; m=m->next)
333
+ {
334
+ d = m->d;
335
+ for (lcost = 0; lcost <= cost; lcost++)
336
+ {
337
+ rcost = cost-lcost;
338
+ /* now lcost and rcost are the costs we're assigning to
339
+ * those parts respectively */
340
+
341
+ /* Now, we determine if (based on table only) we can see that
342
+ the current range is not parsable. */
343
+
344
+ Lmatch = (le != NULL) && (d->left != NULL) && do_match(sent, le, d->left, lw, w);
345
+ Rmatch = (d->right != NULL) && (re != NULL) && do_match(sent, d->right, re, w, rw);
346
+ for (i=0; i<4; i++) {ls[i] = rs[i] = NULL;}
347
+ if (Lmatch)
348
+ {
349
+ ls[0] = parse_set(sent, ld, d, lw, w, le->next, d->left->next, lcost, islands_ok, pi);
350
+ if (le->multi) ls[1] = parse_set(sent, ld, d, lw, w, le, d->left->next, lcost, islands_ok, pi);
351
+ if (d->left->multi) ls[2] = parse_set(sent, ld, d, lw, w, le->next, d->left, lcost, islands_ok, pi);
352
+ if (le->multi && d->left->multi) ls[3] = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
353
+ }
354
+ if (Rmatch)
355
+ {
356
+ rs[0] = parse_set(sent, d, rd, w, rw, d->right->next, re->next, rcost, islands_ok, pi);
357
+ if (d->right->multi) rs[1] = parse_set(sent, d, rd, w,rw,d->right,re->next, rcost, islands_ok, pi);
358
+ if (re->multi) rs[2] = parse_set(sent, d, rd, w, rw, d->right->next, re, rcost, islands_ok, pi);
359
+ if (d->right->multi && re->multi) rs[3] = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
360
+ }
361
+
362
+ for (i=0; i<4; i++)
363
+ {
364
+ /* this ordering is probably not consistent with that
365
+ * needed to use list_links */
366
+ if (ls[i] == NULL) continue;
367
+ for (j=0; j<4; j++)
368
+ {
369
+ if (rs[j] == NULL) continue;
370
+ a_choice = make_choice(ls[i], lw, w, le, d->left,
371
+ rs[j], w, rw, d->right, re,
372
+ ld, d, rd);
373
+ put_choice_in_set(xt->set, a_choice);
374
+ }
375
+ }
376
+
377
+ if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL)
378
+ {
379
+ /* evaluate using the left match, but not the right */
380
+ rset = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
381
+ if (rset != NULL)
382
+ {
383
+ for (i=0; i<4; i++)
384
+ {
385
+ if (ls[i] == NULL) continue;
386
+ /* this ordering is probably not consistent with
387
+ * that needed to use list_links */
388
+ a_choice = make_choice(ls[i], lw, w, le, d->left,
389
+ rset, w, rw, NULL /* d->right */,
390
+ re, /* the NULL indicates no link*/
391
+ ld, d, rd);
392
+ put_choice_in_set(xt->set, a_choice);
393
+ }
394
+ }
395
+ }
396
+ if ((le == NULL) && (rs[0] != NULL ||
397
+ rs[1] != NULL || rs[2] != NULL || rs[3] != NULL))
398
+ {
399
+ /* evaluate using the right match, but not the left */
400
+ lset = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
401
+
402
+ if (lset != NULL)
403
+ {
404
+ for (i=0; i<4; i++)
405
+ {
406
+ if (rs[i] == NULL) continue;
407
+ /* this ordering is probably not consistent with
408
+ * that needed to use list_links */
409
+ a_choice = make_choice(lset, lw, w, NULL /* le */,
410
+ d->left, /* NULL indicates no link */
411
+ rs[i], w, rw, d->right, re,
412
+ ld, d, rd);
413
+ put_choice_in_set(xt->set, a_choice);
414
+ }
415
+ }
416
+ }
417
+ }
418
+ }
419
+ put_match_list(sent, m1);
420
+ }
421
+ xt->set->current = xt->set->first;
422
+ return xt->set;
423
+ }
424
+
425
+ /**
426
+ * return TRUE if and only if overflow in the number of parses occured.
427
+ * Use a 64-bit int for counting.
428
+ */
429
+ static int verify_set_node(Parse_set *set)
430
+ {
431
+ Parse_choice *pc;
432
+ s64 n;
433
+ if (set == NULL || set->first == NULL) return FALSE;
434
+ n = 0;
435
+ for (pc = set->first; pc != NULL; pc = pc->next)
436
+ {
437
+ n += pc->set[0]->count * pc->set[1]->count;
438
+ if (PARSE_NUM_OVERFLOW < n) return TRUE;
439
+ }
440
+ return FALSE;
441
+ }
442
+
443
+ static int verify_set(Parse_info pi)
444
+ {
445
+ int i;
446
+
447
+ assert(pi->x_table != NULL, "called verify_set when x_table==NULL");
448
+ for (i=0; i<pi->x_table_size; i++)
449
+ {
450
+ X_table_connector *t;
451
+ for(t = pi->x_table[i]; t != NULL; t = t->next)
452
+ {
453
+ if (verify_set_node(t->set)) return TRUE;
454
+ }
455
+ }
456
+ return FALSE;
457
+ }
458
+
459
+ /**
460
+ * This is the top level call that computes the whole parse_set. It
461
+ * points whole_set at the result. It creates the necessary hash
462
+ * table (x_table) which will be freed at the same time the
463
+ * whole_set is freed.
464
+ *
465
+ * It also assumes that count() has been run, and that hash table is
466
+ * filled with the values thus computed. Therefore this function
467
+ * must be structured just like parse() (the main function for
468
+ * count()).
469
+ *
470
+ * If the number of linkages gets huge, then the counts can overflow.
471
+ * We check if this has happened when verifying the parse set.
472
+ * This routine returns TRUE iff overflowed occurred.
473
+ */
474
+
475
+ int build_parse_set(Sentence sent, int cost, Parse_Options opts)
476
+ {
477
+ Parse_set * whole_set;
478
+
479
+ whole_set =
480
+ parse_set(sent, NULL, NULL, -1, sent->length, NULL, NULL, cost+1,
481
+ opts->islands_ok, sent->parse_info);
482
+
483
+ if ((whole_set != NULL) && (whole_set->current != NULL)) {
484
+ whole_set->current = whole_set->first;
485
+ }
486
+
487
+ sent->parse_info->parse_set = whole_set;
488
+
489
+ return verify_set(sent->parse_info);
490
+ }
491
+
492
+ static void initialize_links(Parse_info pi)
493
+ {
494
+ pi->N_links = 0;
495
+ memset(pi->chosen_disjuncts, 0, pi->N_words * sizeof(Disjunct *));
496
+ }
497
+
498
+ static void issue_link(Parse_info pi, Disjunct * ld, Disjunct * rd, Link link)
499
+ {
500
+ assert(pi->N_links <= MAX_LINKS-1, "Too many links");
501
+ pi->link_array[pi->N_links] = link;
502
+ pi->N_links++;
503
+
504
+ pi->chosen_disjuncts[link.l] = ld;
505
+ pi->chosen_disjuncts[link.r] = rd;
506
+ }
507
+
508
+ static void issue_links_for_choice(Parse_info pi, Parse_choice *pc)
509
+ {
510
+ if (pc->link[0].lc != NULL) { /* there is a link to generate */
511
+ issue_link(pi, pc->ld, pc->md, pc->link[0]);
512
+ }
513
+ if (pc->link[1].lc != NULL) {
514
+ issue_link(pi, pc->md, pc->rd, pc->link[1]);
515
+ }
516
+ }
517
+
518
+ #ifdef NOT_USED_ANYWHERE
519
+ static void build_current_linkage_recursive(Parse_info pi, Parse_set *set)
520
+ {
521
+ if (set == NULL) return;
522
+ if (set->current == NULL) return;
523
+
524
+ issue_links_for_choice(pi, set->current);
525
+ build_current_linkage_recursive(pi, set->current->set[0]);
526
+ build_current_linkage_recursive(pi, set->current->set[1]);
527
+ }
528
+
529
+ /**
530
+ * This function takes the "current" point in the given set and
531
+ * generates the linkage that it represents.
532
+ */
533
+ void build_current_linkage(Parse_info pi)
534
+ {
535
+ initialize_links(pi);
536
+ build_current_linkage_recursive(pi, pi->parse_set);
537
+ }
538
+
539
+ /**
540
+ * Advance the "current" linkage to the next one
541
+ * return 1 if there's a "carry" from this node,
542
+ * which indicates that the scan of this node has
543
+ * just been completed, and it's now back to it's
544
+ * starting state.
545
+ */
546
+ static int advance_linkage(Parse_info pi, Parse_set * set)
547
+ {
548
+ if (set == NULL) return 1; /* probably can't happen */
549
+ if (set->first == NULL) return 1; /* the empty set */
550
+ if (advance_linkage(pi, set->current->set[0]) == 1) {
551
+ if (advance_linkage(pi, set->current->set[1]) == 1) {
552
+ if (set->current->next == NULL) {
553
+ set->current = set->first;
554
+ return 1;
555
+ }
556
+ set->current = set->current->next;
557
+ }
558
+ }
559
+ return 0;
560
+ }
561
+
562
+ static void advance_parse_set(Parse_info pi)
563
+ {
564
+ advance_linkage(pi, pi->parse_set);
565
+ }
566
+ #endif
567
+
568
+ static void list_links(Parse_info pi, Parse_set * set, int index)
569
+ {
570
+ Parse_choice *pc;
571
+ s64 n;
572
+
573
+ if (set == NULL || set->first == NULL) return;
574
+ for (pc = set->first; pc != NULL; pc = pc->next) {
575
+ n = pc->set[0]->count * pc->set[1]->count;
576
+ if (index < n) break;
577
+ index -= n;
578
+ }
579
+ assert(pc != NULL, "walked off the end in list_links");
580
+ issue_links_for_choice(pi, pc);
581
+ list_links(pi, pc->set[0], index % pc->set[0]->count);
582
+ list_links(pi, pc->set[1], index / pc->set[0]->count);
583
+ }
584
+
585
+ static void list_random_links(Parse_info pi, Parse_set * set)
586
+ {
587
+ Parse_choice *pc;
588
+ int num_pc, new_index;
589
+
590
+ if (set == NULL || set->first == NULL) return;
591
+ num_pc = 0;
592
+ for (pc = set->first; pc != NULL; pc = pc->next) {
593
+ num_pc++;
594
+ }
595
+
596
+ new_index = rand_r(&pi->rand_state) % num_pc;
597
+
598
+ num_pc = 0;
599
+ for (pc = set->first; pc != NULL; pc = pc->next) {
600
+ if (new_index == num_pc) break;
601
+ num_pc++;
602
+ }
603
+
604
+ assert(pc != NULL, "Couldn't get a random parse choice");
605
+ issue_links_for_choice(pi, pc);
606
+ list_random_links(pi, pc->set[0]);
607
+ list_random_links(pi, pc->set[1]);
608
+ }
609
+
610
+ /**
611
+ * Generate the list of all links of the index'th parsing of the
612
+ * sentence. For this to work, you must have already called parse, and
613
+ * already built the whole_set.
614
+ */
615
+ void extract_links(int index, int cost, Parse_info pi)
616
+ {
617
+ initialize_links(pi);
618
+ pi->rand_state = index;
619
+ if (index < 0) {
620
+ list_random_links(pi, pi->parse_set);
621
+ }
622
+ else {
623
+ list_links(pi, pi->parse_set, index);
624
+ }
625
+ }