grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,136 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* Copyright (c) 2009 Vikas N. Kumar */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef SPELLCHECK_HUN_C
15
+ #define
16
+
17
+ #include <stdio.h>
18
+ #include <stdlib.h>
19
+ #include "link-includes.h"
20
+ #include "spellcheck.h"
21
+ #include "utilities.h" /* For Win32 compatibility */
22
+
23
+ #ifdef HAVE_HUNSPELL
24
+
25
+ #ifndef HUNSPELL_DICT_DIR
26
+ #define HUNSPELL_DICT_DIR (char *)0
27
+ #endif /* HUNSPELL_DICT_DIR */
28
+
29
+ static const char *hunspell_dict_dirs[] = { /* Directories that spellcheck_create() probes, in this order, for "<name>.aff" and "<name>.dic" */
30
+ "/usr/share/myspell/dicts",
31
+ "/usr/share/hunspell/dicts",
32
+ "/usr/local/share/myspell/dicts",
33
+ "/usr/local/share/hunspell/dicts",
34
+ "/usr/share/myspell",
35
+ "/usr/share/hunspell",
36
+ "/usr/local/share/myspell",
37
+ "/usr/local/share/hunspell",
38
+ HUNSPELL_DICT_DIR /* optional build-time directory; falls back to a null pointer when not defined, and spellcheck_create() skips null entries */
39
+ };
40
+
41
+ static const char *spellcheck_lang_mapping[] = { /* Flat list of pairs: even index = link-grammar language, odd index = hunspell file stem; spellcheck_create() walks it two entries at a time */
42
+ "en" /* link-grammar language */, "en-US" /* hunspell filename */,
43
+ "en" /* link-grammar language */, "en_US" /* hunspell filename */
44
+ };
45
+
46
+ #define FPATHLEN 256
47
+ static char hunspell_aff_file[FPATHLEN];
48
+ static char hunspell_dic_file[FPATHLEN];
49
+
50
+ #include <hunspell.h>
51
+ #include <string.h>
52
+
53
+ void * spellcheck_create(const char * lang)
54
+ {
55
+ size_t i = 0, j = 0;
56
+ Hunhandle *h = NULL;
57
+
58
+ memset(hunspell_aff_file, 0, FPATHLEN);
59
+ memset(hunspell_dic_file, 0, FPATHLEN);
60
+ for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
61
+ {
62
+ if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
63
+
64
+ /* check in each hunspell_dict_dir if the files exist */
65
+ for (j = 0; j < sizeof(hunspell_dict_dirs)/sizeof(char *); ++j)
66
+ {
67
+ FILE *fh;
68
+ /* if the directory name is NULL then ignore */
69
+ if (hunspell_dict_dirs[j] == NULL) continue;
70
+
71
+ snprintf(hunspell_aff_file, FPATHLEN, "%s/%s.aff", hunspell_dict_dirs[j],
72
+ spellcheck_lang_mapping[i+1]);
73
+ snprintf(hunspell_dic_file, FPATHLEN, "%s/%s.dic", hunspell_dict_dirs[j],
74
+ spellcheck_lang_mapping[i+1]);
75
+
76
+ /* Some versions of Hunspell_create() will succeed even if
77
+ * there are no dictionary files. So test for permissions.
78
+ */
79
+ fh = fopen(hunspell_aff_file, "r");
80
+ if (fh) fclose (fh);
81
+ else continue;
82
+
83
+ fh = fopen(hunspell_dic_file, "r");
84
+ if (fh) fclose (fh);
85
+ else continue;
86
+
87
+ h = Hunspell_create(hunspell_aff_file, hunspell_dic_file);
88
+ /* if hunspell handle was created break from loop */
89
+ if (h != NULL)
90
+ break;
91
+ }
92
+ /* if hunspell handle was created break from loop */
93
+ if (h != NULL) break;
94
+ }
95
+ return h;
96
+ }
97
+
98
+ void spellcheck_destroy(void * chk)
99
+ {
100
+ Hunhandle *h = (Hunhandle *) chk;
101
+ Hunspell_destroy(h);
102
+ }
103
+
104
+ /**
105
+ * Return boolean: 1 if spelling looks good, else zero
106
+ */
107
+ int spellcheck_test(void * chk, const char * word)
108
+ {
109
+ if (NULL == chk)
110
+ {
111
+ prt_error("Error: no spell-check handle specified!\n");
112
+ return 0;
113
+ }
114
+
115
+ return Hunspell_spell((Hunhandle *)chk, word);
116
+ }
117
+
118
+ int spellcheck_suggest(void * chk, char ***sug, const char * word)
119
+ {
120
+ if (NULL == chk)
121
+ {
122
+ prt_error("Error: no spell-check handle specified!\n");
123
+ return 0;
124
+ }
125
+
126
+ return Hunspell_suggest((Hunhandle *)chk, sug, word);
127
+ }
128
+
129
/**
 * Release a suggestion list obtained from spellcheck_suggest().
 *
 * The list is an array of `size` separately allocated strings, so freeing
 * only the array leaks every string. The Hunspell C API releases such a
 * list with Hunspell_free_list(), which frees each entry and then the
 * array; this function has no handle to pass along, so it does the same by
 * hand. NOTE(review): assumes the strings come from malloc(), as in the
 * Hunspell C wrapper -- confirm for the Hunspell version in use.
 *
 * @param sug   list to release; NULL is a no-op.
 * @param size  number of entries in `sug`.
 */
void spellcheck_free_suggest(char **sug, int size)
{
    int i;

    if (NULL == sug) return;

    for (i = 0; i < size; i++)
    {
        free(sug[i]);
    }
    free(sug);
}
133
+
134
+ #endif /* #ifdef HAVE_HUNSPELL */
135
+
136
+ #endif
@@ -0,0 +1,34 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Linas Vepstas */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ #ifndef SPELLCHECK_H
14
+ #define SPELLCHECK_H
15
+ #if (defined HAVE_HUNSPELL) || (defined HAVE_ASPELL)
16
+
17
+ void * spellcheck_create(const char * lang); /* returns an opaque checker handle for `lang`, or NULL if none could be set up */
18
+ void spellcheck_destroy(void *); /* disposes of a handle returned by spellcheck_create() */
19
+ int spellcheck_test(void *, const char * word); /* boolean: 1 if the spelling of `word` looks good, else zero */
20
+ int spellcheck_suggest(void * chk, char ***sug, const char * word); /* fills *sug with suggestions for `word`; presumably returns how many -- TODO confirm per backend */
21
+ void spellcheck_free_suggest(char **sug, int size); /* releases a list produced by spellcheck_suggest(); `size` is the value it returned -- TODO confirm */
22
+
23
+ #else
24
+
25
+ #include "utilities.h" /* For MSVC inline portability */
26
+
27
/* No spell-checker backend compiled in: there is never a handle to return. */
static inline void * spellcheck_create(const char * lang)
{
    (void) lang;
    return NULL;
}
28
/* No spell-checker backend compiled in: nothing to release. */
static inline void spellcheck_destroy(void * chk)
{
    (void) chk;
}
29
/* No spell-checker backend compiled in: always answers zero. */
static inline int spellcheck_test(void * chk, const char * word)
{
    (void) chk;
    (void) word;
    return 0;
}
30
/* No spell-checker backend compiled in: reports zero and leaves *sug alone. */
static inline int spellcheck_suggest(void * chk, char ***sug, const char * word)
{
    (void) chk;
    (void) sug;
    (void) word;
    return 0;
}
31
/* No spell-checker backend compiled in: there is never a list to free. */
static inline void spellcheck_free_suggest(char **sug, int size)
{
    (void) sug;
    (void) size;
}
32
+
33
+ #endif
34
+ #endif //endif SPELLCHECK_H
@@ -0,0 +1,169 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+
16
+ /**
17
+ * Suppose you have a program that generates strings and keeps pointers to them.
18
+ The program never needs to change these strings once they're generated.
19
+ If it generates the same string again, then it can reuse the one it
20
+ generated before. This is what this package supports.
21
+
22
+ String_set is the object. The functions are:
23
+
24
+ const char * string_set_add(const char * source_string, String_set * ss);
25
+ This function returns a pointer to a string with the same
26
+ contents as the source_string. If that string is already
27
+ in the table, then it uses that copy, otherwise it generates
28
+ and inserts a new one.
29
+
30
+ const char * string_set_lookup(const char * source_string, String_set * ss);
31
+ This function returns a pointer to a string with the same
32
+ contents as the source_string. If that string is not already
33
+ in the table, returns NULL;
34
+
35
+ String_set * string_set_create(void);
36
+ Create a new empty String_set.
37
+
38
+ string_set_delete(String_set *ss);
39
+ Free all the space associated with this string set.
40
+
41
+ The implementation uses probed hashing (i.e. not bucket).
42
+ */
43
+
44
+ static int hash_string(const char *sa, const String_set *ss)
45
+ {
46
+ unsigned char *str = (unsigned char *) sa;
47
+ unsigned int accum = 0;
48
+ for (;*str != '\0'; str++) accum = ((256*accum) + (*str)) % (ss->size);
49
+ return accum;
50
+ }
51
+
52
+ static int stride_hash_string(const char *sa, const String_set *ss)
53
+ {
54
+ unsigned char *str = (unsigned char *) sa;
55
+ /* This is the stride used, so we have to make sure that its value is not 0 */
56
+ unsigned int accum = 0;
57
+ for (;*str != '\0'; str++) accum = ((17*accum) + (*str)) % (ss->size);
58
+ if (accum == 0) accum = 1;
59
+ return accum;
60
+ }
61
+
62
/**
 * Return the smallest odd prime that is >= start.
 *
 * Any start of 3 or less (including zero and negative values) yields 3.
 * The result is always odd because an even start is first bumped to the
 * next odd number.
 *
 * Primality is tested by trial division with odd divisors up to the
 * square root; the bound is written as i <= start/i so that i*i is never
 * computed and cannot overflow.
 */
static int next_prime_up(int start)
{
    int i;

    /* Nothing below 3 is an odd prime. Handling 3 here also avoids
     * mistaking "3 divides 3" for proof that 3 is composite. */
    if (start <= 3) return 3;

    start = start | 1; /* make it odd */
    for (;;) {
        int has_divisor = 0;

        for (i = 3; i <= (start / i); i += 2) {
            if (start % i == 0) {
                has_divisor = 1;
                break;
            }
        }
        if (!has_divisor) return start;
        start += 2;
    }
}
78
+
79
+ String_set * string_set_create(void)
80
+ {
81
+ String_set *ss;
82
+ int i;
83
+ ss = (String_set *) xalloc(sizeof(String_set));
84
+ ss->size = next_prime_up(100);
85
+ ss->table = (char **) xalloc(ss->size * sizeof(char *));
86
+ ss->count = 0;
87
+ for (i=0; i<ss->size; i++) ss->table[i] = NULL;
88
+ return ss;
89
+ }
90
+
91
+ /**
92
+ * lookup the given string in the table. Return a pointer
93
+ * to the place it is, or the place where it should be.
94
+ */
95
+ static int find_place(const char * str, String_set *ss)
96
+ {
97
+ int h, s, i;
98
+ h = hash_string(str, ss);
99
+ s = stride_hash_string(str, ss);
100
+ for (i=h; 1; i = (i + s)%(ss->size)) {
101
+ if ((ss->table[i] == NULL) || (strcmp(ss->table[i], str) == 0)) return i;
102
+ }
103
+ }
104
+
105
+ static void grow_table(String_set *ss)
106
+ {
107
+ String_set old;
108
+ int i, p;
109
+
110
+ old = *ss;
111
+ ss->size = next_prime_up(2 * old.size); /* at least double the size */
112
+ ss->table = (char **) xalloc(ss->size * sizeof(char *));
113
+ ss->count = 0;
114
+ for (i=0; i<ss->size; i++) ss->table[i] = NULL;
115
+ for (i=0; i<old.size; i++) {
116
+ if (old.table[i] != NULL) {
117
+ p = find_place(old.table[i], ss);
118
+ ss->table[p] = old.table[i];
119
+ ss->count++;
120
+ }
121
+ }
122
+ /*printf("growing from %d to %d\n", old.size, ss->size);*/
123
+ fflush(stdout);
124
+ xfree((char *) old.table, old.size * sizeof(char *));
125
+ }
126
+
127
+ const char * string_set_add(const char * source_string, String_set * ss)
128
+ {
129
+ char * str;
130
+ int len, p;
131
+
132
+ assert(source_string != NULL, "STRING_SET: Can't insert a null string");
133
+
134
+ p = find_place(source_string, ss);
135
+ if (ss->table[p] != NULL) return ss->table[p];
136
+
137
+ len = strlen(source_string);
138
+ str = (char *) xalloc(len+1);
139
+ strcpy(str, source_string);
140
+ ss->table[p] = str;
141
+ ss->count++;
142
+
143
+ /* We just added it to the table.
144
+ If the table got too big, we grow it.
145
+ Too big is defined as being more than 3/4 full */
146
+ if ((4 * ss->count) > (3 * ss->size)) grow_table(ss);
147
+
148
+ return str;
149
+ }
150
+
151
+ const char * string_set_lookup(const char * source_string, String_set * ss)
152
+ {
153
+ int p;
154
+
155
+ p = find_place(source_string, ss);
156
+ return ss->table[p];
157
+ }
158
+
159
+ void string_set_delete(String_set *ss)
160
+ {
161
+ int i;
162
+
163
+ if (ss == NULL) return;
164
+ for (i=0; i<ss->size; i++) {
165
+ if (ss->table[i] != NULL) xfree(ss->table[i], strlen(ss->table[i]) + 1);
166
+ }
167
+ xfree((char *) ss->table, ss->size * sizeof(char *));
168
+ xfree((char *) ss, sizeof(String_set));
169
+ }
@@ -0,0 +1,16 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ String_set * string_set_create(void);
14
+ const char * string_set_add(const char * source_string, String_set * ss);
15
+ const char * string_set_lookup(const char * source_string, String_set * ss);
16
+ void string_set_delete(String_set *ss);
@@ -0,0 +1,498 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef _STRUCTURES_H_
15
+ #define _STRUCTURES_H_
16
+
17
+ #include "api-types.h"
18
+ #include "utilities.h" /* Needed for inline defn in Windows */
19
+
20
+ /*
21
+ Global variable descriptions
22
+ -- Most of these global variables have been eliminated.
23
+ I've left this comment here for historical purposes --DS 4/98
24
+
25
+ N_words:
26
+ The number of words in the current sentence. Computed by
27
+ separate_sentence().
28
+
29
+ N_links:
30
+ The number of links in the current linkage. Computed by
31
+ extract_linkage().
32
+
33
+ sentence[].string:
34
+ Contains a slightly modified form of the words typed by the user.
35
+ Computed by separate_sentence().
36
+
37
+ sentence[].x:
38
+ Contains, for each word, a pointer to a list of expressions from the
39
+ dictionary that match the word in sentence[].string.
40
+ Computed by build_sentence_expressions().
41
+
42
+ sentence[].d
43
+ Contains for each word, a pointer to a list of disjuncts for this word.
44
+ Computed by: prepare_to_parse(), but modified by pruning and power
45
+ pruning.
46
+
47
+ link_array[]
48
+ This is an array of links. These links define the current linkage.
49
+ It is computed by extract_links(). It is used by analyze_linkage() to
50
+ compute pp_linkage[]. It may contain fat links.
51
+
52
+ pp_link_array[] ** eliminated (ALB)
53
+ Another array of links. Here all fat links have been expunged.
54
+ It is computed by analyze_linkage(), and used by post_process() and by
55
+ print_links();
56
+
57
+ chosen_disjuncts[]
58
+ This is an array of pointers to disjuncts, one for each word, that is
59
+ computed by extract_links(). It represents the chosen disjuncts for the
60
+ current linkage. It is used to compute the cost of the linkage, and
61
+ also by compute_chosen_words() to compute the chosen_words[].
62
+
63
+ chosen_words[]
64
+ An array of pointers to strings. These are the words to be displayed
65
+ when printing the solution, the links, etc. Computed as a function of
66
+ chosen_disjuncts[] by compute_chosen_words(). This differs from
67
+ sentence[].string because it contains the suffixes. It differs from
68
+ chosen_disjunct[].string in that the idiom symbols have been removed.
69
+
70
+ has_fat_down[]
71
+ An array of chars, one for each word. TRUE if there is a fat link
72
+ down from this word, FALSE otherwise. (Only set if there is at least
73
+ one fat link.) Set by set_has_fat_down_array() and used by
74
+ analyze_linkage() and is_canonical().
75
+
76
+ is_conjunction[]
77
+ An array of chars, one for each word. TRUE if the word is a conjunction
78
+ ("and", "or", "nor", or "but" at the moment). False otherwise.
79
+ */
80
+
81
+
82
+ #define NEGATIVECOST -1000000
83
+ /* This is a hack that allows one to discard disjuncts containing
84
+ connectors whose cost is greater than a given bound. This number plus
85
+ the cost of any connectors on a disjunct must remain negative, and
86
+ this number multiplied times the number of costly connectors on any
87
+ disjunct must fit into an integer. */
88
+
89
+ /* Upper bound on the cost of any connector. */
90
+ #define MAX_CONNECTOR_COST 1000.0f
91
+
92
+ #define LEFT_WALL_DISPLAY ("LEFT-WALL") /* the string to use to show the wall */
93
+ #define LEFT_WALL_SUPPRESS ("Wd") /* If this connector is used on the wall, */
94
+ /* then suppress the display of the wall */
95
+ /* bogus name to prevent ever suppressing */
96
+ #define RIGHT_WALL_DISPLAY ("RIGHT-WALL") /* the string to use to show the wall */
97
+ #define RIGHT_WALL_SUPPRESS ("RW") /* If this connector is used on the wall, */
98
+
99
+ /* The following define the names of the special strings in the dictionary. */
100
+ #define LEFT_WALL_WORD ("LEFT-WALL")
101
+ #define RIGHT_WALL_WORD ("RIGHT-WALL")
102
+ #define POSTPROCESS_WORD ("POSTPROCESS")
103
+ #define ANDABLE_CONNECTORS_WORD ("ANDABLE-CONNECTORS")
104
+ #define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS")
105
+
106
+ #if DONT_USE_REGEX_GUESSING
107
+ /* English-language-specific morphology guessing */
108
+ /* Obsolete, replaced by regex-based morphology handler */
109
+ #define PROPER_WORD ("CAPITALIZED-WORDS")
110
+ #define PL_PROPER_WORD ("PL-CAPITALIZED-WORDS")
111
+ #define HYPHENATED_WORD ("HYPHENATED-WORDS")
112
+ #define NUMBER_WORD ("NUMBERS")
113
+ #define ING_WORD ("ING-WORDS")
114
+ #define S_WORD ("S-WORDS")
115
+ #define ED_WORD ("ED-WORDS")
116
+ #define LY_WORD ("LY-WORDS")
117
+ #endif /* DONT_USE_REGEX_GUESSING */
118
+
119
+ #define UNKNOWN_WORD ("UNKNOWN-WORD")
120
+
121
+ #define MAX_PATH_NAME 200 /* file names (including paths)
122
+ should not be longer than this */
123
+
124
+ /* Some size definitions. Reduce these for small machines */
125
+ #define MAX_WORD 60 /* maximum number of chars in a word */
126
+ #define MAX_LINE 1500 /* maximum number of chars in a sentence */
127
+ #define MAX_DISJUNCT_COST 10000
128
+
129
+ /* conditional compiling flags */
130
+ #define PLURALIZATION
131
+ /* If defined, Turns on the pluralization operation in */
132
+ /* "and", "or" and "nor" */
133
+ #define INFIX_NOTATION
134
+ /* If defined, then we're using infix notation for the dictionary */
135
+ /* otherwise we're using prefix notation */
136
+
137
+ #define DOWN_priority 2
138
+ #define UP_priority 1
139
+ #define THIN_priority 0
140
+
141
+ #define NORMAL_LABEL (-1) /* used for normal connectors */
142
+ /* the labels >= 0 are used by fat links */
143
+
144
+ #define UNLIMITED_LEN 255
145
+ #define SHORT_LEN 6
146
+ #define NO_WORD 255
147
+
148
+ #ifndef _MSC_VER
149
+ typedef long long s64; /* signed 64-bit integer, even on 32-bit cpus */
150
+ #define PARSE_NUM_OVERFLOW (1LL<<24)
151
+ #else
152
+ /* Microsoft Visual C Version 6 doesn't support long long. */
153
+ typedef signed __int64 s64; /* signed 64-bit integer, even on 32-bit cpus */
154
+ #define PARSE_NUM_OVERFLOW (((s64)1)<<24)
155
+ #endif
156
+
157
+ struct Connector_struct
158
+ {
159
+ short label; /* NORMAL_LABEL (-1) for normal connectors; labels >= 0 are used by fat links */
160
+ short hash; /* reset to -1 by connector_set_string() whenever the string is replaced */
161
+ unsigned char word;
162
+ /* The nearest word to my left (or right) that
163
+ this could connect to. Computed by power pruning */
164
+ unsigned char length_limit;
165
+ /* If this is a length limited connector, this
166
+ gives the limit of the length of the link
167
+ that can be used on this connector. Since
168
+ this is strictly a function of the connector
169
+ name, efficiency is the only reason to store
170
+ this. If no limit, the value is set to 255. */
171
+ char priority;/* one of the three priorities above (DOWN_priority, UP_priority, THIN_priority) */
172
+ char multi; /* TRUE if this is a multi-connector */
173
+ Connector * next;
174
+ const char * string;
175
+
176
+ /* Hash table next pointer, used only during pruning. */
177
+ Connector * tableNext;
178
+ const char * prune_string;
179
+ };
180
+
181
+ static inline void connector_set_string(Connector *c, const char *s)
182
+ {
183
+ c->string = s;
184
+ c->hash = -1;
185
+ }
186
+ static inline const char * connector_get_string(Connector *c)
187
+ {
188
+ return c->string;
189
+ }
190
+
191
+ struct Disjunct_struct
192
+ {
193
+ Disjunct *next;
194
+ const char * string;
195
+ Connector *left, *right;
196
+ float cost;
197
+ char marked;
198
+ };
199
+
200
+ typedef struct Match_node_struct Match_node;
201
+ struct Match_node_struct
202
+ {
203
+ Match_node * next;
204
+ Disjunct * d;
205
+ };
206
+
207
+ typedef struct X_node_struct X_node;
208
+ struct X_node_struct
209
+ {
210
+ const char * string; /* the word itself */
211
+ Exp * exp;
212
+ X_node *next;
213
+ };
214
+
215
+ struct Word_struct
216
+ {
217
+ char string[MAX_WORD+1];
218
+ X_node * x; /* sentence starts out with these */
219
+ Disjunct * d; /* eventually these get generated */
220
+ int firstupper;
221
+ };
222
+
223
+ /**
224
+ * Types of Exp_struct structures
225
+ */
226
+ #define OR_type 0
227
+ #define AND_type 1
228
+ #define CONNECTOR_type 2
229
+
230
+ /**
231
+ * The E_list and Exp structures defined below comprise the expression
232
+ * trees that are stored in the dictionary. The expression has a type
233
+ * (AND, OR or TERMINAL). If it is not a terminal it has a list
234
+ * (an E_list) of children.
235
+ */
236
+ struct Exp_struct
237
+ {
238
+ Exp * next; /* Used only for mem management,for freeing */
239
+ char type; /* One of three types, see above */
240
+ char dir; /* '-' means to the left, '+' means to right (for connector) */
241
+ char multi; /* TRUE if a multi-connector (for connector) */
242
+ union {
243
+ E_list * l; /* only needed for non-terminals */
244
+ const char * string; /* only needed if it's a connector */
245
+ } u;
246
+ float cost; /* The cost of using this expression.
247
+ Only used for non-terminals */
248
+ };
249
+
250
+ struct E_list_struct
251
+ {
252
+ E_list * next;
253
+ Exp * e;
254
+ };
255
+
256
+ /* The structure below stores a list of dictionary word files. */
257
+ struct Word_file_struct
258
+ {
259
+ char file[MAX_PATH_NAME+1]; /* the file name */
260
+ int changed; /* TRUE if this file has been changed */
261
+ Word_file * next;
262
+ };
263
+
264
+ /**
265
+ * The dictionary is stored as a binary tree comprised of the following
266
+ * nodes. A list of these (via right pointers) is used to return
267
+ * the result of a dictionary lookup.
268
+ */
269
+ struct Dict_node_struct
270
+ {
271
+ const char * string; /* the word itself */
272
+ Word_file * file; /* the file the word came from (NULL if dict file) */
273
+ Exp * exp;
274
+ Dict_node *left, *right;
275
+ };
276
+
277
+ /* The regexs are stored as a linked list of the following nodes. */
278
+ struct Regex_node_s
279
+ {
280
+ char *name; /* The identifying name of the regex */
281
+ char *pattern; /* The regular expression pattern */
282
+ void *re; /* The compiled regex. void * to avoid
283
+ * having re library details invading the
284
+ * rest of the LG system; regex-morph.c
285
+ * takes care of all matching.
286
+ */
287
+ Regex_node *next;
288
+ };
289
+
290
+
291
+ /* The following three structs comprise what is returned by post_process(). */
292
+ typedef struct D_type_list_struct D_type_list;
293
+ struct D_type_list_struct
294
+ {
295
+ D_type_list * next;
296
+ int type;
297
+ };
298
+
299
+ typedef struct PP_node_struct PP_node;
300
+ struct PP_node_struct
301
+ {
302
+ D_type_list *d_type_array[MAX_LINKS];
303
+ const char *violation;
304
+ };
305
+
306
+ /* Davy added these */
307
+
308
+ typedef struct Andlist_struct Andlist;
309
+ struct Andlist_struct
310
+ {
311
+ Andlist * next;
312
+ int conjunction;
313
+ int num_elements;
314
+ int element[MAX_SENTENCE];
315
+ int num_outside_words;
316
+ int outside_word[MAX_SENTENCE];
317
+ int cost;
318
+ };
319
+
320
+ /**
321
+ * This is for building the graphs of links in post-processing and
322
+ * fat link extraction.
323
+ */
324
+ struct Linkage_info_struct
325
+ {
326
+ int index;
327
+ Boolean fat;
328
+ Boolean canonical;
329
+ Boolean improper_fat_linkage;
330
+ Boolean inconsistent_domains;
331
+ short N_violations;
332
+ short null_cost, unused_word_cost, and_cost, link_cost;
333
+ float disjunct_cost;
334
+ double corpus_cost;
335
+ Andlist * andlist;
336
+ int island[MAX_SENTENCE];
337
+ size_t nwords;
338
+ char **disjunct_list_str;
339
+ #ifdef USE_CORPUS
340
+ Sense **sense_list;
341
+ #endif
342
+ };
343
+
344
+ struct List_o_links_struct
345
+ {
346
+ int link; /* the link number */
347
+ int word; /* the word at the other end of this link */
348
+ int dir; /* 0: undirected, 1: away from me, -1: toward me */
349
+ List_o_links * next;
350
+ };
351
+
352
+ /* These parameters tell power_pruning() whether it is being run before
353
+ * or after generating the "and" disjuncts: GENTLE means before,
354
+ * RUTHLESS means after.
355
+ */
356
+ #define GENTLE 1
357
+ #define RUTHLESS 0
358
+
359
+ typedef struct string_node_struct String_node;
360
+ struct string_node_struct
361
+ {
362
+ char * string;
363
+ int size;
364
+ String_node * next;
365
+ };
366
+
367
+ typedef struct Parse_choice_struct Parse_choice;
368
+
369
+ struct Link_s
370
+ {
371
+ int l, r;
372
+ Connector * lc, * rc;
373
+ const char * name; /* spelling of full link name */
374
+ };
375
+
376
+ struct Parse_choice_struct /* node of the singly linked list reached through Parse_set_struct.first / .current */
377
+ {
378
+ Parse_choice * next;
379
+ Parse_set * set[2];
380
+ Link link[2]; /* the lc field of each of these is NULL if there is no link used */
381
+ Disjunct *ld, *md, *rd; /* the chosen disjuncts for the relevant three words */
382
+ };
383
+
384
+ struct Parse_set_struct
385
+ {
386
+ s64 count; /* the number of ways */
387
+ Parse_choice * first;
388
+ Parse_choice * current; /* used to enumerate linkages */
389
+ };
390
+
391
+ struct X_table_connector_struct
392
+ {
393
+ short lw, rw;
394
+ short cost;
395
+ Parse_set *set;
396
+ Connector *le, *re;
397
+ X_table_connector *next;
398
+ };
399
+
400
+ /* from string-set.c */
401
+ struct String_set_s
402
+ {
403
+ int size; /* the current size of the table */
404
+ int count; /* number of things currently in the table */
405
+ char ** table; /* the table itself */
406
+ };
407
+
408
+
409
+ /* from pp_linkset.c */
410
+ typedef struct pp_linkset_node_s
411
+ {
412
+ const char *str;
413
+ struct pp_linkset_node_s *next;
414
+ } pp_linkset_node;
415
+
416
+ typedef struct pp_linkset_s
417
+ {
418
+ int hash_table_size;
419
+ int population;
420
+ pp_linkset_node **hash_table; /* data actually lives here */
421
+ } pp_linkset;
422
+
423
+
424
+ /* from pp_lexer.c */
425
+ #define PP_LEXER_MAX_LABELS 512
426
+
427
+ typedef struct pp_label_node_s
428
+ {
429
+ /* linked list of strings associated with a label in the table */
430
+ const char *str;
431
+ struct pp_label_node_s *next;
432
+ } pp_label_node; /* next=NULL: end of list */
433
+
434
+
435
+ typedef struct PPLexTable_s
436
+ {
437
+ String_set *string_set;
438
+ const char *labels[PP_LEXER_MAX_LABELS]; /* array of labels */
439
+ pp_label_node *nodes_of_label[PP_LEXER_MAX_LABELS]; /*str. for each label*/
440
+ pp_label_node *last_node_of_label[PP_LEXER_MAX_LABELS]; /* efficiency */
441
+ pp_label_node *current_node_of_active_label;/* state: curr node of label */
442
+ int idx_of_active_label; /* read state: current label */
443
+ } PPLexTable;
444
+
445
+ /* from pp_knowledge.c */
446
+ typedef struct StartingLinkAndDomain_s
447
+ {
448
+ const char *starting_link;
449
+ int domain; /* domain which the link belongs to (-1: terminator)*/
450
+ } StartingLinkAndDomain;
451
+
452
+ typedef struct pp_rule_s
453
+ {
454
+ /* Holds a single post-processing rule. Since rules come in many
455
+ flavors, not all fields of the following are always relevant */
456
+ const char *selector; /* name of link to which rule applies */
457
+ int domain; /* type of domain to which rule applies */
458
+ pp_linkset *link_set; /* handle to set of links relevant to rule */
459
+ int link_set_size; /* size of this set */
460
+ const char **link_array; /* array holding the spelled-out names */
461
+ const char *msg; /* explanation (NULL=end sentinel in array)*/
462
+ } pp_rule;
463
+
464
+ struct pp_knowledge_s
465
+ {
466
+ PPLexTable *lt; /* Internal rep'n of sets of strings from knowledge file */
467
+ const char *path; /* Name of file we loaded from */
468
+
469
+ /* handles to sets of links specified in knowledge file. These constitute
470
+ auxiliary data, necessary to implement the rules, below. See comments
471
+ in post-process.c for a description of these. */
472
+ pp_linkset *domain_starter_links;
473
+ pp_linkset *urfl_domain_starter_links;
474
+ pp_linkset *urfl_only_domain_starter_links;
475
+ pp_linkset *domain_contains_links;
476
+ pp_linkset *must_form_a_cycle_links;
477
+ pp_linkset *restricted_links;
478
+ pp_linkset *ignore_these_links;
479
+ pp_linkset *left_domain_starter_links;
480
+
481
+ /* arrays of rules specified in knowledge file */
482
+ pp_rule *connected_rules, *form_a_cycle_rules;
483
+ pp_rule *contains_one_rules, *contains_none_rules;
484
+ pp_rule *bounded_rules;
485
+
486
+ int n_connected_rules, n_form_a_cycle_rules;
487
+ int n_contains_one_rules, n_contains_none_rules;
488
+ int n_bounded_rules;
489
+
490
+ pp_linkset *set_of_links_starting_bounded_domain;
491
+ StartingLinkAndDomain *starting_link_lookup_table;
492
+ int nStartingLinks;
493
+ String_set *string_set;
494
+ };
495
+
496
+
497
+ #endif
498
+