grammar_police 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,159 @@
1
+ #include "word-tag.hpp"
2
+ #include "fast-sprintf.hpp"
3
+
4
+ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
5
+ bool& leading_right, bool& leading_left,
6
+ std::vector<int>& eps_right,
7
+ std::vector<int>& eps_left,
8
+ char* var, bool root, int parrent_cost) {
9
+ int cost = parrent_cost + exp->cost;
10
+ if (exp->type == CONNECTOR_type) {
11
+ dfs_position++;
12
+
13
+ const char* name = exp->u.string;
14
+
15
+ Connector* connector = connector_new();
16
+ connector->multi = exp->multi;
17
+ connector->string = name;
18
+ set_connector_length_limit(connector);
19
+
20
+
21
+ switch(exp->dir) {
22
+ case '+':
23
+ _position.push_back(_right_connectors.size());
24
+ _dir.push_back('+');
25
+ _right_connectors.push_back(PositionConnector(connector, '+', _word, dfs_position, exp->cost, cost,
26
+ leading_right, false,
27
+ eps_right, eps_left));
28
+ leading_right = false;
29
+ break;
30
+ case '-':
31
+ _position.push_back(_left_connectors.size());
32
+ _dir.push_back('-');
33
+ _left_connectors.push_back(PositionConnector(connector, '-', _word, dfs_position, exp->cost, cost,
34
+ false, leading_left,
35
+ eps_right, eps_left));
36
+ leading_left = false;
37
+ break;
38
+ default:
39
+ throw std::string("Unknown connector direction: ") + exp->dir;
40
+ }
41
+ } else if (exp->type == AND_type) {
42
+ if (exp->u.l == NULL) {
43
+ /* zeroary and */
44
+ } else
45
+ if (exp->u.l != NULL && exp->u.l->next == NULL) {
46
+ /* unary and - skip */
47
+ insert_connectors(exp->u.l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, var, root, cost);
48
+ } else {
49
+ int i;
50
+ E_list* l;
51
+
52
+ char new_var[MAX_VARIABLE_NAME];
53
+ char* last_new_var = new_var;
54
+ char* last_var = var;
55
+ while(*last_new_var = *last_var) {
56
+ last_new_var++;
57
+ last_var++;
58
+ }
59
+
60
+ for (i = 0, l = exp->u.l; l != NULL; l = l->next, i++) {
61
+ char* s = last_new_var;
62
+ *s++ = 'c';
63
+ fast_sprintf(s, i);
64
+
65
+ insert_connectors(l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, new_var, false, cost);
66
+ if (leading_right && var != NULL) {
67
+ eps_right.push_back(_variables->epsilon(new_var, '+'));
68
+ }
69
+
70
+
71
+ if (leading_left && var != NULL) {
72
+ eps_left.push_back(_variables->epsilon(new_var, '-'));
73
+ }
74
+ }
75
+ }
76
+ } else if (exp->type == OR_type) {
77
+ if (exp->u.l != NULL && exp->u.l->next == NULL) {
78
+ /* unary or - skip */
79
+ insert_connectors(exp->u.l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, var, root, cost);
80
+ } else {
81
+ int i;
82
+ E_list* l;
83
+ bool ll_true = false;
84
+ bool lr_true = false;
85
+
86
+ char new_var[MAX_VARIABLE_NAME];
87
+ char* last_new_var = new_var;
88
+ char* last_var = var;
89
+ while(*last_new_var = *last_var) {
90
+ last_new_var++;
91
+ last_var++;
92
+ }
93
+
94
+ for (i = 0, l = exp->u.l; l != NULL; l = l->next, i++) {
95
+ bool lr = leading_right, ll = leading_left;
96
+ std::vector<int> er = eps_right, el = eps_left;
97
+
98
+ char* s = last_new_var;
99
+ *s++ = 'd';
100
+ fast_sprintf(s, i);
101
+
102
+ insert_connectors(l->e, dfs_position, lr, ll, er, el, new_var, false, cost);
103
+ if (lr)
104
+ lr_true = true;
105
+ if (ll)
106
+ ll_true = true;
107
+ }
108
+ leading_right = lr_true;
109
+ leading_left = ll_true;
110
+ }
111
+ }
112
+ }
113
+
114
+
115
+
116
+ void WordTag::find_matches(int w, const char* C, char dir, std::vector<PositionConnector*>& matches) {
117
+ // cout << "Look connection on: ." << _word << ". ." << w << ". " << C << dir << endl;
118
+ Connector search_cntr;
119
+ init_connector(&search_cntr);
120
+ search_cntr.label = NORMAL_LABEL;
121
+ search_cntr.priority = THIN_priority;
122
+ search_cntr.string = C;
123
+ set_connector_length_limit(&search_cntr);
124
+
125
+ std::vector<PositionConnector>* connectors;
126
+ switch(dir) {
127
+ case '+':
128
+ connectors = &_left_connectors;
129
+ break;
130
+ case '-':
131
+ connectors = &_right_connectors;
132
+ break;
133
+ default:
134
+ throw std::string("Unknown connector direction: ") + dir;
135
+ }
136
+
137
+ bool conjunction = sentence_contains_conjunction(_sent);
138
+ std::vector<PositionConnector>::iterator i;
139
+ for (i = connectors->begin(); i != connectors->end(); i++) {
140
+ if (WordTag::match(w, search_cntr, dir, (*i).word, *((*i).connector), conjunction)) {
141
+ matches.push_back(&(*i));
142
+ }
143
+ }
144
+ }
145
+
146
+ void WordTag::add_matches_with_word(WordTag& tag) {
147
+ std::vector<PositionConnector>::iterator i;
148
+ for (i = _right_connectors.begin(); i != _right_connectors.end(); i++) {
149
+ std::vector<PositionConnector*> connector_matches;
150
+ tag.find_matches(_word, (*i).connector->string, '+', connector_matches);
151
+ std::vector<PositionConnector*>::iterator j;
152
+ for (j = connector_matches.begin(); j != connector_matches.end(); j++) {
153
+ i->matches.push_back(*j);
154
+ set_match_possible((*j)->word, (*j)->position);
155
+ (*j)->matches.push_back(&(*i));
156
+ tag.set_match_possible(_word, (*i).position);
157
+ }
158
+ }
159
+ }
@@ -0,0 +1,162 @@
1
+ #ifndef __WORD_TAG_HPP__
2
+ #define __WORD_TAG_HPP__
3
+
4
+ #include <vector>
5
+ #include <map>
6
+ #include <set>
7
+
8
+ extern "C" {
9
+ #include <link-grammar/api.h>
10
+ }
11
+
12
+ #include "variables.hpp"
13
+
14
+
15
+ struct PositionConnector {
16
+ PositionConnector(Connector* c, char d, int w, int p, int cst, int pcst,
17
+ bool lr, bool ll, const std::vector<int>& er, const std::vector<int>& el)
18
+ : connector(c), dir(d), word(w), position(p), cost(cst), parrent_cost(pcst),
19
+ leading_right(lr), leading_left(ll),
20
+ eps_right(er), eps_left(el) {
21
+ /*
22
+ cout << c->string << " : ." << w << ". : ." << p << ". ";
23
+ if (leading_right) {
24
+ cout << "lr: ";
25
+ copy(er.begin(), er.end(), ostream_iterator<int>(cout, " "));
26
+ }
27
+ if (leading_left) {
28
+ cout << "ll: ";
29
+ copy(el.begin(), el.end(), ostream_iterator<int>(cout, " "));
30
+ }
31
+ cout << endl;
32
+ */
33
+ }
34
+
35
+ // Connector itself
36
+ Connector* connector;
37
+ // Direction
38
+ char dir;
39
+ // word in a sentence that this connector belongs to
40
+ int word;
41
+ // position in the word tag
42
+ int position;
43
+ // cost of the connector
44
+ int cost;
45
+ // parrent cost
46
+ int parrent_cost;
47
+
48
+ bool leading_right;
49
+ std::vector<int> eps_right;
50
+ bool leading_left;
51
+ std::vector<int> eps_left;
52
+
53
+ // Matches with other words
54
+ std::vector<PositionConnector*> matches;
55
+
56
+ };
57
+
58
+
59
+ // TODO: Hash connectors for faster matching
60
+
61
+ class WordTag {
62
+ private:
63
+ std::vector<PositionConnector> _left_connectors;
64
+ std::vector<PositionConnector> _right_connectors;
65
+
66
+ std::vector<char> _dir;
67
+ std::vector<int> _position;
68
+
69
+ int _word;
70
+ Variables* _variables;
71
+
72
+ Sentence _sent;
73
+ Parse_Options _opts;
74
+
75
+ // Could this word tag match a connector (wi, pi)?
76
+ // For each word wi I keep a set of positions pi that can be matched
77
+ std::vector< std::set<int> > _match_possible;
78
+ void set_match_possible(int wj, int pj) {
79
+ _match_possible[wj].insert(pj);
80
+ }
81
+
82
+ public:
83
+ WordTag(int word, Variables* variables, Sentence sent, Parse_Options opts)
84
+ : _word(word), _variables(variables), _sent(sent), _opts(opts) {
85
+ _match_possible.resize(_sent->length);
86
+ }
87
+
88
+ const std::vector<PositionConnector>& get_left_connectors() {
89
+ return _left_connectors;
90
+ }
91
+
92
+ const std::vector<PositionConnector>& get_right_connectors() {
93
+ return _right_connectors;
94
+ }
95
+
96
+ PositionConnector* get(int dfs_position) {
97
+ switch (_dir[dfs_position - 1]) {
98
+ case '+':
99
+ return &_right_connectors[_position[dfs_position - 1]];
100
+ case '-':
101
+ return &_left_connectors[_position[dfs_position - 1]];
102
+ }
103
+ }
104
+
105
+ void set_connector_length_limit(Connector* c) {
106
+ int short_len = _opts->short_length;
107
+ if (short_len > UNLIMITED_LEN)
108
+ short_len = UNLIMITED_LEN;
109
+
110
+ Connector_set *conset = _sent->dict->unlimited_connector_set;
111
+ if (parse_options_get_all_short_connectors(_opts)) {
112
+ c->length_limit = short_len;
113
+ }
114
+ else if (conset == NULL || match_in_connector_set(_sent, conset, c, '+')) {
115
+ c->length_limit = UNLIMITED_LEN;
116
+ } else {
117
+ c->length_limit = short_len;
118
+ }
119
+ }
120
+
121
+ int match(int w1, Connector& cntr1, char dir, int w2, Connector& cntr2, bool conjunction) {
122
+ if (conjunction) {
123
+ switch (dir) {
124
+ case '+':
125
+ return ::prune_match(0, &cntr1, &cntr2);
126
+ case '-':
127
+ return ::prune_match(0, &cntr2, &cntr1);
128
+ default:
129
+ throw std::string("Unknown connector direction: ") + dir;
130
+ }
131
+ } else {
132
+ return ::do_match(_sent, &cntr1, &cntr2, w1, w2);
133
+ }
134
+ }
135
+
136
+ void insert_connectors(Exp* exp, int& dfs_position,
137
+ bool& leading_right, bool& leading_left,
138
+ std::vector<int>& eps_right,
139
+ std::vector<int>& eps_left,
140
+ char* var, bool root, int parrent_cost);
141
+
142
+ // Caches information about the found matches to the _matches vector, and also
143
+ // updates the _matches vector of all connectors in the given tag.
144
+ // In order to have all possible matches correctly cached, the function assumes that it is
145
+ // iteratively called for all words in the sentence, where the tag is on the right side of
146
+ // this word
147
+ void add_matches_with_word(WordTag& tag);
148
+
149
+ // Find matches in this word tag with the connector (name, dir).
150
+ void find_matches(int w, const char* C, char dir, std::vector<PositionConnector*>& matches);
151
+
152
+ // A simpler function: Can any connector in this word match a connector wi, pi?
153
+ // It is assumed that
154
+ bool match_possible(int wi, int pi) {
155
+ return _match_possible[wi].find(pi) != _match_possible[wi].end();
156
+ }
157
+
158
+
159
+
160
+ };
161
+
162
+ #endif
@@ -0,0 +1,148 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2009 Vikas N. Kumar */
3
+ /* Copyright (c) 2009 Linas Vepstas */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #ifndef SPELLCHECK_ASPELL_C
15
#define SPELLCHECK_ASPELL_C /* guard macro; a bare "#define" with no name does not preprocess */
16
+
17
+ #ifdef HAVE_ASPELL
18
+
19
+ #include <stdio.h>
20
+ #include <stdlib.h>
21
+ #include <string.h>
22
+
23
+ #include <aspell.h>
24
+ #include "link-includes.h"
25
+ #include "spellcheck.h"
26
+ #include "utilities.h" /* For Win32 compatibility */
27
+
28
+ #define ASPELL_LANG_KEY "lang"
29
+ static const char *spellcheck_lang_mapping[] = {
30
+ "en" /* link-grammar language */, "en_US" /* Aspell language key */
31
+ };
32
+
33
+ struct linkgrammar_aspell {
34
+ AspellConfig *config;
35
+ AspellSpeller *speller;
36
+ };
37
+
38
+ void * spellcheck_create(const char * lang)
39
+ {
40
+ struct linkgrammar_aspell *aspell = NULL;
41
+ size_t i = 0;
42
+ AspellCanHaveError *spell_err = NULL;
43
+
44
+ for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
45
+ {
46
+ if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
47
+ aspell = (struct linkgrammar_aspell *)malloc(sizeof(struct linkgrammar_aspell));
48
+ if (!aspell) {
49
+ prt_error("Error: out of memory. Aspell not used.\n");
50
+ aspell = NULL;
51
+ break;
52
+ }
53
+ aspell->config = NULL;
54
+ aspell->speller = NULL;
55
+ aspell->config = new_aspell_config();
56
+ if (aspell_config_replace(aspell->config, ASPELL_LANG_KEY,
57
+ spellcheck_lang_mapping[i]) == 0) {
58
+ prt_error("Error: failed to set language in aspell: %s\n", lang);
59
+ delete_aspell_config(aspell->config);
60
+ free(aspell);
61
+ aspell = NULL;
62
+ break;
63
+ }
64
+ spell_err = new_aspell_speller(aspell->config);
65
+ if (aspell_error_number(spell_err) != 0) {
66
+ prt_error("Error: Aspell: %s\n", aspell_error_message(spell_err));
67
+ delete_aspell_can_have_error(spell_err);
68
+ delete_aspell_config(aspell->config);
69
+ free(aspell);
70
+ aspell = NULL;
71
+ break;
72
+ }
73
+ aspell->speller = to_aspell_speller(spell_err);
74
+ break;
75
+ }
76
+ return aspell;
77
+ }
78
+
79
+ void spellcheck_destroy(void * chk)
80
+ {
81
+ struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
82
+ if (aspell) {
83
+ delete_aspell_speller(aspell->speller);
84
+ delete_aspell_config(aspell->config);
85
+ free(aspell);
86
+ aspell = NULL;
87
+ }
88
+ }
89
+
90
+ int spellcheck_test(void * chk, const char * word)
91
+ {
92
+ int val = 0;
93
+ struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
94
+ if (aspell && aspell->speller) {
95
+ /* this can return -1 on failure */
96
+ val = aspell_speller_check(aspell->speller, word, -1);
97
+ }
98
+ return (val == 1) ? 1 : 0;
99
+ }
100
+
101
+ int spellcheck_suggest(void * chk, char ***sug, const char * word)
102
+ {
103
+ struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
104
+ if (!sug) {
105
+ prt_error("Error: Aspell. Corrupt pointer.\n");
106
+ return 0;
107
+ }
108
+ if (aspell && aspell->speller) {
109
+ const AspellWordList *list = NULL;
110
+ AspellStringEnumeration *elem = NULL;
111
+ const char *aword = NULL;
112
+ unsigned int size, i;
113
+ char **array = NULL;
114
+
115
+ list = aspell_speller_suggest(aspell->speller, word, -1);
116
+ elem = aspell_word_list_elements(list);
117
+ size = aspell_word_list_size(list);
118
+ /* allocate an array of char* for returning back to link-parser
119
+ */
120
+ array = (char **)malloc(sizeof(char *) * size);
121
+ if (!array) {
122
+ prt_error("Error: Aspell. Out of memory.\n");
123
+ delete_aspell_string_enumeration(elem);
124
+ return 0;
125
+ }
126
+ i = 0;
127
+ while ((aword = aspell_string_enumeration_next(elem)) != NULL) {
128
+ array[i++] = strdup(aword);
129
+ }
130
+ delete_aspell_string_enumeration(elem);
131
+ *sug = array;
132
+ return size;
133
+ }
134
+ return 0;
135
+ }
136
+
137
/**
 * Release a suggestion array produced by spellcheck_suggest().
 *
 * @param sug  the array; NULL is accepted and ignored, whatever size says
 * @param size number of strings in the array; each one is freed before the
 *             array itself
 */
void spellcheck_free_suggest(char **sug, int size)
{
	int i;

	/* Without this guard a NULL array with size > 0 would be dereferenced. */
	if (sug == NULL) return;

	for (i = 0; i < size; ++i) {
		free(sug[i]);
	}
	free(sug);
}
146
+
147
+ #endif /* #ifdef HAVE_ASPELL */
148
+ #endif