grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,11 @@
1
+ #include "../api.h"
2
+
3
+ #ifdef USE_SAT_SOLVER
4
+ int sat_parse(Sentence sent, Parse_Options opts);
5
+ Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts);
6
+ void sat_sentence_delete(Sentence sent);
7
+ #else
8
+ static inline int sat_parse(Sentence sent, Parse_Options opts) { return -1; }
9
+ static inline Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts) { return NULL; }
10
+ static inline void sat_sentence_delete(Sentence sent) {}
11
+ #endif
@@ -0,0 +1,381 @@
1
+ extern "C" {
2
+ #include <link-grammar/api.h>
3
+ }
4
+
5
+ extern "C" int sat_encode(Sentence sent, Parse_Options opts);
6
+ extern "C" Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts);
7
+ extern "C" void sat_sentence_delete(Sentence sent);
8
+
9
+ #include "word-tag.hpp"
10
+
11
+ /**
12
+ * Base class for all SAT encodings
13
+ */
14
+ class SATEncoder {
15
+ public:
16
+
17
+ // Construct the encoder based on given sentence
18
+ SATEncoder(Sentence sent, Parse_Options opts)
19
+ : _sent(sent), _opts(opts), _solver(new Solver()), _variables(new Variables(sent)) {
20
+ // Preprocess word tags of the sentence
21
+ build_word_tags();
22
+ }
23
+
24
+ virtual ~SATEncoder()
25
+ {
26
+ delete _variables;
27
+ delete _solver;
28
+ }
29
+
30
+ // Create the formula from the sentence
31
+ void encode();
32
+
33
+ // Solve the formula, returning the next linkage.
34
+ Linkage get_next_linkage();
35
+
36
+ protected:
37
+
38
+ /**
39
+ * Methods that generate various link-grammar constraints.
40
+ */
41
+
42
+ // Top-level method that generates satisfaction conditions for every
43
+ // word in the sentence
44
+ void generate_satisfaction_conditions();
45
+
46
+ // Generates satisfaction conditions for the word-tag expression e
47
+ void generate_satisfaction_for_expression(int w, int& dfs_position, Exp* e, char* var, int parrent_cost);
48
+
49
+ // Handle the case of NULL expression of a word
50
+ virtual void handle_null_expression(int w) = 0;
51
+
52
+ // Determine if this word-tag must be satisfied and generate appropriate clauses
53
+ virtual void determine_satisfaction(int w, char* name) = 0;
54
+
55
+ // Generates satisfaction condition for the connector (wi, pi)
56
+ virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
57
+ char dir, bool multi, int cost, char* var) = 0;
58
+
59
+ // Definition of link_cw((wi, pi), wj) variables when wj is an ordinary word
60
+ void generate_link_cw_ordinary_definition(int wi, int pi, const char* Ci, char dir, int cost, int wj);
61
+
62
+ // Generates order constraints for the elements of a conjunction.
63
+ void generate_conjunct_order_constraints(int w, Exp *e1, Exp* e2, int& dfs_position);
64
+
65
+ /**
66
+ * Methods used for optimizing conjunction ordering constraints
67
+ */
68
+
69
+ // Number of connectors in an expression
70
+ int num_connectors(Exp* e);
71
+
72
+ // This expression can be matched without using any connectors of
73
+ // the given direction
74
+ static int empty_connectors(Exp* exp, char dir);
75
+
76
+ // This expression can be matched while using a connector of the
77
+ // given direction
78
+ static int non_empty_connectors(Exp* exp, char dir);
79
+
80
+ // Trailing connectors of a given direction in the given expression
81
+ void trailing_connectors(int w, Exp* exp, char dir, int& dfs_position,
82
+ std::vector<PositionConnector*>& connectors);
83
+ bool trailing_connectors_and_aux(int w, E_list* l, char dir, int& dfs_position,
84
+ std::vector<PositionConnector*>& connectors);
85
+
86
+ // Connectors of the given direction that cannot be trailing
87
+ // connectors
88
+ void certainly_non_trailing(int w, Exp* exp, char dir, int& dfs_position,
89
+ std::vector<PositionConnector*>& connectors, bool has_right);
90
+
91
+ // Connectors that can act as leading connectors of a given
92
+ // direction in the given direction
93
+ void leading_connectors(int w, Exp* exp, char dir, int& dfs_position,
94
+ std::vector<PositionConnector*>& connectors);
95
+
96
+ /**
97
+ * Definitions of linked(wi, wj) variables.
98
+ */
99
+
100
+ // Define all linked(wi, wj) variables
101
+ virtual void generate_linked_definitions() = 0;
102
+
103
+ // In order to reduce the number of clauses, some linked(wi, wj)
104
+ // variables can apriori be eliminated. The information about pairs
105
+ // of words that can be linked is kept in this matrix.
106
+ MatrixUpperTriangle<int> _linked_possible;
107
+
108
+ /**
109
+ * Planarity constraints
110
+ */
111
+
112
+ // Generates clauses that forbid link-crossing
113
+ void generate_planarity_conditions();
114
+ // Stronger planarity pruning
115
+ void generate_linked_min_max_planarity();
116
+
117
+
118
+ /**
119
+ * Connectivity constraints
120
+ */
121
+
122
+ #ifdef _CONNECTIVITY_
123
+ // Generate clauses that encode the connectivity requirement of the
124
+ // linkage. Experiments showed that it is better to check the
125
+ // connectivity aposteriori and this method has been excised.
126
+ void generate_connectivity();
127
+ #endif
128
+
129
+
130
+ // Helper method for connectivity_components
131
+ static void dfs(int node, const MatrixUpperTriangle<int>& graph, int component, std::vector<int>& components);
132
+
133
+ // Extract connectivity components of a linkage. Return true iff the linkage is connected.
134
+ bool connectivity_components(std::vector<int>& components);
135
+
136
+ // Generate clauses that prohibit all disconnected linkages that
137
+ // have the specified connectivity components.
138
+ void generate_disconnectivity_prohibiting(std::vector<int> components);
139
+
140
+
141
+ /**
142
+ * Encoding specific clauses - override to add clauses that are
143
+ * specific to a certain encoding
144
+ */
145
+ virtual void generate_encoding_specific_clauses() {}
146
+
147
+
148
+ /**
149
+ * Post-processing - PP pruning
150
+ */
151
+
152
+ // Generates PP pruning clauses.
153
+ void pp_prune();
154
+ bool post_process_linkage(Linkage linkage);
155
+
156
+
157
+ /**
158
+ * Power pruning
159
+ */
160
+ // Generate definition of epsilon variables that are used for power
161
+ // pruning
162
+ void generate_epsilon_definitions();
163
+ bool generate_epsilon_for_expression(int w, int& dfs_position, Exp* e, char* var, bool root, char dir);
164
+
165
+
166
+ // Power pruning
167
+ void power_prune();
168
+ // auxiliary method that extends power pruning clauses with additional literals
169
+ // (e.g., link should not be power-prunned if there words are fat-linked)
170
+ virtual void add_additional_power_pruning_conditions(vec<Lit>& clause, int wl, int wr)
171
+ {}
172
+
173
+
174
+
175
+ /**
176
+ * Cost cutoff
177
+ */
178
+
179
+ // Cost cutoff treshold value. Nodes of the expression tree are
180
+ // pruned if their cost exceeds this value. Cost cutoff is performed
181
+ // during satisfaction condition generating.
182
+ static const int _cost_cutoff = 2;
183
+
184
+
185
+ /**
186
+ * Creating clauses and passing them to the MiniSAT solver
187
+ */
188
+
189
+ // Add the specified clause to the solver
190
+ void add_clause(vec<Lit>& clause) {
191
+ #ifdef _DEBUG
192
+ print_clause(clause);
193
+ #endif
194
+ for (int i = 0; i < clause.size(); i++) {
195
+ while (var(clause[i]) >= _solver->nVars()) {
196
+ _solver->newVar();
197
+ }
198
+ }
199
+ _solver->addClause(clause);
200
+ }
201
+
202
+
203
+ // Print clause literals to standard output
204
+ static void print_clause(const vec<Lit>& clause) {
205
+ static int num = 1;
206
+
207
+ cout << "Clause: ." << num++ << ". ";
208
+ for (int i = 0; i < clause.size(); i++)
209
+ cout << (sign(clause[i]) ? '-' : '+') << var(clause[i]) << " ";
210
+ cout << endl;
211
+ }
212
+
213
+
214
+
215
+ /**
216
+ * Conversion of various formula types to CNF. Clauses obtained
217
+ * are automatically passed to the SAT Solver.
218
+ */
219
+ void generate_literal(Lit l);
220
+ void generate_and_definition(Lit lhs, vec<Lit>& rhs);
221
+ void generate_or_definition(Lit lhs, vec<Lit>& rhs);
222
+ void generate_xor_definition(Lit lhs, vec<Lit>& rhs);
223
+ void generate_equivalence_definition(Lit l1, Lit l2);
224
+ void generate_classical_and_definition(Lit lhs, vec<Lit>& rhs);
225
+ void generate_and(vec<Lit>& vect);
226
+ void generate_or(vec<Lit>& vect);
227
+ void generate_xor_conditions(vec<Lit>& vect);
228
+ void generate_conditional_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs);
229
+ void generate_conditional_lr_implication_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs);
230
+ void generate_conditional_lr_implication_or_definition(Lit condition1, Lit condition2, Lit lhs, vec<Lit>& rhs);
231
+
232
+ /*
233
+ * Word tags of the words in a sentence kept in a preprocessed
234
+ * form. This enables users to get information about the
235
+ * connectors in a very eficient way.
236
+ */
237
+ // Word tags
238
+ std::vector<WordTag> _word_tags;
239
+
240
+ // Initializes _word_tags array
241
+ void build_word_tags();
242
+
243
+
244
+ // Find all matching connectors between two words
245
+ void find_all_matches_between_words(int w1, int w2,
246
+ std::vector<std::pair<const PositionConnector*, const PositionConnector*> >& matches);
247
+
248
+ // Check if the connector (wi, pi) can match any word in [l, r)
249
+ bool matches_in_interval(int wi, int pi, int l, int r);
250
+
251
+
252
+ // Join several expressions corresponding to different dictionary
253
+ // entries of a word into a single expression.
254
+ Exp* join_alternatives(int w);
255
+
256
+ // Erase auxiliary expression tree nodes obtained by joining several
257
+ // expressions into one.
258
+ void free_alternatives(Exp* e);
259
+
260
+
261
+ /**
262
+ * Decoding
263
+ */
264
+
265
+ // Convert propositional model to a parse info structure
266
+ virtual bool extract_links(Parse_info pi) = 0;
267
+
268
+ // Create linkage from a propositional model
269
+ Linkage create_linkage();
270
+
271
+ // Generate clause that prohibits the current model
272
+ void generate_linkage_prohibiting();
273
+
274
+
275
+ // Object that contains all information about the variable
276
+ // encoding.
277
+ Variables* _variables;
278
+
279
+ // The MiniSAT solver instance. The solver keeps the set of clauses.
280
+ Solver* _solver;
281
+
282
+ // Sentence that is being parsed.
283
+ Sentence _sent;
284
+
285
+ // Parse options.
286
+ Parse_Options _opts;
287
+
288
+ };
289
+
290
+
291
+ /*******************************************************************************
292
+ * SAT encoding for sentences that do not contain conjunction. *
293
+ *******************************************************************************/
294
+ class SATEncoderConjunctionFreeSentences : public SATEncoder {
295
+ public:
296
+ SATEncoderConjunctionFreeSentences(Sentence sent, Parse_Options opts)
297
+ : SATEncoder(sent, opts) {
298
+ }
299
+
300
+ virtual void handle_null_expression(int w);
301
+ virtual void determine_satisfaction(int w, char* name);
302
+ virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
303
+ char dir, bool multi, int cost, char* var);
304
+
305
+
306
+ virtual void generate_linked_definitions();
307
+ virtual bool extract_links(Parse_info pi);
308
+
309
+ virtual void generate_encoding_specific_clauses();
310
+ };
311
+
312
+ /*******************************************************************************
313
+ * SAT encoding for sentences that contain conjunction. *
314
+ *******************************************************************************/
315
+ class SATEncoderConjunctiveSentences : public SATEncoder {
316
+ public:
317
+ SATEncoderConjunctiveSentences(Sentence sent, Parse_Options opts)
318
+ : SATEncoder(sent, opts) {
319
+ init_connective_words();
320
+ }
321
+
322
+ private:
323
+ virtual void handle_null_expression(int w);
324
+ virtual void determine_satisfaction(int w, char* name);
325
+
326
+ virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
327
+ char dir, bool multi, int cost, char* var);
328
+
329
+ virtual void add_additional_power_pruning_conditions(vec<Lit>& clause, int wl, int wr);
330
+
331
+ virtual void generate_encoding_specific_clauses();
332
+
333
+ // various fat-link conditions
334
+ void either_tag_or_fat_link(int w, Lit tag);
335
+ void generate_fat_link_up_definitions();
336
+ void generate_fat_link_down_definitions();
337
+ void generate_fat_link_up_between_down_conditions();
338
+ void generate_fat_link_comma_conditions();
339
+ void generate_fat_link_crossover_conditions();
340
+ void generate_fat_link_Left_Wall_not_inside();
341
+ void generate_fat_link_linked_upperside();
342
+ void generate_fat_link_existence();
343
+ void generate_fat_link_neighbor();
344
+ void generate_label_compatibility();
345
+
346
+ // link_cw variables
347
+
348
+ bool link_cw_possible(int wi, int pi, const char* Ci, char dir, int w, int llim, int rlim);
349
+ bool link_cw_possible_with_fld(int wi, int pi, const char* Ci, char dir, int w, int llim, int rlim);
350
+
351
+ void generate_link_cw_connective_impossible (int wi, int pi, const char* Ci, int wj);
352
+ void generate_link_cw_connective_definition (int wi, int pi, const char* Cj, int wj);
353
+
354
+ // link_top_cw variables
355
+ void generate_link_top_cw_up_definition (int wi,
356
+ int wj, int pj, const char* Cj, bool multi);
357
+ void generate_link_top_cw_iff_link_cw (int wi,
358
+ int wj, int pj, const char* Cj);
359
+ // link_top_ww
360
+ void generate_link_top_ww_connective_comma_definition (Lit lhs, int wi, int wj);
361
+
362
+ // link_ww
363
+ virtual void generate_linked_definitions();
364
+
365
+ // Linkage extraction from the model
366
+ void get_satisfied_link_top_cw_connectors(int word, int top_word, std::vector<int>& link_top_cw_vars);
367
+ virtual bool extract_links(Parse_info pi);
368
+
369
+ // Initialize the vector of connective words of this sentence
370
+ void init_connective_words();
371
+
372
+ // Words that are connectives or commas
373
+ std::vector<int> _connectives;
374
+
375
+ // Cache isConnectiveOrComma results for faster checking
376
+ std::vector<bool> _is_connective_or_comma;
377
+
378
+ bool isConnectiveOrComma(int w) {
379
+ return _is_connective_or_comma[w];
380
+ }
381
+ };
@@ -0,0 +1,118 @@
1
+ #ifndef __TRIE_HPP__
2
+ #define __TRIE_HPP__
3
+
4
+ #include <string>
5
+
6
/*
  Trie keyed by C strings.  Keys may only contain the characters that
  char_to_pos maps to a child slot: digits, underscore and a subset of
  the letters.
*/
template<class T>
class Trie {
public:
  // Creates an empty, non-terminal node
  Trie();
  // Releases all child nodes
  ~Trie();

  // Stores value under key
  void insert(const char* key, T value);
  // Returns the value stored under key, or NOT_FOUND
  T lookup(const char* key);

  // Returned by lookup if the key is not found in the trie
  static const int NOT_FOUND = -1;

private:
  // Copying is not supported: both operations are private
  Trie(const Trie&);
  void operator=(const Trie& t);

  // Number of child slots per node:
  // digits + upper-case letters + lower-case letters + other
  static const int NUM_CHARS = 10 + 1 + 10 + 1;

  // Maps a key character to its child slot
  int char_to_pos(char c);

  // True if a key ends at this node
  bool _terminal;
  // Children, indexed by char_to_pos
  Trie* _next[NUM_CHARS];
  // Value of the key ending at this node (meaningful only if _terminal)
  T _value;
};
38
+
39
+
40
+ template <class T>
41
+ Trie<T>::Trie()
42
+ : _terminal(false) {
43
+ memset(_next, 0, NUM_CHARS*sizeof(Trie*));
44
+ }
45
+
46
+ template <class T>
47
+ Trie<T>::~Trie() {
48
+ for (int i = 0; i < NUM_CHARS; i++)
49
+ if (_next[i]) {
50
+ delete _next[i];
51
+ }
52
+ }
53
+
54
+
55
+ template <class T>
56
+ int Trie<T>::char_to_pos(char c) {
57
+ static int pos[] = {
58
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
62
+ -1, -1, -1, -1, -1, -1, -1,
63
+ // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
64
+ -1, -1, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65
+ -1, -1, -1, -1, 11, -1,
66
+ // a b c d e f g h i j k l m n o p q r s t u v w x y z
67
+ -1, -1, 12, 13, 14, 15, -1, -1, 16, -1, -1, 17, -1, 18, -1, -1, -1, 19, -1, -1, 20, -1, 21, -1, -1, -1};
68
+ assert(pos[c] != -1, "NOT FOUND");
69
+ return pos[c];
70
+ /*
71
+ if ('0' <= c && c <= '9')
72
+ return c-'0' + 26 + 26;
73
+ if (c == 'c')
74
+ return 2;
75
+ if (c == 'd')
76
+ return 3;
77
+ if (c == '_')
78
+ return 26 + 26 + 10;
79
+ if ('a' <= c && c <= 'z')
80
+ return c-'a';
81
+ if ('A' <= c && c <= 'Z')
82
+ return c-'A' + 26;
83
+ if (c == '*')
84
+ return 26 + 26 + 10 + 1;
85
+ throw std::string("Trie::char ") + c + " is not supported";
86
+ */
87
+ }
88
+
89
+ template <class T>
90
+ void Trie<T>::insert(const char* key, T value) {
91
+ Trie* t = this;
92
+ while(*key != '\0') {
93
+ int pos = char_to_pos(*key);
94
+ if (!t->_next[pos]) {
95
+ t->_next[pos] = new Trie();
96
+ }
97
+ t = t->_next[pos];
98
+ key++;
99
+ }
100
+ t->_terminal = true;
101
+ t->_value = value;
102
+ }
103
+
104
+ template <class T>
105
+ T Trie<T>::lookup(const char* key) {
106
+ Trie* t = this;
107
+ while(*key != '\0') {
108
+ int pos = char_to_pos(*key);
109
+ t = t->_next[pos];
110
+ if (!t) {
111
+ return NOT_FOUND;
112
+ }
113
+ key++;
114
+ }
115
+ return t->_terminal ? t->_value : NOT_FOUND;
116
+ }
117
+
118
+ #endif