grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
/*
 * C-callable interface to the SAT-solver based parser.
 *
 * When USE_SAT_SOLVER is defined the three functions below are only
 * declared here and must be provided elsewhere. Otherwise inline stubs
 * are supplied so that callers compile and link without the solver.
 *
 * The include guard matters: without it, a second inclusion of this
 * header in a build without USE_SAT_SOLVER would redefine the stubs.
 */
#ifndef LINK_GRAMMAR_SAT_ENCODER_H
#define LINK_GRAMMAR_SAT_ENCODER_H

#include "../api.h"

#ifdef USE_SAT_SOLVER

int sat_parse(Sentence sent, Parse_Options opts);
Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts);
void sat_sentence_delete(Sentence sent);

#else

/* Stub: always returns -1 (presumably "no parse available" - confirm
 * against the callers). */
static inline int sat_parse(Sentence sent, Parse_Options opts) { return -1; }

/* Stub: never produces a linkage, always returns NULL. */
static inline Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts) { return NULL; }

/* Stub: nothing was allocated by the stubs above, so nothing to release. */
static inline void sat_sentence_delete(Sentence sent) {}

#endif /* USE_SAT_SOLVER */

#endif /* LINK_GRAMMAR_SAT_ENCODER_H */
|
@@ -0,0 +1,381 @@
|
|
1
|
+
extern "C" {
|
2
|
+
#include <link-grammar/api.h>
|
3
|
+
}
|
4
|
+
|
5
|
+
extern "C" int sat_encode(Sentence sent, Parse_Options opts);
|
6
|
+
extern "C" Linkage sat_create_linkage(int k, Sentence sent, Parse_Options opts);
|
7
|
+
extern "C" void sat_sentence_delete(Sentence sent);
|
8
|
+
|
9
|
+
#include "word-tag.hpp"
|
10
|
+
|
11
|
+
/**
|
12
|
+
* Base class for all SAT encodings
|
13
|
+
*/
|
14
|
+
class SATEncoder {
|
15
|
+
public:
|
16
|
+
|
17
|
+
// Construct the encoder based on given sentence
|
18
|
+
SATEncoder(Sentence sent, Parse_Options opts)
|
19
|
+
: _sent(sent), _opts(opts), _solver(new Solver()), _variables(new Variables(sent)) {
|
20
|
+
// Preprocess word tags of the sentence
|
21
|
+
build_word_tags();
|
22
|
+
}
|
23
|
+
|
24
|
+
virtual ~SATEncoder()
|
25
|
+
{
|
26
|
+
delete _variables;
|
27
|
+
delete _solver;
|
28
|
+
}
|
29
|
+
|
30
|
+
// Create the formula from the sentence
|
31
|
+
void encode();
|
32
|
+
|
33
|
+
// Solve the formula, returning the next linkage.
|
34
|
+
Linkage get_next_linkage();
|
35
|
+
|
36
|
+
protected:
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Methods that generate various link-grammar constraints.
|
40
|
+
*/
|
41
|
+
|
42
|
+
// Top-level method that generates satisfaction conditions for every
|
43
|
+
// word in the sentence
|
44
|
+
void generate_satisfaction_conditions();
|
45
|
+
|
46
|
+
// Generates satisfaction conditions for the word-tag expression e
|
47
|
+
void generate_satisfaction_for_expression(int w, int& dfs_position, Exp* e, char* var, int parrent_cost);
|
48
|
+
|
49
|
+
// Handle the case of NULL expression of a word
|
50
|
+
virtual void handle_null_expression(int w) = 0;
|
51
|
+
|
52
|
+
// Determine if this word-tag must be satisfied and generate appropriate clauses
|
53
|
+
virtual void determine_satisfaction(int w, char* name) = 0;
|
54
|
+
|
55
|
+
// Generates satisfaction condition for the connector (wi, pi)
|
56
|
+
virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
|
57
|
+
char dir, bool multi, int cost, char* var) = 0;
|
58
|
+
|
59
|
+
// Definition of link_cw((wi, pi), wj) variables when wj is an ordinary word
|
60
|
+
void generate_link_cw_ordinary_definition(int wi, int pi, const char* Ci, char dir, int cost, int wj);
|
61
|
+
|
62
|
+
// Generates order constraints for the elements of a conjunction.
|
63
|
+
void generate_conjunct_order_constraints(int w, Exp *e1, Exp* e2, int& dfs_position);
|
64
|
+
|
65
|
+
/**
|
66
|
+
* Methods used for optimizing conjunction ordering constraints
|
67
|
+
*/
|
68
|
+
|
69
|
+
// Number of connectors in an expression
|
70
|
+
int num_connectors(Exp* e);
|
71
|
+
|
72
|
+
// This expression can be matched without using any connectors of
|
73
|
+
// the given direction
|
74
|
+
static int empty_connectors(Exp* exp, char dir);
|
75
|
+
|
76
|
+
// This expression can be matched while using a connector of the
|
77
|
+
// given direction
|
78
|
+
static int non_empty_connectors(Exp* exp, char dir);
|
79
|
+
|
80
|
+
// Trailing connectors of a given direction in the given expression
|
81
|
+
void trailing_connectors(int w, Exp* exp, char dir, int& dfs_position,
|
82
|
+
std::vector<PositionConnector*>& connectors);
|
83
|
+
bool trailing_connectors_and_aux(int w, E_list* l, char dir, int& dfs_position,
|
84
|
+
std::vector<PositionConnector*>& connectors);
|
85
|
+
|
86
|
+
// Connectors of the given direction that cannot be trailing
|
87
|
+
// connectors
|
88
|
+
void certainly_non_trailing(int w, Exp* exp, char dir, int& dfs_position,
|
89
|
+
std::vector<PositionConnector*>& connectors, bool has_right);
|
90
|
+
|
91
|
+
// Connectors that can act as leading connectors of a given
|
92
|
+
// direction in the given direction
|
93
|
+
void leading_connectors(int w, Exp* exp, char dir, int& dfs_position,
|
94
|
+
std::vector<PositionConnector*>& connectors);
|
95
|
+
|
96
|
+
/**
|
97
|
+
* Definitions of linked(wi, wj) variables.
|
98
|
+
*/
|
99
|
+
|
100
|
+
// Define all linked(wi, wj) variables
|
101
|
+
virtual void generate_linked_definitions() = 0;
|
102
|
+
|
103
|
+
// In order to reduce the number of clauses, some linked(wi, wj)
|
104
|
+
// variables can apriori be eliminated. The information about pairs
|
105
|
+
// of words that can be linked is kept in this matrix.
|
106
|
+
MatrixUpperTriangle<int> _linked_possible;
|
107
|
+
|
108
|
+
/**
|
109
|
+
* Planarity constraints
|
110
|
+
*/
|
111
|
+
|
112
|
+
// Generates clauses that forbid link-crossing
|
113
|
+
void generate_planarity_conditions();
|
114
|
+
// Stronger planarity pruning
|
115
|
+
void generate_linked_min_max_planarity();
|
116
|
+
|
117
|
+
|
118
|
+
/**
|
119
|
+
* Connectivity constraints
|
120
|
+
*/
|
121
|
+
|
122
|
+
#ifdef _CONNECTIVITY_
|
123
|
+
// Generate clauses that encode the connectivity requirement of the
|
124
|
+
// linkage. Experiments showed that it is better to check the
|
125
|
+
// connectivity aposteriori and this method has been excised.
|
126
|
+
void generate_connectivity();
|
127
|
+
#endif
|
128
|
+
|
129
|
+
|
130
|
+
// Helper method for connectivity_components
|
131
|
+
static void dfs(int node, const MatrixUpperTriangle<int>& graph, int component, std::vector<int>& components);
|
132
|
+
|
133
|
+
// Extract connectivity components of a linkage. Return true iff the linkage is connected.
|
134
|
+
bool connectivity_components(std::vector<int>& components);
|
135
|
+
|
136
|
+
// Generate clauses that prohibit all disconnected linkages that
|
137
|
+
// have the specified connectivity components.
|
138
|
+
void generate_disconnectivity_prohibiting(std::vector<int> components);
|
139
|
+
|
140
|
+
|
141
|
+
/**
|
142
|
+
* Encoding specific clauses - override to add clauses that are
|
143
|
+
* specific to a certain encoding
|
144
|
+
*/
|
145
|
+
virtual void generate_encoding_specific_clauses() {}
|
146
|
+
|
147
|
+
|
148
|
+
/**
|
149
|
+
* Post-processing - PP pruning
|
150
|
+
*/
|
151
|
+
|
152
|
+
// Generates PP pruning clauses.
|
153
|
+
void pp_prune();
|
154
|
+
bool post_process_linkage(Linkage linkage);
|
155
|
+
|
156
|
+
|
157
|
+
/**
|
158
|
+
* Power pruning
|
159
|
+
*/
|
160
|
+
// Generate definition of epsilon variables that are used for power
|
161
|
+
// pruning
|
162
|
+
void generate_epsilon_definitions();
|
163
|
+
bool generate_epsilon_for_expression(int w, int& dfs_position, Exp* e, char* var, bool root, char dir);
|
164
|
+
|
165
|
+
|
166
|
+
// Power pruning
|
167
|
+
void power_prune();
|
168
|
+
// auxiliary method that extends power pruning clauses with additional literals
|
169
|
+
// (e.g., link should not be power-prunned if there words are fat-linked)
|
170
|
+
virtual void add_additional_power_pruning_conditions(vec<Lit>& clause, int wl, int wr)
|
171
|
+
{}
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
/**
|
176
|
+
* Cost cutoff
|
177
|
+
*/
|
178
|
+
|
179
|
+
// Cost cutoff treshold value. Nodes of the expression tree are
|
180
|
+
// pruned if their cost exceeds this value. Cost cutoff is performed
|
181
|
+
// during satisfaction condition generating.
|
182
|
+
static const int _cost_cutoff = 2;
|
183
|
+
|
184
|
+
|
185
|
+
/**
|
186
|
+
* Creating clauses and passing them to the MiniSAT solver
|
187
|
+
*/
|
188
|
+
|
189
|
+
// Add the specified clause to the solver
|
190
|
+
void add_clause(vec<Lit>& clause) {
|
191
|
+
#ifdef _DEBUG
|
192
|
+
print_clause(clause);
|
193
|
+
#endif
|
194
|
+
for (int i = 0; i < clause.size(); i++) {
|
195
|
+
while (var(clause[i]) >= _solver->nVars()) {
|
196
|
+
_solver->newVar();
|
197
|
+
}
|
198
|
+
}
|
199
|
+
_solver->addClause(clause);
|
200
|
+
}
|
201
|
+
|
202
|
+
|
203
|
+
// Print clause literals to standard output
|
204
|
+
static void print_clause(const vec<Lit>& clause) {
|
205
|
+
static int num = 1;
|
206
|
+
|
207
|
+
cout << "Clause: ." << num++ << ". ";
|
208
|
+
for (int i = 0; i < clause.size(); i++)
|
209
|
+
cout << (sign(clause[i]) ? '-' : '+') << var(clause[i]) << " ";
|
210
|
+
cout << endl;
|
211
|
+
}
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
/**
|
216
|
+
* Conversion of various formula types to CNF. Clauses obtained
|
217
|
+
* are automatically passed to the SAT Solver.
|
218
|
+
*/
|
219
|
+
void generate_literal(Lit l);
|
220
|
+
void generate_and_definition(Lit lhs, vec<Lit>& rhs);
|
221
|
+
void generate_or_definition(Lit lhs, vec<Lit>& rhs);
|
222
|
+
void generate_xor_definition(Lit lhs, vec<Lit>& rhs);
|
223
|
+
void generate_equivalence_definition(Lit l1, Lit l2);
|
224
|
+
void generate_classical_and_definition(Lit lhs, vec<Lit>& rhs);
|
225
|
+
void generate_and(vec<Lit>& vect);
|
226
|
+
void generate_or(vec<Lit>& vect);
|
227
|
+
void generate_xor_conditions(vec<Lit>& vect);
|
228
|
+
void generate_conditional_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs);
|
229
|
+
void generate_conditional_lr_implication_or_definition(Lit condition, Lit lhs, vec<Lit>& rhs);
|
230
|
+
void generate_conditional_lr_implication_or_definition(Lit condition1, Lit condition2, Lit lhs, vec<Lit>& rhs);
|
231
|
+
|
232
|
+
/*
|
233
|
+
* Word tags of the words in a sentence kept in a preprocessed
|
234
|
+
* form. This enables users to get information about the
|
235
|
+
* connectors in a very eficient way.
|
236
|
+
*/
|
237
|
+
// Word tags
|
238
|
+
std::vector<WordTag> _word_tags;
|
239
|
+
|
240
|
+
// Initializes _word_tags array
|
241
|
+
void build_word_tags();
|
242
|
+
|
243
|
+
|
244
|
+
// Find all matching connectors between two words
|
245
|
+
void find_all_matches_between_words(int w1, int w2,
|
246
|
+
std::vector<std::pair<const PositionConnector*, const PositionConnector*> >& matches);
|
247
|
+
|
248
|
+
// Check if the connector (wi, pi) can match any word in [l, r)
|
249
|
+
bool matches_in_interval(int wi, int pi, int l, int r);
|
250
|
+
|
251
|
+
|
252
|
+
// Join several expressions corresponding to different dictionary
|
253
|
+
// entries of a word into a single expression.
|
254
|
+
Exp* join_alternatives(int w);
|
255
|
+
|
256
|
+
// Erase auxiliary expression tree nodes obtained by joining several
|
257
|
+
// expressions into one.
|
258
|
+
void free_alternatives(Exp* e);
|
259
|
+
|
260
|
+
|
261
|
+
/**
|
262
|
+
* Decoding
|
263
|
+
*/
|
264
|
+
|
265
|
+
// Convert propositional model to a parse info structure
|
266
|
+
virtual bool extract_links(Parse_info pi) = 0;
|
267
|
+
|
268
|
+
// Create linkage from a propositional model
|
269
|
+
Linkage create_linkage();
|
270
|
+
|
271
|
+
// Generate clause that prohibits the current model
|
272
|
+
void generate_linkage_prohibiting();
|
273
|
+
|
274
|
+
|
275
|
+
// Object that contains all information about the variable
|
276
|
+
// encoding.
|
277
|
+
Variables* _variables;
|
278
|
+
|
279
|
+
// The MiniSAT solver instance. The solver keeps the set of clauses.
|
280
|
+
Solver* _solver;
|
281
|
+
|
282
|
+
// Sentence that is being parsed.
|
283
|
+
Sentence _sent;
|
284
|
+
|
285
|
+
// Parse options.
|
286
|
+
Parse_Options _opts;
|
287
|
+
|
288
|
+
};
|
289
|
+
|
290
|
+
|
291
|
+
/*******************************************************************************
|
292
|
+
* SAT encoding for sentences that do not contain conjunction. *
|
293
|
+
*******************************************************************************/
|
294
|
+
class SATEncoderConjunctionFreeSentences : public SATEncoder {
|
295
|
+
public:
|
296
|
+
SATEncoderConjunctionFreeSentences(Sentence sent, Parse_Options opts)
|
297
|
+
: SATEncoder(sent, opts) {
|
298
|
+
}
|
299
|
+
|
300
|
+
virtual void handle_null_expression(int w);
|
301
|
+
virtual void determine_satisfaction(int w, char* name);
|
302
|
+
virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
|
303
|
+
char dir, bool multi, int cost, char* var);
|
304
|
+
|
305
|
+
|
306
|
+
virtual void generate_linked_definitions();
|
307
|
+
virtual bool extract_links(Parse_info pi);
|
308
|
+
|
309
|
+
virtual void generate_encoding_specific_clauses();
|
310
|
+
};
|
311
|
+
|
312
|
+
/*******************************************************************************
|
313
|
+
* SAT encoding for sentences that contain conjunction. *
|
314
|
+
*******************************************************************************/
|
315
|
+
class SATEncoderConjunctiveSentences : public SATEncoder {
|
316
|
+
public:
|
317
|
+
SATEncoderConjunctiveSentences(Sentence sent, Parse_Options opts)
|
318
|
+
: SATEncoder(sent, opts) {
|
319
|
+
init_connective_words();
|
320
|
+
}
|
321
|
+
|
322
|
+
private:
|
323
|
+
virtual void handle_null_expression(int w);
|
324
|
+
virtual void determine_satisfaction(int w, char* name);
|
325
|
+
|
326
|
+
virtual void generate_satisfaction_for_connector(int wi, int pi, const char* Ci,
|
327
|
+
char dir, bool multi, int cost, char* var);
|
328
|
+
|
329
|
+
virtual void add_additional_power_pruning_conditions(vec<Lit>& clause, int wl, int wr);
|
330
|
+
|
331
|
+
virtual void generate_encoding_specific_clauses();
|
332
|
+
|
333
|
+
// various fat-link conditions
|
334
|
+
void either_tag_or_fat_link(int w, Lit tag);
|
335
|
+
void generate_fat_link_up_definitions();
|
336
|
+
void generate_fat_link_down_definitions();
|
337
|
+
void generate_fat_link_up_between_down_conditions();
|
338
|
+
void generate_fat_link_comma_conditions();
|
339
|
+
void generate_fat_link_crossover_conditions();
|
340
|
+
void generate_fat_link_Left_Wall_not_inside();
|
341
|
+
void generate_fat_link_linked_upperside();
|
342
|
+
void generate_fat_link_existence();
|
343
|
+
void generate_fat_link_neighbor();
|
344
|
+
void generate_label_compatibility();
|
345
|
+
|
346
|
+
// link_cw variables
|
347
|
+
|
348
|
+
bool link_cw_possible(int wi, int pi, const char* Ci, char dir, int w, int llim, int rlim);
|
349
|
+
bool link_cw_possible_with_fld(int wi, int pi, const char* Ci, char dir, int w, int llim, int rlim);
|
350
|
+
|
351
|
+
void generate_link_cw_connective_impossible (int wi, int pi, const char* Ci, int wj);
|
352
|
+
void generate_link_cw_connective_definition (int wi, int pi, const char* Cj, int wj);
|
353
|
+
|
354
|
+
// link_top_cw variables
|
355
|
+
void generate_link_top_cw_up_definition (int wi,
|
356
|
+
int wj, int pj, const char* Cj, bool multi);
|
357
|
+
void generate_link_top_cw_iff_link_cw (int wi,
|
358
|
+
int wj, int pj, const char* Cj);
|
359
|
+
// link_top_ww
|
360
|
+
void generate_link_top_ww_connective_comma_definition (Lit lhs, int wi, int wj);
|
361
|
+
|
362
|
+
// link_ww
|
363
|
+
virtual void generate_linked_definitions();
|
364
|
+
|
365
|
+
// Linkage extraction from the model
|
366
|
+
void get_satisfied_link_top_cw_connectors(int word, int top_word, std::vector<int>& link_top_cw_vars);
|
367
|
+
virtual bool extract_links(Parse_info pi);
|
368
|
+
|
369
|
+
// Initialize the vector of connective words of this sentence
|
370
|
+
void init_connective_words();
|
371
|
+
|
372
|
+
// Words that are connectives or commas
|
373
|
+
std::vector<int> _connectives;
|
374
|
+
|
375
|
+
// Cache isConnectiveOrComma results for faster checking
|
376
|
+
std::vector<bool> _is_connective_or_comma;
|
377
|
+
|
378
|
+
bool isConnectiveOrComma(int w) {
|
379
|
+
return _is_connective_or_comma[w];
|
380
|
+
}
|
381
|
+
};
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#ifndef __TRIE_HPP__
|
2
|
+
#define __TRIE_HPP__
|
3
|
+
|
4
|
+
#include <stdexcept>
#include <string>
|
5
|
+
|
6
|
+
/*
  Trie keyed by NUL-terminated strings over a small fixed alphabet:
  the digits '0'-'9', the upper-case letter 'D', the underscore '_'
  and the lower-case letters c, d, e, f, i, l, n, r, u, w.

  Each node stores one child pointer per supported character. Keys that
  contain any other character can never be stored: lookup() reports them
  as NOT_FOUND and insert() rejects them with std::invalid_argument.
*/
template<class T>
class Trie {
public:
  Trie();
  ~Trie();

  // Associates value with key, replacing any value stored before.
  // Throws std::invalid_argument if key contains an unsupported character.
  void insert(const char* key, T value);

  // Returns the value stored under key, or NOT_FOUND (converted to T)
  // when the key is absent.
  T lookup(const char* key) const;

  // Returned if the key is not found in the trie
  static const int NOT_FOUND = -1;

private:
  // No copying: nodes own their children through raw pointers.
  Trie(const Trie&);
  void operator=(const Trie& t);

  // Number of supported chars - digits + upper + lower + other
  static const int NUM_CHARS = 10 + 1 + 10 + 1;

  // Maps a character to its child slot in [0, NUM_CHARS), or -1 when the
  // character is not part of the supported alphabet.
  static int char_to_pos(char c);

  bool _terminal;           // true iff a key ends at this node
  Trie* _next[NUM_CHARS];   // children, indexed by char_to_pos()
  T _value;                 // meaningful only when _terminal is true
};


// Creates an empty node: not terminal, no children, value-initialized value.
template <class T>
Trie<T>::Trie()
  : _terminal(false), _value() {
  for (int i = 0; i < NUM_CHARS; i++) {
    _next[i] = 0;
  }
}

// Recursively releases all child nodes.
template <class T>
Trie<T>::~Trie() {
  for (int i = 0; i < NUM_CHARS; i++) {
    delete _next[i];   // deleting a null pointer is a no-op
  }
}


template <class T>
int Trie<T>::char_to_pos(char c) {
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  // Every character outside the alphabet - including negative char
  // values and characters above 'z' - falls through to the default
  // case, so no table is indexed with an out-of-range value.
  switch (c) {
    case 'D': return 10;
    case '_': return 11;
    case 'c': return 12;
    case 'd': return 13;
    case 'e': return 14;
    case 'f': return 15;
    case 'i': return 16;
    case 'l': return 17;
    case 'n': return 18;
    case 'r': return 19;
    case 'u': return 20;
    case 'w': return 21;
    default:  return -1;
  }
}

template <class T>
void Trie<T>::insert(const char* key, T value) {
  Trie* node = this;
  for (; *key != '\0'; ++key) {
    const int pos = char_to_pos(*key);
    if (pos == -1) {
      // Nodes created for the valid prefix stay in place; they are not
      // terminal, so they do not change the result of any lookup.
      throw std::invalid_argument(
          std::string("Trie: unsupported character '") + *key + "' in key");
    }
    if (!node->_next[pos]) {
      node->_next[pos] = new Trie();
    }
    node = node->_next[pos];
  }
  node->_terminal = true;
  node->_value = value;
}

template <class T>
T Trie<T>::lookup(const char* key) const {
  const Trie* node = this;
  for (; *key != '\0'; ++key) {
    const int pos = char_to_pos(*key);
    if (pos == -1) {
      // A key with an unsupported character cannot have been inserted.
      return NOT_FOUND;
    }
    node = node->_next[pos];
    if (!node) {
      return NOT_FOUND;
    }
  }
  if (!node->_terminal) {
    return NOT_FOUND;   // key is only a prefix of stored keys
  }
  return node->_value;
}
|
117
|
+
|
118
|
+
#endif
|