grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,1887 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* Copyright 2008, 2009 Linas Vepstas */
|
5
|
+
/* All rights reserved */
|
6
|
+
/* */
|
7
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
8
|
+
/* license set forth in the LICENSE file included with this software, */
|
9
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
10
|
+
/* This license allows free redistribution and use in source and binary */
|
11
|
+
/* forms, with or without modification, subject to certain conditions. */
|
12
|
+
/* */
|
13
|
+
/*************************************************************************/
|
14
|
+
|
15
|
+
#ifndef API_C
|
16
|
+
#define API_C
|
17
|
+
|
18
|
+
#include <limits.h>
|
19
|
+
#include <math.h>
|
20
|
+
#include <string.h>
|
21
|
+
|
22
|
+
#include "api.h"
|
23
|
+
#include "disjuncts.h"
|
24
|
+
#include "error.h"
|
25
|
+
#include "preparation.h"
|
26
|
+
#include "read-regex.h"
|
27
|
+
#include "regex-morph.h"
|
28
|
+
#include "sat-solver/sat-encoder.h"
|
29
|
+
#include "corpus/corpus.h"
|
30
|
+
#include "spellcheck.h"
|
31
|
+
|
32
|
+
/***************************************************************
|
33
|
+
*
|
34
|
+
* Routines for setting Parse_Options
|
35
|
+
*
|
36
|
+
****************************************************************/
|
37
|
+
/**
 * Ordering function used to sort linkages in post-processing.
 *
 * The two records are compared field by field in a fixed priority
 * order: N_violations, unused_word_cost, fat, disjunct_cost, and_cost
 * and finally link_cost.  The first field that differs decides the
 * result.
 *
 * Returns -1 if p1 orders before p2, 1 if p1 orders after p2, and 0
 * if every compared field is equal.
 *
 * The sign is derived from relational comparisons instead of from a
 * subtraction narrowed to int.  A narrowed difference can overflow
 * for integer fields, and would collapse to 0 for a fractional
 * difference if a cost field is floating-point.
 * NOTE(review): the field types are declared elsewhere -- confirm.
 */
static int VDAL_compare_parse(Linkage_info * p1, Linkage_info * p2)
{
	if (p1->N_violations != p2->N_violations) {
		return (p1->N_violations > p2->N_violations) ? 1 : -1;
	}
	if (p1->unused_word_cost != p2->unused_word_cost) {
		return (p1->unused_word_cost > p2->unused_word_cost) ? 1 : -1;
	}
	if (p1->fat != p2->fat) {
		return (p1->fat > p2->fat) ? 1 : -1;
	}
	if (p1->disjunct_cost != p2->disjunct_cost) {
		return (p1->disjunct_cost > p2->disjunct_cost) ? 1 : -1;
	}
	if (p1->and_cost != p2->and_cost) {
		return (p1->and_cost > p2->and_cost) ? 1 : -1;
	}
	if (p1->link_cost != p2->link_cost) {
		return (p1->link_cost > p2->link_cost) ? 1 : -1;
	}
	return 0;
}
|
59
|
+
|
60
|
+
#ifdef USE_CORPUS
|
61
|
+
/**
 * Ordering function based on the corpus_cost field of two linkages.
 *
 * When the two corpus costs differ by less than 1.0e-5 the decision
 * is delegated to VDAL_compare_parse(); otherwise -1 is returned if
 * p1 has the smaller corpus cost and 1 if it has the larger one.
 */
static int CORP_compare_parse(Linkage_info * p1, Linkage_info * p2)
{
	double delta = p1->corpus_cost - p2->corpus_cost;

	/* Costs this close are treated as a tie. */
	if (fabs(delta) < 1.0e-5) {
		return VDAL_compare_parse(p1, p2);
	}

	return (delta < 0.0f) ? -1 : 1;
}
|
69
|
+
#endif
|
70
|
+
|
71
|
+
/**
|
72
|
+
* Create and initialize a Parse_Options object
|
73
|
+
*/
|
74
|
+
Parse_Options parse_options_create(void)
|
75
|
+
{
|
76
|
+
Parse_Options po;
|
77
|
+
|
78
|
+
init_memusage();
|
79
|
+
po = (Parse_Options) xalloc(sizeof(struct Parse_Options_s));
|
80
|
+
|
81
|
+
/* Here's where the values are initialized */
|
82
|
+
po->verbosity = 1;
|
83
|
+
po->linkage_limit = 100;
|
84
|
+
po->disjunct_cost = MAX_DISJUNCT_COST;
|
85
|
+
po->use_fat_links = FALSE;
|
86
|
+
po->min_null_count = 0;
|
87
|
+
po->max_null_count = 0;
|
88
|
+
po->null_block = 1;
|
89
|
+
po->islands_ok = FALSE;
|
90
|
+
po->use_spell_guess = TRUE;
|
91
|
+
po->use_sat_solver = FALSE;
|
92
|
+
|
93
|
+
#ifdef XXX_USE_CORPUS
|
94
|
+
/* Use the corpus cost model, if available.
|
95
|
+
* It really does a better job at parse ranking.
|
96
|
+
* Err .. sometimes ...
|
97
|
+
*/
|
98
|
+
po->cost_model.compare_fn = &CORP_compare_parse;
|
99
|
+
po->cost_model.type = CORPUS;
|
100
|
+
#else /* USE_CORPUS */
|
101
|
+
po->cost_model.compare_fn = &VDAL_compare_parse;
|
102
|
+
po->cost_model.type = VDAL;
|
103
|
+
#endif /* USE_CORPUS */
|
104
|
+
po->short_length = 6;
|
105
|
+
po->all_short = FALSE;
|
106
|
+
po->twopass_length = 30;
|
107
|
+
po->max_sentence_length = 170;
|
108
|
+
po->resources = resources_create();
|
109
|
+
po->display_short = TRUE;
|
110
|
+
po->display_word_subscripts = TRUE;
|
111
|
+
po->display_link_subscripts = TRUE;
|
112
|
+
po->display_walls = FALSE;
|
113
|
+
po->display_union = FALSE;
|
114
|
+
po->allow_null = TRUE;
|
115
|
+
po->use_cluster_disjuncts = FALSE;
|
116
|
+
po->echo_on = FALSE;
|
117
|
+
po->batch_mode = FALSE;
|
118
|
+
po->panic_mode = FALSE;
|
119
|
+
po->screen_width = 79;
|
120
|
+
po->display_on = TRUE;
|
121
|
+
po->display_postscript = FALSE;
|
122
|
+
po->display_constituents = 0;
|
123
|
+
po->display_bad = FALSE;
|
124
|
+
po->display_disjuncts = FALSE;
|
125
|
+
po->display_links = FALSE;
|
126
|
+
po->display_senses = FALSE;
|
127
|
+
|
128
|
+
return po;
|
129
|
+
}
|
130
|
+
|
131
|
+
/**
 * Release a Parse_Options object together with the resources object
 * it holds.
 *
 * Passing a NULL handle is accepted and does nothing.
 *
 * Always returns 0.
 */
int parse_options_delete(Parse_Options opts)
{
	/* Nothing to release for a NULL handle. */
	if (!opts) {
		return 0;
	}

	resources_delete(opts->resources);
	xfree(opts, sizeof(struct Parse_Options_s));
	return 0;
}
|
137
|
+
|
138
|
+
/**
 * Select the cost model, and with it the comparison function, stored
 * in opts.
 *
 * cm is expected to be VDAL or CORPUS.  CORPUS is only honoured when
 * the code is built with USE_CORPUS; otherwise an error is reported
 * and opts is left unchanged.  Any other value is reported as an
 * illegal cost model and opts is left unchanged.
 */
void parse_options_set_cost_model_type(Parse_Options opts, int cm)
{
	if (cm == VDAL) {
		opts->cost_model.type = VDAL;
		opts->cost_model.compare_fn = &VDAL_compare_parse;
		return;
	}

	if (cm == CORPUS) {
#ifdef USE_CORPUS
		opts->cost_model.type = CORPUS;
		opts->cost_model.compare_fn = &CORP_compare_parse;
#else
		prt_error("Error: Source code compiled with cost model 'CORPUS' disabled.\n");
#endif
		return;
	}

	prt_error("Error: Illegal cost model: %d\n", cm);
}
|
157
|
+
|
158
|
+
/**
 * Report the cost model type currently stored in opts.
 */
int parse_options_get_cost_model_type(Parse_Options opts)
{
	int model = opts->cost_model.type;

	return model;
}
|
162
|
+
|
163
|
+
/**
 * Store the verbosity level in opts and mirror the stored value into
 * the global `verbosity` variable.
 */
void parse_options_set_verbosity(Parse_Options opts, int dummy)
{
	/* The global is one of the only global variables; it receives
	 * the value as stored in opts. */
	verbosity = opts->verbosity = dummy;
}
|
169
|
+
|
170
|
+
/**
 * Report the verbosity field of opts.
 */
int parse_options_get_verbosity(Parse_Options opts)
{
	return opts->verbosity;
}
|
173
|
+
|
174
|
+
/**
 * Turn use of the Boolean SAT parser on or off.
 *
 * When built with USE_SAT_SOLVER the value is stored in the
 * use_sat_solver field of opts.  Without it the field is left
 * untouched, and an error is reported only when the caller asks to
 * enable the SAT parser (nonzero value); a request to disable it is
 * silently accepted.
 */
void parse_options_set_use_sat_parser(Parse_Options opts, int dummy)
{
#ifdef USE_SAT_SOLVER
	opts->use_sat_solver = dummy;
#else
	(void) opts;
	if (dummy) {
		prt_error("Error: cannot enable the Boolean SAT parser; this "
		          "library was built without SAT solver support.\n");
	}
#endif
}
|
182
|
+
/**
 * Report the use_sat_solver field of opts.
 */
int parse_options_get_use_sat_parser(Parse_Options opts)
{
	return opts->use_sat_solver;
}
|
185
|
+
|
186
|
+
/**
 * Store the given value in the use_fat_links field of opts.
 */
void parse_options_set_use_fat_links(Parse_Options opts, int dummy)
{
	opts->use_fat_links = dummy;
}
|
189
|
+
/**
 * Report the use_fat_links field of opts.
 */
int parse_options_get_use_fat_links(Parse_Options opts)
{
	return opts->use_fat_links;
}
|
192
|
+
|
193
|
+
/**
 * Store the given value in the linkage_limit field of opts.
 */
void parse_options_set_linkage_limit(Parse_Options opts, int dummy)
{
	opts->linkage_limit = dummy;
}
|
196
|
+
/**
 * Report the linkage_limit field of opts.
 */
int parse_options_get_linkage_limit(Parse_Options opts)
{
	return opts->linkage_limit;
}
|
199
|
+
|
200
|
+
/**
 * Store an integer value in the disjunct_cost field of opts.
 */
void parse_options_set_disjunct_cost(Parse_Options opts, int dummy)
{
	opts->disjunct_cost = dummy;
}
|
203
|
+
/**
 * Store a float value in the disjunct_cost field of opts.
 */
void parse_options_set_disjunct_costf(Parse_Options opts, float dummy)
{
	opts->disjunct_cost = dummy;
}
|
206
|
+
/**
 * Report the disjunct_cost field of opts as an int.
 */
int parse_options_get_disjunct_cost(Parse_Options opts)
{
	return opts->disjunct_cost;
}
|
209
|
+
/**
 * Report the disjunct_cost field of opts as a float.
 */
float parse_options_get_disjunct_costf(Parse_Options opts)
{
	return opts->disjunct_cost;
}
|
212
|
+
|
213
|
+
/**
 * Store val in the min_null_count field of opts.
 */
void parse_options_set_min_null_count(Parse_Options opts, int val)
{
	opts->min_null_count = val;
}
|
216
|
+
/**
 * Report the min_null_count field of opts.
 */
int parse_options_get_min_null_count(Parse_Options opts)
{
	return opts->min_null_count;
}
|
219
|
+
|
220
|
+
/**
 * Store val in the max_null_count field of opts.
 */
void parse_options_set_max_null_count(Parse_Options opts, int val)
{
	opts->max_null_count = val;
}
|
223
|
+
/**
 * Report the max_null_count field of opts.
 */
int parse_options_get_max_null_count(Parse_Options opts)
{
	return opts->max_null_count;
}
|
226
|
+
|
227
|
+
|
228
|
+
/**
 * Store the given value in the null_block field of opts.
 */
void parse_options_set_null_block(Parse_Options opts, int dummy)
{
	opts->null_block = dummy;
}
|
231
|
+
/**
 * Report the null_block field of opts.
 */
int parse_options_get_null_block(Parse_Options opts)
{
	return opts->null_block;
}
|
234
|
+
|
235
|
+
/**
 * Store the given value in the islands_ok field of opts.
 */
void parse_options_set_islands_ok(Parse_Options opts, int dummy)
{
	opts->islands_ok = dummy;
}
|
238
|
+
|
239
|
+
/**
 * Report the islands_ok field of opts.
 */
int parse_options_get_islands_ok(Parse_Options opts)
{
	return opts->islands_ok;
}
|
242
|
+
|
243
|
+
/**
 * Store the given value in the use_spell_guess field of opts.
 */
void parse_options_set_spell_guess(Parse_Options opts, int dummy)
{
	opts->use_spell_guess = dummy;
}
|
246
|
+
|
247
|
+
/**
 * Report the use_spell_guess field of opts.
 */
int parse_options_get_spell_guess(Parse_Options opts)
{
	return opts->use_spell_guess;
}
|
250
|
+
|
251
|
+
/**
 * Store short_length in the field of the same name in opts.
 */
void parse_options_set_short_length(Parse_Options opts, int short_length)
{
	opts->short_length = short_length;
}
|
254
|
+
|
255
|
+
/**
 * Report the short_length field of opts.
 */
int parse_options_get_short_length(Parse_Options opts)
{
	return opts->short_length;
}
|
258
|
+
|
259
|
+
/**
 * Store val in the all_short field of opts.
 */
void parse_options_set_all_short_connectors(Parse_Options opts, int val)
{
	opts->all_short = val;
}
|
262
|
+
|
263
|
+
/**
 * Report the all_short field of opts.
 */
int parse_options_get_all_short_connectors(Parse_Options opts)
{
	return opts->all_short;
}
|
266
|
+
|
267
|
+
/**
 * Store the given value in the max_parse_time field of the resources
 * object held by opts.
 */
void parse_options_set_max_parse_time(Parse_Options opts, int dummy)
{
	opts->resources->max_parse_time = dummy;
}
|
270
|
+
|
271
|
+
/**
 * Report the max_parse_time field of the resources object held by
 * opts.
 */
int parse_options_get_max_parse_time(Parse_Options opts)
{
	return opts->resources->max_parse_time;
}
|
274
|
+
|
275
|
+
/**
 * Store the given value in the max_memory field of the resources
 * object held by opts.
 */
void parse_options_set_max_memory(Parse_Options opts, int dummy)
{
	opts->resources->max_memory = dummy;
}
|
278
|
+
|
279
|
+
/**
 * Report the max_memory field of the resources object held by opts.
 */
int parse_options_get_max_memory(Parse_Options opts)
{
	return opts->resources->max_memory;
}
|
282
|
+
|
283
|
+
/**
 * Store the given value in the max_sentence_length field of opts.
 */
void parse_options_set_max_sentence_length(Parse_Options opts, int dummy)
{
	opts->max_sentence_length = dummy;
}
|
286
|
+
|
287
|
+
/**
 * Report the max_sentence_length field of opts.
 */
int parse_options_get_max_sentence_length(Parse_Options opts)
{
	return opts->max_sentence_length;
}
|
290
|
+
|
291
|
+
/**
 * Store the given value in the echo_on field of opts.
 */
void parse_options_set_echo_on(Parse_Options opts, int dummy)
{
	opts->echo_on = dummy;
}
|
294
|
+
|
295
|
+
/**
 * Report the echo_on field of opts.
 */
int parse_options_get_echo_on(Parse_Options opts)
{
	return opts->echo_on;
}
|
298
|
+
|
299
|
+
/**
 * Store the given value in the batch_mode field of opts.
 */
void parse_options_set_batch_mode(Parse_Options opts, int dummy)
{
	opts->batch_mode = dummy;
}
|
302
|
+
|
303
|
+
/**
 * Report the batch_mode field of opts.
 */
int parse_options_get_batch_mode(Parse_Options opts)
{
	return opts->batch_mode;
}
|
306
|
+
|
307
|
+
/**
 * Store the given value in the panic_mode field of opts.
 */
void parse_options_set_panic_mode(Parse_Options opts, int dummy)
{
	opts->panic_mode = dummy;
}
|
310
|
+
|
311
|
+
/**
 * Report the panic_mode field of opts.
 */
int parse_options_get_panic_mode(Parse_Options opts)
{
	return opts->panic_mode;
}
|
314
|
+
|
315
|
+
/**
 * Store the given value in the allow_null field of opts.
 */
void parse_options_set_allow_null(Parse_Options opts, int dummy)
{
	opts->allow_null = dummy;
}
|
318
|
+
|
319
|
+
/**
 * Report the allow_null field of opts.
 */
int parse_options_get_allow_null(Parse_Options opts)
{
	return opts->allow_null;
}
|
322
|
+
|
323
|
+
/**
 * Store the given value in the use_cluster_disjuncts field of opts.
 */
void parse_options_set_use_cluster_disjuncts(Parse_Options opts, int dummy)
{
	opts->use_cluster_disjuncts = dummy;
}
|
326
|
+
|
327
|
+
/**
 * Report the use_cluster_disjuncts field of opts.
 */
int parse_options_get_use_cluster_disjuncts(Parse_Options opts)
{
	return opts->use_cluster_disjuncts;
}
|
330
|
+
|
331
|
+
/**
 * Store the given value in the screen_width field of opts.
 */
void parse_options_set_screen_width(Parse_Options opts, int dummy)
{
	opts->screen_width = dummy;
}
|
334
|
+
|
335
|
+
/**
 * Report the screen_width field of opts.
 */
int parse_options_get_screen_width(Parse_Options opts)
{
	return opts->screen_width;
}
|
338
|
+
|
339
|
+
|
340
|
+
/**
 * Store the given value in the display_on field of opts.
 */
void parse_options_set_display_on(Parse_Options opts, int dummy)
{
	opts->display_on = dummy;
}
|
343
|
+
|
344
|
+
/**
 * Report the display_on field of opts.
 */
int parse_options_get_display_on(Parse_Options opts)
{
	return opts->display_on;
}
|
347
|
+
|
348
|
+
/**
 * Store the given value in the display_postscript field of opts.
 */
void parse_options_set_display_postscript(Parse_Options opts, int dummy)
{
	opts->display_postscript = dummy;
}
|
351
|
+
|
352
|
+
/**
 * Report the display_postscript field of opts.
 */
int parse_options_get_display_postscript(Parse_Options opts)
{
	int enabled = opts->display_postscript;

	return enabled;
}
|
356
|
+
|
357
|
+
/**
 * Select the constituent display style stored in opts.
 *
 * Values 0 through 3 are stored as given.  Any other value causes
 * the list of accepted values to be reported through prt_error() and
 * the field to be reset to 0.
 */
void parse_options_set_display_constituents(Parse_Options opts, int dummy)
{
	if ((0 <= dummy) && (dummy <= 3)) {
		opts->display_constituents = dummy;
		return;
	}

	/* Out of range: explain the choices and fall back to 0. */
	prt_error("Possible values for constituents: \n"
	          " 0 (no display)\n"
	          " 1 (treebank style, multi-line indented)\n"
	          " 2 (flat tree, square brackets)\n"
	          " 3 (flat treebank style)\n");
	opts->display_constituents = 0;
}
|
369
|
+
|
370
|
+
/**
 * Report the display_constituents field of opts.
 */
int parse_options_get_display_constituents(Parse_Options opts)
{
	int style = opts->display_constituents;

	return style;
}
|
374
|
+
|
375
|
+
/* Store dummy in the display_bad field of opts. */
void parse_options_set_display_bad(Parse_Options opts, int dummy)
{
    opts->display_bad = dummy;
}
|
378
|
+
|
379
|
+
/* Return the display_bad field of opts. */
int parse_options_get_display_bad(Parse_Options opts)
{
    return opts->display_bad;
}
|
382
|
+
|
383
|
+
/* Store dummy in the display_disjuncts field of opts. */
void parse_options_set_display_disjuncts(Parse_Options opts, int dummy)
{
    opts->display_disjuncts = dummy;
}
|
386
|
+
|
387
|
+
/* Return the display_disjuncts field of opts. */
int parse_options_get_display_disjuncts(Parse_Options opts)
{
    return opts->display_disjuncts;
}
|
390
|
+
|
391
|
+
/* Store dummy in the display_links field of opts. */
void parse_options_set_display_links(Parse_Options opts, int dummy)
{
    opts->display_links = dummy;
}
|
394
|
+
|
395
|
+
/* Return the display_links field of opts. */
int parse_options_get_display_links(Parse_Options opts)
{
    return opts->display_links;
}
|
398
|
+
|
399
|
+
/* Store dummy in the display_senses field of opts. */
void parse_options_set_display_senses(Parse_Options opts, int dummy)
{
    opts->display_senses = dummy;
}
|
402
|
+
|
403
|
+
/* Return the display_senses field of opts. */
int parse_options_get_display_senses(Parse_Options opts)
{
    return opts->display_senses;
}
|
406
|
+
|
407
|
+
/* Store dummy in the display_walls field of opts. */
void parse_options_set_display_walls(Parse_Options opts, int dummy)
{
    opts->display_walls = dummy;
}
|
410
|
+
|
411
|
+
/* Return the display_walls field of opts. */
int parse_options_get_display_walls(Parse_Options opts)
{
    return opts->display_walls;
}
|
414
|
+
|
415
|
+
/* Return the display_union field of opts. */
int parse_options_get_display_union(Parse_Options opts)
{
    return opts->display_union;
}
|
418
|
+
|
419
|
+
/* Store dummy in the display_union field of opts. */
void parse_options_set_display_union(Parse_Options opts, int dummy)
{
    opts->display_union = dummy;
}
|
422
|
+
|
423
|
+
/* Return what resources_timer_expired() reports for opts->resources. */
int parse_options_timer_expired(Parse_Options opts)
{
    return resources_timer_expired(opts->resources);
}
|
426
|
+
|
427
|
+
/* Return what resources_memory_exhausted() reports for opts->resources. */
int parse_options_memory_exhausted(Parse_Options opts)
{
    return resources_memory_exhausted(opts->resources);
}
|
430
|
+
|
431
|
+
/*
 * Return 1 if either the timer has expired or memory is exhausted for
 * opts->resources, and 0 otherwise.  The memory check is consulted only
 * when the timer check reports zero.
 */
int parse_options_resources_exhausted(Parse_Options opts)
{
    if (resources_timer_expired(opts->resources)) return 1;
    return resources_memory_exhausted(opts->resources) ? 1 : 0;
}
|
434
|
+
|
435
|
+
/* Call resources_reset() on opts->resources. */
void parse_options_reset_resources(Parse_Options opts)
{
    resources_reset(opts->resources);
}
|
438
|
+
|
439
|
+
|
440
|
+
/***************************************************************
|
441
|
+
*
|
442
|
+
* Routines for manipulating Dictionary
|
443
|
+
*
|
444
|
+
****************************************************************/
|
445
|
+
|
446
|
+
/*
 * Return a freshly allocated copy of str, cut off at its first '.'.
 *
 * Units will typically have a ".u" at the end.  Get rid of it, as
 * otherwise stripping is messed up.  A '.' in the very first position
 * is left alone, so a string that starts with '.' is never reduced to
 * the empty string.
 *
 * Returns NULL when str is NULL or when memory cannot be obtained.
 * The caller owns the result and releases it with free().
 */
static inline char * deinflect(const char * str)
{
    size_t nbytes;
    char * copy;
    char * dot;

    if (str == NULL) return NULL;

    /* Copy including the terminating NUL. */
    nbytes = strlen(str) + 1;
    copy = (char *) malloc(nbytes);
    if (copy == NULL) return NULL;
    memcpy(copy, str, nbytes);

    dot = strchr(copy, '.');
    if (dot != NULL && dot != copy) *dot = '\0';
    return copy;
}
|
455
|
+
|
456
|
+
/*
 * Fill in the affix arrays of an affix dictionary.
 *
 * The list returned by list_whole_dictionary() is walked twice.  The
 * first walk counts the entries carrying each of the RPUNC, LPUNC,
 * UNITS, SUF and PRE connectors so the arrays can be sized.  The second
 * walk fills the arrays and releases each list node as it goes.
 *
 * strip_right, strip_left and strip_units receive deinflect() copies of
 * the entry strings; suffix and prefix receive the entry's own string
 * pointer.
 */
static void affix_list_create(Dictionary dict)
{
    const char * rpunc_con = "RPUNC";
    const char * lpunc_con = "LPUNC";
    const char * units_con = "UNITS";

    /* Hmm SUF and PRE do not seem to be used at this time ... */
    const char * suf_con = "SUF";
    const char * pre_con = "PRE";

    int n_right = 0, n_left = 0, n_units = 0;
    int n_suffix = 0, n_prefix = 0;
    int ri = 0, li = 0, ui = 0, si = 0, pi = 0;
    Dict_node * head, * node, * next;

    dict->strip_left = NULL;
    dict->strip_right = NULL;
    dict->strip_units = NULL;
    dict->prefix = NULL;
    dict->suffix = NULL;

    /* First walk: count the entries of every affix class. */
    head = list_whole_dictionary(dict->root, NULL);
    node = head;
    while (node != NULL)
    {
        if (word_has_connector(node, rpunc_con, 0)) n_right++;
        if (word_has_connector(node, lpunc_con, 0)) n_left++;
        if (word_has_connector(node, units_con, 0)) n_units++;
        if (word_has_connector(node, suf_con, 0)) n_suffix++;
        if (word_has_connector(node, pre_con, 0)) n_prefix++;
        node = node->right;
    }

    dict->strip_right = (const char **) xalloc(n_right * sizeof(char *));
    dict->strip_left = (const char **) xalloc(n_left * sizeof(char *));
    dict->strip_units = (const char **) xalloc(n_units * sizeof(char *));
    dict->suffix = (const char **) xalloc(n_suffix * sizeof(char *));
    dict->prefix = (const char **) xalloc(n_prefix * sizeof(char *));

    dict->r_strippable = n_right;
    dict->l_strippable = n_left;
    dict->u_strippable = n_units;
    dict->p_strippable = n_prefix;
    dict->s_strippable = n_suffix;

    /* Second walk: fill the arrays, freeing each list node once used. */
    for (node = head; node != NULL; node = next)
    {
        if (word_has_connector(node, rpunc_con, 0))
        {
            dict->strip_right[ri++] = deinflect(node->string);
        }
        if (word_has_connector(node, lpunc_con, 0))
        {
            dict->strip_left[li++] = deinflect(node->string);
        }
        if (word_has_connector(node, units_con, 0))
        {
            dict->strip_units[ui++] = deinflect(node->string);
        }
        if (word_has_connector(node, suf_con, 0))
        {
            dict->suffix[si++] = node->string;
        }
        if (word_has_connector(node, pre_con, 0))
        {
            dict->prefix[pi++] = node->string;
        }

        next = node->right;
        node->right = NULL;
        xfree(node, sizeof(Dict_node));
    }
}
|
540
|
+
|
541
|
+
/*
 * Release the affix arrays built by affix_list_create().
 *
 * The strings in strip_left, strip_right and strip_units are freed one
 * by one.  The strings in suffix and prefix are not freed here; only
 * the pointer arrays are released.
 */
static void affix_list_delete(Dictionary dict)
{
    int n;

    n = 0;
    while (n < dict->l_strippable)
    {
        free((char *) dict->strip_left[n]);
        n++;
    }

    n = 0;
    while (n < dict->r_strippable)
    {
        free((char *) dict->strip_right[n]);
        n++;
    }

    n = 0;
    while (n < dict->u_strippable)
    {
        free((char *) dict->strip_units[n]);
        n++;
    }

    /* Sizes handed to xfree() mirror those used at allocation time. */
    xfree(dict->strip_right, dict->r_strippable * sizeof(char *));
    xfree(dict->strip_left, dict->l_strippable * sizeof(char *));
    xfree(dict->strip_units, dict->u_strippable * sizeof(char *));
    xfree(dict->suffix, dict->s_strippable * sizeof(char *));
    xfree(dict->prefix, dict->p_strippable * sizeof(char *));
}
|
562
|
+
|
563
|
+
/**
|
564
|
+
* The following function is dictionary_create with an extra
|
565
|
+
* paramater called "path". If this is non-null, then the path
|
566
|
+
* used to find the file is taken from that path. Otherwise,
|
567
|
+
* the path is taken from the dict_name. This is only needed
|
568
|
+
* because an affix_file is opened by a recursive call to this
|
569
|
+
* function.
|
570
|
+
*/
|
571
|
+
static Dictionary
|
572
|
+
dictionary_six(const char * lang, const char * dict_name,
|
573
|
+
const char * pp_name, const char * cons_name,
|
574
|
+
const char * affix_name, const char * regex_name)
|
575
|
+
{
|
576
|
+
const char * t;
|
577
|
+
Dictionary dict;
|
578
|
+
Dict_node *dict_node;
|
579
|
+
|
580
|
+
init_memusage();
|
581
|
+
|
582
|
+
dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
|
583
|
+
memset(dict, 0, sizeof(struct Dictionary_s));
|
584
|
+
|
585
|
+
dict->string_set = string_set_create();
|
586
|
+
|
587
|
+
dict->lang = lang;
|
588
|
+
t = strrchr (lang, '/');
|
589
|
+
if (t) dict->lang = string_set_add(t+1, dict->string_set);
|
590
|
+
dict->name = string_set_add(dict_name, dict->string_set);
|
591
|
+
|
592
|
+
dict->max_cost = 1000;
|
593
|
+
dict->num_entries = 0;
|
594
|
+
dict->is_special = FALSE;
|
595
|
+
dict->already_got_it = '\0';
|
596
|
+
dict->line_number = 1;
|
597
|
+
dict->root = NULL;
|
598
|
+
dict->word_file_header = NULL;
|
599
|
+
dict->exp_list = NULL;
|
600
|
+
dict->affix_table = NULL;
|
601
|
+
dict->recursive_error = FALSE;
|
602
|
+
|
603
|
+
/* To disable spell-checking, just set the cheker to NULL */
|
604
|
+
dict->spell_checker = spellcheck_create(dict->lang);
|
605
|
+
|
606
|
+
dict->fp = dictopen(dict->name, "r");
|
607
|
+
if (dict->fp == NULL)
|
608
|
+
{
|
609
|
+
prt_error("Error: Could not open dictionary %s\n", dict_name);
|
610
|
+
goto failure;
|
611
|
+
}
|
612
|
+
|
613
|
+
if (!read_dictionary(dict))
|
614
|
+
{
|
615
|
+
fclose(dict->fp);
|
616
|
+
goto failure;
|
617
|
+
}
|
618
|
+
fclose(dict->fp);
|
619
|
+
|
620
|
+
dict->affix_table = NULL;
|
621
|
+
if (affix_name != NULL)
|
622
|
+
{
|
623
|
+
dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
|
624
|
+
if (dict->affix_table == NULL)
|
625
|
+
{
|
626
|
+
goto failure;
|
627
|
+
}
|
628
|
+
affix_list_create(dict->affix_table);
|
629
|
+
}
|
630
|
+
|
631
|
+
dict->regex_root = NULL;
|
632
|
+
if (regex_name != NULL)
|
633
|
+
{
|
634
|
+
int rc;
|
635
|
+
rc = read_regex_file(dict, regex_name);
|
636
|
+
if (rc) goto failure;
|
637
|
+
rc = compile_regexs(dict);
|
638
|
+
if (rc) goto failure;
|
639
|
+
}
|
640
|
+
|
641
|
+
#if USE_CORPUS
|
642
|
+
dict->corpus = NULL;
|
643
|
+
if (affix_name != NULL) /* Don't do this for the second time */
|
644
|
+
{
|
645
|
+
dict->corpus = lg_corpus_new();
|
646
|
+
}
|
647
|
+
#endif
|
648
|
+
|
649
|
+
dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
|
650
|
+
dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);
|
651
|
+
dict->postprocessor = post_process_open(pp_name);
|
652
|
+
dict->constituent_pp = post_process_open(cons_name);
|
653
|
+
|
654
|
+
dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
|
655
|
+
dict->use_unknown_word = TRUE;
|
656
|
+
|
657
|
+
#if DONT_USE_REGEX_GUESSING
|
658
|
+
dict->capitalized_word_defined = boolean_dictionary_lookup(dict, PROPER_WORD);
|
659
|
+
dict->pl_capitalized_word_defined = boolean_dictionary_lookup(dict, PL_PROPER_WORD);
|
660
|
+
|
661
|
+
dict->hyphenated_word_defined = boolean_dictionary_lookup(dict, HYPHENATED_WORD);
|
662
|
+
dict->number_word_defined = boolean_dictionary_lookup(dict, NUMBER_WORD);
|
663
|
+
|
664
|
+
dict->ing_word_defined = boolean_dictionary_lookup(dict, ING_WORD);
|
665
|
+
dict->s_word_defined = boolean_dictionary_lookup(dict, S_WORD);
|
666
|
+
dict->ed_word_defined = boolean_dictionary_lookup(dict, ED_WORD);
|
667
|
+
dict->ly_word_defined = boolean_dictionary_lookup(dict, LY_WORD);
|
668
|
+
#endif /* DONT_USE_REGEX_GUESSING */
|
669
|
+
|
670
|
+
if ((dict_node = dictionary_lookup_list(dict, ANDABLE_CONNECTORS_WORD)) != NULL) {
|
671
|
+
dict->andable_connector_set = connector_set_create(dict_node->exp);
|
672
|
+
} else {
|
673
|
+
dict->andable_connector_set = NULL;
|
674
|
+
}
|
675
|
+
free_lookup_list(dict_node);
|
676
|
+
|
677
|
+
if ((dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD)) != NULL) {
|
678
|
+
dict->unlimited_connector_set = connector_set_create(dict_node->exp);
|
679
|
+
} else {
|
680
|
+
dict->unlimited_connector_set = NULL;
|
681
|
+
}
|
682
|
+
free_lookup_list(dict_node);
|
683
|
+
|
684
|
+
return dict;
|
685
|
+
|
686
|
+
failure:
|
687
|
+
string_set_delete(dict->string_set);
|
688
|
+
xfree(dict, sizeof(struct Dictionary_s));
|
689
|
+
return NULL;
|
690
|
+
}
|
691
|
+
|
692
|
+
Dictionary
|
693
|
+
dictionary_create(const char * dict_name, const char * pp_name,
|
694
|
+
const char * cons_name, const char * affix_name)
|
695
|
+
{
|
696
|
+
return dictionary_six("en", dict_name, pp_name, cons_name, affix_name, NULL);
|
697
|
+
}
|
698
|
+
|
699
|
+
Dictionary dictionary_create_lang(const char * lang)
|
700
|
+
{
|
701
|
+
Dictionary dictionary;
|
702
|
+
|
703
|
+
if(lang && *lang)
|
704
|
+
{
|
705
|
+
char * dict_name;
|
706
|
+
char * pp_name;
|
707
|
+
char * cons_name;
|
708
|
+
char * affix_name;
|
709
|
+
char * regex_name;
|
710
|
+
|
711
|
+
dict_name = join_path(lang, "4.0.dict");
|
712
|
+
pp_name = join_path(lang, "4.0.knowledge");
|
713
|
+
cons_name = join_path(lang, "4.0.constituent-knowledge");
|
714
|
+
affix_name = join_path(lang, "4.0.affix");
|
715
|
+
regex_name = join_path(lang, "4.0.regex");
|
716
|
+
|
717
|
+
dictionary = dictionary_six(lang, dict_name, pp_name, cons_name,
|
718
|
+
affix_name, regex_name);
|
719
|
+
|
720
|
+
free(regex_name);
|
721
|
+
free(affix_name);
|
722
|
+
free(cons_name);
|
723
|
+
free(pp_name);
|
724
|
+
free(dict_name);
|
725
|
+
}
|
726
|
+
else
|
727
|
+
{
|
728
|
+
prt_error("Error: No language specified!\n");
|
729
|
+
dictionary = NULL;
|
730
|
+
}
|
731
|
+
|
732
|
+
return dictionary;
|
733
|
+
}
|
734
|
+
|
735
|
+
/*
 * Create the dictionary for the language reported by
 * get_default_locale().  When that call yields NULL or an empty string,
 * "en" is used instead.
 *
 * The locale string is released with free() on every path; previously a
 * non-NULL but empty string was never freed.
 */
Dictionary dictionary_create_default_lang(void)
{
    Dictionary dictionary;
    char * lang;

    lang = get_default_locale();
    if (lang && *lang) {
        dictionary = dictionary_create_lang(lang);
    } else {
        /* Default to en when locales are broken (e.g. WIN32) */
        dictionary = dictionary_create_lang("en");
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free(lang);

    return dictionary;
}
|
751
|
+
|
752
|
+
/*
 * Release a dictionary and everything hanging off it: the corpus (when
 * built with USE_CORPUS), the affix table with its affix lists, the
 * spell checker, both connector sets, both postprocessors, the string
 * set, the regexes and the dictionary contents.
 *
 * A NULL dict is accepted and ignored, so the result of a failed
 * dictionary creation can be passed in safely.
 *
 * Always returns 0.
 */
int dictionary_delete(Dictionary dict)
{
    if (!dict) return 0;

    if (verbosity > 0) {
        prt_error("Info: Freeing dictionary %s\n", dict->name);
    }

#if USE_CORPUS
    lg_corpus_delete(dict->corpus);
#endif

    if (dict->affix_table != NULL) {
        /* The affix lists live in the affix dictionary itself, so they
         * must go before that dictionary is deleted. */
        affix_list_delete(dict->affix_table);
        dictionary_delete(dict->affix_table);
    }
    spellcheck_destroy(dict->spell_checker);

    connector_set_delete(dict->andable_connector_set);
    connector_set_delete(dict->unlimited_connector_set);

    post_process_close(dict->postprocessor);
    post_process_close(dict->constituent_pp);
    string_set_delete(dict->string_set);
    free_regexs(dict);
    free_dictionary(dict);
    xfree(dict, sizeof(struct Dictionary_s));

    return 0;
}
|
780
|
+
|
781
|
+
/* Return the max_cost field of dict. */
int dictionary_get_max_cost(Dictionary dict) {
    return dict->max_cost;
}
|
785
|
+
|
786
|
+
/***************************************************************
|
787
|
+
*
|
788
|
+
* Routines for postprocessing
|
789
|
+
*
|
790
|
+
****************************************************************/
|
791
|
+
|
792
|
+
static Linkage_info * linkage_info_new(int num_to_alloc)
|
793
|
+
{
|
794
|
+
Linkage_info *link_info;
|
795
|
+
link_info = (Linkage_info *) xalloc(num_to_alloc * sizeof(Linkage_info));
|
796
|
+
memset(link_info, 0, num_to_alloc * sizeof(Linkage_info));
|
797
|
+
return link_info;
|
798
|
+
}
|
799
|
+
|
800
|
+
/*
 * Release an array of sz Linkage_info records obtained from
 * linkage_info_new(), including the per-word disjunct strings of each
 * record and, when USE_CORPUS is defined, its sense data.
 *
 * The array is returned to xfree() with its size in bytes, matching the
 * size linkage_info_new() passed to xalloc().  Previously the element
 * count was passed instead.
 */
static void linkage_info_delete(Linkage_info *link_info, int sz)
{
    int i,j;

    for (i=0; i<sz; i++)
    {
        Linkage_info *lifo = &link_info[i];
        int nwords = lifo->nwords;
        for (j=0; j<nwords; j++)
        {
            /* free(NULL) is a no-op, so unset entries need no test. */
            free(lifo->disjunct_list_str[j]);
        }
        free(lifo->disjunct_list_str);
#ifdef USE_CORPUS
        lg_sense_delete(lifo);
#endif
    }
    xfree(link_info, sz * sizeof(Linkage_info));
}
|
820
|
+
|
821
|
+
/*
 * Free the andlist chain of each of the first
 * sent->num_linkages_post_processed entries of sent->link_info.
 */
static void free_andlists(Sentence sent)
{
    int linkage;

    for (linkage = 0; linkage < sent->num_linkages_post_processed; linkage++)
    {
        Andlist * item = sent->link_info[linkage].andlist;

        while (item != NULL)
        {
            /* Remember the successor before the node goes away. */
            Andlist * following = item->next;
            xfree((char *) item, sizeof(Andlist));
            item = following;
        }
    }
}
|
838
|
+
|
839
|
+
/*
 * Discard the results of an earlier post-processing run, if any: the
 * andlists, then the link_info array, after which sent->link_info is
 * reset to NULL.  Does nothing when sent->link_info is already NULL.
 */
static void free_post_processing(Sentence sent)
{
    if (sent->link_info == NULL) return;

    /* A non-NULL link_info means postprocessing must have been done. */
    free_andlists(sent);
    linkage_info_delete(sent->link_info, sent->num_linkages_alloced);
    sent->link_info = NULL;
}
|
848
|
+
|
849
|
+
/*
 * Build the parse set for the sentence at its current null count, pick
 * the linkages to examine, post-process them and store the sorted
 * results in the sentence.
 *
 * On return the sentence fields num_linkages_alloced,
 * num_linkages_post_processed, num_valid_linkages, num_thin_linkages
 * and link_info describe the outcome.  When no linkages were found all
 * four counters are 0 and link_info is NULL.
 *
 * At most opts->linkage_limit linkages are examined.  Both passes stop
 * early once resources_exhausted() reports that opts->resources has
 * run out.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
    int *indices;
    int in, block_bottom, block_top;
    int N_linkages_found, N_linkages_alloced;
    int N_linkages_post_processed, N_valid_linkages;
    int N_thin_linkages;
    int overflowed, only_canonical_allowed;
    Linkage_info *link_info;
    int canonical;

    /* Drop whatever an earlier call left behind in sent->link_info. */
    free_post_processing(sent);

    overflowed = build_parse_set(sent, sent->null_count, opts);
    print_time(opts, "Built parse set");

    if (overflowed && (1 < opts->verbosity))
    {
        err_ctxt ec;
        ec.sent = sent;
        err_msg(&ec, Warn, "Warning: Count overflow.\n"
            "Considering a random subset of %d of an unknown and large number of linkages\n",
            opts->linkage_limit);
    }
    N_linkages_found = sent->num_linkages_found;

    /* Nothing to examine: publish an empty result and leave. */
    if (sent->num_linkages_found == 0)
    {
        sent->num_linkages_alloced = 0;
        sent->num_linkages_post_processed = 0;
        sent->num_valid_linkages = 0;
        sent->num_thin_linkages = 0;
        sent->link_info = NULL;
        return;
    }

    /* N_linkages_alloced = min(linkage_limit, N_linkages_found). */
    if (N_linkages_found > opts->linkage_limit)
    {
        N_linkages_alloced = opts->linkage_limit;
        if (opts->verbosity > 1)
        {
            err_ctxt ec;
            ec.sent = sent;
            err_msg(&ec, Warn, "Warning: Considering a random subset of %d of %d linkages\n",
                N_linkages_alloced, N_linkages_found);
        }
    }
    else
    {
        N_linkages_alloced = N_linkages_found;
    }

    link_info = linkage_info_new(N_linkages_alloced);
    N_valid_linkages = 0;

    /* Generate an array of linkage indices to examine */
    indices = (int *) xalloc(N_linkages_alloced * sizeof(int));
    if (overflowed)
    {
        /* On overflow the indices are -1, -2, ...
         * NOTE(review): presumably extract_links() treats a negative
         * index as "pick a linkage at random" -- confirm there. */
        for (in=0; in < N_linkages_alloced; in++)
        {
            indices[in] = -(in+1);
        }
    }
    else
    {
        /* Seed from the sentence so the same sentence yields the same
         * selection each time. */
        sent->rand_state = N_linkages_found + sent->length;
        for (in=0; in<N_linkages_alloced; in++)
        {
            /* Split [0, N_linkages_found) into N_linkages_alloced
             * blocks and draw one index from each block.  Inside this
             * loop 1 <= N_linkages_alloced <= N_linkages_found, so
             * frac >= 1 and every block holds at least one index. */
            double frac = (double) N_linkages_found;
            frac /= (double) N_linkages_alloced;
            block_bottom = (int) (((double) in) * frac);
            block_top = (int) (((double) (in+1)) * frac);
            indices[in] = block_bottom +
                (rand_r(&sent->rand_state) % (block_top-block_bottom));
        }
    }

    only_canonical_allowed = !(overflowed || (N_linkages_found > 2*opts->linkage_limit));
    /* When we're processing only a small subset of the linkages,
     * don't worry about restricting the set we consider to be
     * canonical ones. In the extreme case where we are only
     * generating 1 in a million linkages, it's very unlikely
     * that we'll hit two symmetric variants of the same linkage
     * anyway.
     */
    /* (optional) first pass: just visit the linkages */
    /* The purpose of these two passes is to make the post-processing
     * more efficient. Because (hopefully) by the time you do the
     * real work in the 2nd pass you've pruned the relevant rule set
     * in the first pass.
     */
    if (sent->length >= opts->twopass_length)
    {
        for (in=0; (in < N_linkages_alloced) &&
            (!resources_exhausted(opts->resources)); in++)
        {
            extract_links(indices[in], sent->null_count, sent->parse_info);
            if (set_has_fat_down(sent))
            {
                if (only_canonical_allowed && !is_canonical_linkage(sent)) continue;
                analyze_fat_linkage(sent, opts, PP_FIRST_PASS);
            }
            else
            {
                analyze_thin_linkage(sent, opts, PP_FIRST_PASS);
            }
        }
    }

    /* second pass: actually perform post-processing */
    N_linkages_post_processed = 0;
    N_thin_linkages = 0;
    for (in=0; (in < N_linkages_alloced) &&
        (!resources_exhausted(opts->resources)); in++)
    {
        /* Results are packed: a skipped linkage does not consume a
         * slot, so lifo follows N_linkages_post_processed, not in. */
        Linkage_info *lifo = &link_info[N_linkages_post_processed];
        extract_links(indices[in], sent->null_count, sent->parse_info);
        if (set_has_fat_down(sent))
        {
            canonical = is_canonical_linkage(sent);
            if (only_canonical_allowed && !canonical) continue;
            *lifo = analyze_fat_linkage(sent, opts, PP_SECOND_PASS);
            lifo->fat = TRUE;
            lifo->canonical = canonical;
        }
        else
        {
            *lifo = analyze_thin_linkage(sent, opts, PP_SECOND_PASS);
            lifo->fat = FALSE;
            lifo->canonical = TRUE;
        }
        /* A linkage with no violations is valid. */
        if (0 == lifo->N_violations)
        {
            N_valid_linkages++;
            if (FALSE == lifo->fat) N_thin_linkages++;
        }
        lifo->index = indices[in];
        lg_corpus_score(sent, lifo);
        N_linkages_post_processed++;
    }

    print_time(opts, "Postprocessed all linkages");
    /* Only the filled slots are sorted; the slots beyond them keep the
     * zero bytes written by linkage_info_new(). */
    qsort((void *)link_info, N_linkages_post_processed, sizeof(Linkage_info),
        (int (*)(const void *, const void *)) opts->cost_model.compare_fn);

    if (!resources_exhausted(opts->resources))
    {
        if ((N_linkages_post_processed == 0) &&
            (N_linkages_found > 0) &&
            (N_linkages_found < opts->linkage_limit))
        {
            /* With the current parser, the following sentence will elicit
             * this error:
             *
             * Well, say, Joe, you can be Friar Tuck or Much the miller's
             * son, and lam me with a quarter-staff; or I'll be the Sheriff
             * of Nottingham and you be Robin Hood a little while and kill
             * me.
             */
            err_ctxt ec;
            ec.sent = sent;
            err_msg(&ec, Error, "Error: None of the linkages is canonical\n"
                "\tN_linkages_post_processed=%d "
                "N_linkages_found=%d\n",
                N_linkages_post_processed,
                N_linkages_found);
        }
    }

    if (opts->verbosity > 1)
    {
        err_ctxt ec;
        ec.sent = sent;
        err_msg(&ec, Info, "Info: %d of %d linkages with no P.P. violations\n",
            N_valid_linkages, N_linkages_post_processed);
    }

    print_time(opts, "Sorted all linkages");

    /* Publish the results on the sentence. */
    sent->num_linkages_alloced = N_linkages_alloced;
    sent->num_linkages_post_processed = N_linkages_post_processed;
    sent->num_valid_linkages = N_valid_linkages;
    sent->num_thin_linkages = N_thin_linkages;
    sent->link_info = link_info;

    xfree(indices, N_linkages_alloced * sizeof(int));
    /*if(N_valid_linkages == 0) free_andlists(sent); */
}
|
1038
|
+
|
1039
|
+
/***************************************************************
|
1040
|
+
*
|
1041
|
+
* Routines for creating and destroying processing Sentences
|
1042
|
+
*
|
1043
|
+
****************************************************************/
|
1044
|
+
|
1045
|
+
/*
 * Allocate a new, zero-filled Sentence bound to dict.  The sentence
 * gets its own string set, and input_string is copied into that set as
 * the sentence's orig_sentence.
 */
Sentence sentence_create(const char *input_string, Dictionary dict)
{
    Sentence sent = (Sentence) xalloc(sizeof(struct Sentence_s));

    memset(sent, 0, sizeof(struct Sentence_s));

    sent->dict = dict;

    /* Counters start from zero. */
    sent->length = 0;
    sent->null_count = 0;
    sent->num_linkages_found = 0;
    sent->num_linkages_alloced = 0;
    sent->num_linkages_post_processed = 0;
    sent->num_valid_linkages = 0;

    /* Nothing has been computed yet. */
    sent->link_info = NULL;
    sent->deletable = NULL;
    sent->effective_dist = NULL;
    sent->parse_info = NULL;
    sent->is_conjunction = NULL;
    sent->dptr = NULL;
    sent->q_pruned_rules = FALSE;

    /* Make a copy of the input */
    sent->string_set = string_set_create();
    sent->orig_sentence = string_set_add (input_string, sent->string_set);

    return sent;
}
|
1076
|
+
|
1077
|
+
/* XXX Extreme hack alert -- English-language words are used
|
1078
|
+
* completely naked in the C source code!!! FIXME !!!!
|
1079
|
+
*/
|
1080
|
+
static void set_is_conjunction(Sentence sent)
|
1081
|
+
{
|
1082
|
+
int w;
|
1083
|
+
char * s;
|
1084
|
+
for (w=0; w<sent->length; w++) {
|
1085
|
+
s = sent->word[w].string;
|
1086
|
+
sent->is_conjunction[w] =
|
1087
|
+
(strcmp(s, "and")==0) ||
|
1088
|
+
(strcmp(s, "or" )==0) ||
|
1089
|
+
(strcmp(s, "but")==0) ||
|
1090
|
+
(strcmp(s, "nor")==0);
|
1091
|
+
}
|
1092
|
+
}
|
1093
|
+
|
1094
|
+
/*
 * Tokenize the sentence and prepare it for parsing.
 *
 * Returns 0 on success,
 *        -1 if separate_sentence() fails,
 *        -2 if unknown-word handling is not in effect and
 *           sentence_in_dictionary() fails,
 *        -3 if build_sentence_expressions() fails; in that case
 *           sent->num_valid_linkages is also reset to 0.
 */
int sentence_split(Sentence sent, Parse_Options opts)
{
    Dictionary dict = sent->dict;
    int w;

    /* Cleanup stuff previously allocated. This is because some free
     * routines depend on sent-length, which might change in different
     * parse-opts settings.
     */
    free_deletable(sent);

    /* Tokenize */
    if (!separate_sentence(sent, opts)) return -1;

    sent->q_pruned_rules = FALSE; /* for post processing */
    sent->is_conjunction = (char *) xalloc(sizeof(char)*sent->length);
    set_is_conjunction(sent);
    initialize_conjunction_tables(sent);

    /* in case we free these before they set to anything else */
    for (w = 0; w < sent->length; w++)
    {
        sent->word[w].x = NULL;
        sent->word[w].d = NULL;
    }

    /* Every word has to be in the dictionary unless unknown words are
     * both defined and in use. */
    if (!dict->unknown_word_defined || !dict->use_unknown_word)
    {
        if (!sentence_in_dictionary(sent)) return -2;
    }

    /* Look up each word in the dictionary, collect up all
     * plausible disjunct expressions for each word.
     */
    if (!build_sentence_expressions(sent, opts))
    {
        sent->num_valid_linkages = 0;
        return -3;
    }

    return 0;
}
|
1141
|
+
|
1142
|
+
/*
 * Release a sentence and everything it owns.  A NULL sent is ignored.
 * The teardown calls run in a fixed sequence; the Sentence struct
 * itself is released last.
 */
void sentence_delete(Sentence sent)
{
    if (!sent)
    {
        return;
    }

    sat_sentence_delete(sent);
    /* free_andlists(sent); */
    free_sentence_disjuncts(sent);
    free_sentence_expressions(sent);
    string_set_delete(sent->string_set);

    if (sent->parse_info)
    {
        free_parse_info(sent->parse_info);
    }

    free_post_processing(sent);
    post_process_close_sentence(sent->dict->postprocessor);
    free_deletable(sent);
    free_effective_dist(sent);
    free_count(sent);
    free_analyze(sent);

    if (sent->is_conjunction)
    {
        xfree(sent->is_conjunction, sizeof(char)*sent->length);
    }

    xfree((char *) sent, sizeof(struct Sentence_s));
}
|
1160
|
+
|
1161
|
+
/* Return sent->length, or 0 when sent is NULL. */
int sentence_length(Sentence sent)
{
    return sent ? sent->length : 0;
}
|
1166
|
+
|
1167
|
+
/*
 * Return the string of the word at position index in the sentence.
 *
 * Returns NULL when sent is NULL or when index lies outside
 * [0, sent->length); previously an out-of-range index read past the
 * valid words.
 */
const char * sentence_get_word(Sentence sent, int index)
{
    if (!sent) return NULL;
    if (index < 0 || index >= sent->length) return NULL;
    return sent->word[index].string;
}
|
1172
|
+
|
1173
|
+
/*
 * Return the string of the word at position index in the sentence.
 * Behaves exactly like sentence_get_word().
 *
 * Returns NULL when sent is NULL or when index lies outside
 * [0, sent->length); previously an out-of-range index read past the
 * valid words.
 */
const char * sentence_get_nth_word(Sentence sent, int index)
{
    if (!sent) return NULL;
    if (index < 0 || index >= sent->length) return NULL;
    return sent->word[index].string;
}
|
1178
|
+
|
1179
|
+
/* Return sent->null_count, or 0 when sent is NULL. */
int sentence_null_count(Sentence sent)
{
    return sent ? sent->null_count : 0;
}
|
1183
|
+
|
1184
|
+
/* Return sent->num_thin_linkages, or 0 when sent is NULL. */
int sentence_num_thin_linkages(Sentence sent)
{
    return sent ? sent->num_thin_linkages : 0;
}
|
1188
|
+
|
1189
|
+
/* Return sent->num_linkages_found, or 0 when sent is NULL. */
int sentence_num_linkages_found(Sentence sent)
{
    return sent ? sent->num_linkages_found : 0;
}
|
1193
|
+
|
1194
|
+
/* Return sent->num_valid_linkages, or 0 when sent is NULL. */
int sentence_num_valid_linkages(Sentence sent)
{
    return sent ? sent->num_valid_linkages : 0;
}
|
1198
|
+
|
1199
|
+
/* Return sent->num_linkages_post_processed, or 0 when sent is NULL. */
int sentence_num_linkages_post_processed(Sentence sent)
{
    return sent ? sent->num_linkages_post_processed : 0;
}
|
1203
|
+
|
1204
|
+
/*
 * Return the N_violations value recorded for linkage i.
 *
 * Returns 0 when sent is NULL, when no link_info is present, or when i
 * lies outside [0, sent->num_linkages_alloced), which is the number of
 * records link_info was allocated with.  Previously an out-of-range i
 * read outside that array.
 */
int sentence_num_violations(Sentence sent, int i) {
    if (!sent) return 0;

    /* The sat solver (currently) fails to fill in link_info */
    if (!sent->link_info) return 0;
    if (i < 0 || i >= sent->num_linkages_alloced) return 0;
    return sent->link_info[i].N_violations;
}
|
1211
|
+
|
1212
|
+
/*
 * Return the and_cost value recorded for linkage i.
 *
 * Returns 0 when sent is NULL, when no link_info is present, or when i
 * lies outside [0, sent->num_linkages_alloced), which is the number of
 * records link_info was allocated with.  Previously an out-of-range i
 * read outside that array.
 */
int sentence_and_cost(Sentence sent, int i) {
    if (!sent) return 0;

    /* The sat solver (currently) fails to fill in link_info */
    if (!sent->link_info) return 0;
    if (i < 0 || i >= sent->num_linkages_alloced) return 0;
    return sent->link_info[i].and_cost;
}
|
1219
|
+
|
1220
|
+
/*
 * Return the disjunct_cost value recorded for linkage i.
 *
 * Returns 0 when sent is NULL, when no link_info is present, or when i
 * lies outside [0, sent->num_linkages_alloced), which is the number of
 * records link_info was allocated with.  Previously an out-of-range i
 * read outside that array.
 */
int sentence_disjunct_cost(Sentence sent, int i) {
    if (!sent) return 0;

    /* The sat solver (currently) fails to fill in link_info */
    if (!sent->link_info) return 0;
    if (i < 0 || i >= sent->num_linkages_alloced) return 0;
    return sent->link_info[i].disjunct_cost;
}
|
1227
|
+
|
1228
|
+
/*
 * Return the link_cost value recorded for linkage i.
 *
 * Returns 0 when sent is NULL, when no link_info is present, or when i
 * lies outside [0, sent->num_linkages_alloced), which is the number of
 * records link_info was allocated with.  Previously an out-of-range i
 * read outside that array.
 */
int sentence_link_cost(Sentence sent, int i) {
    if (!sent) return 0;

    /* The sat solver (currently) fails to fill in link_info */
    if (!sent->link_info) return 0;
    if (i < 0 || i >= sent->num_linkages_alloced) return 0;
    return sent->link_info[i].link_cost;
}
|
1235
|
+
|
1236
|
+
/*
 * Deprecated: report whether word i has a chosen disjunct in the
 * sentence's parse_info.  A deprecation warning is printed on every
 * call with a non-NULL sent.
 *
 * Returns 0 when sent is NULL or when the sentence has no parse_info
 * yet; previously the latter case dereferenced a NULL pointer.
 *
 * NOTE(review): i is still not range-checked against the size of
 * chosen_disjuncts, which is not visible here -- confirm the valid
 * range and add a check.
 */
int sentence_nth_word_has_disjunction(Sentence sent, int i)
{
    if (!sent) return 0;
    prt_error("Warning: sentence_nth_word_has_disjunction() is deprecated!\n");
    if (!sent->parse_info) return 0;
    return (sent->parse_info->chosen_disjuncts[i] != NULL);
}
|
1242
|
+
|
1243
|
+
/*
 * Parse the sentence, trying null counts from opts->min_null_count up
 * to opts->max_null_count.
 *
 * For each null count the parses are counted with do_parse() and the
 * linkages post-processed.  The search stops at the first null count
 * that yields a valid linkage, when resources run out, or when the
 * parse count exceeds PARSE_NUM_OVERFLOW without producing a valid
 * linkage.
 */
static void chart_parse(Sentence sent, Parse_Options opts)
{
    int nulls;

    /* Build lists of disjuncts */
    prepare_to_parse(sent, opts);

    init_fast_matcher(sent);
    init_count(sent);

    /* A parse set may already have been built for this sentence,
     * if it was previously parsed. If so we free it up before
     * building another. */
    if (sent->parse_info)
    {
        free_parse_info(sent->parse_info);
    }
    sent->parse_info = parse_info_new(sent->length);

    nulls = opts->min_null_count;
    while (nulls <= opts->max_null_count)
    {
        s64 total;

        if (resources_exhausted(opts->resources)) break;

        sent->null_count = nulls;
        total = do_parse(sent, sent->null_count, opts);

        if (verbosity > 1)
        {
            prt_error("Info: Total count with %d null links: %lld\n",
                sent->null_count, total);
        }

        /* Give up if the parse count is overflowing */
        if (PARSE_NUM_OVERFLOW < total)
        {
            if (verbosity > 0)
            {
                prt_error("WARNING: Combinatorial explosion! nulls=%d cnt=%lld\n"
                    "Consider retrying the parse with the max allowed disjunct cost set lower.\n",
                    sent->null_count, total);
            }
            /* Clamp so the count still fits the int field below. */
            if (total > INT_MAX) total = INT_MAX;
        }

        sent->num_linkages_found = (int) total;
        print_time(opts, "Counted parses");

        post_process_linkages(sent, opts);
        if (sent->num_valid_linkages > 0) break;

        /* If we are here, then no valid linkages were found.
         * If there was a parse overflow, give up now. */
        if (PARSE_NUM_OVERFLOW < total) break;

        ++nulls;
    }

    free_count(sent);
    free_fast_matcher(sent);
}
|
1298
|
+
|
1299
|
+
/**
 * Parse a sentence with either the SAT solver or the chart parser,
 * as selected by opts->use_sat_solver.
 *
 * Also copies opts->verbosity into the file-level `verbosity`.
 *
 * @return -1 if the sentence had to be split here and splitting failed;
 *         -2 if the sentence has MAX_SENTENCE or more words;
 *          0 if resources are already exhausted before parsing starts
 *            (sent->num_valid_linkages is set to 0 in that case);
 *         otherwise sent->num_valid_linkages after parsing.
 */
int sentence_parse(Sentence sent, Parse_Options opts)
{
	verbosity = opts->verbosity;

	/* Backwards compatibility: programs that never call the splitter
	 * explicitly arrive here with an unsplit (zero-length) sentence. */
	if (sent->length == 0)
	{
		int split_rc = sentence_split(sent, opts);
		if (split_rc != 0) return -1;
	}

	/* Reject sentences that are too long. */
	if (sent->length >= MAX_SENTENCE)
	{
		prt_error("Error: sentence too long, contains more than %d words\n",
			MAX_SENTENCE);
		return -2;
	}

	/* Initialize/free any leftover garbage */
	free_sentence_disjuncts(sent);
	resources_reset_space(opts->resources);

	if (resources_exhausted(opts->resources))
	{
		sent->num_valid_linkages = 0;
		return 0;
	}

	init_analyze(sent);

	/* Expressions were previously set up during the tokenize stage. */
	expression_prune(sent);
	print_time(opts, "Finished expression pruning");

	if (!opts->use_sat_solver)
	{
		chart_parse(sent, opts);
	}
	else
	{
		sat_parse(sent, opts);
	}
	print_time(opts, "Finished parse");

	return sent->num_valid_linkages;
}
|
1349
|
+
|
1350
|
+
/***************************************************************
|
1351
|
+
*
|
1352
|
+
* Routines which allow user access to Linkages.
|
1353
|
+
*
|
1354
|
+
****************************************************************/
|
1355
|
+
|
1356
|
+
/**
 * Build a Linkage object for the k-th post-processed linkage of sent.
 *
 * @param k    index into sent->link_info; must satisfy
 *             0 <= k < sent->num_linkages_post_processed
 * @param sent sentence that has already been parsed
 * @param opts parse options, stored in the linkage and passed on to
 *             the extraction routines
 * @return a linkage allocated with exalloc, or NULL when k is out of
 *         range.
 */
Linkage linkage_create(int k, Sentence sent, Parse_Options opts)
{
	Linkage linkage;

	if (opts->use_sat_solver)
	{
		/* SAT-solver linkage creation is disabled here (the call to
		 * sat_create_linkage is commented out in the original), so
		 * this branch is empty and both modes use the code below. */
	}

	if ((k < 0) || (k >= sent->num_linkages_post_processed))
	{
		return NULL;
	}

	/* Using exalloc since this is external to the parser itself. */
	linkage = (Linkage) exalloc(sizeof(struct Linkage_s));

	linkage->sent = sent;
	linkage->opts = opts;
	linkage->info = &sent->link_info[k];

	linkage->num_words = sent->length;
	linkage->word = (const char **) exalloc(linkage->num_words*sizeof(char *));

	linkage->current = 0;
	linkage->num_sublinkages = 0;
	linkage->sublinkage = NULL;
	linkage->unionized = FALSE;
	linkage->dis_con_tree = NULL;

	extract_links(linkage->info->index, sent->null_count, sent->parse_info);
	compute_chosen_words(sent, linkage);

	if (!set_has_fat_down(sent))
	{
		extract_thin_linkage(sent, opts, linkage);
	}
	else
	{
		extract_fat_linkage(sent, opts, linkage);
	}

	if (NULL != sent->dict->postprocessor)
	{
		linkage_post_process(linkage, sent->dict->postprocessor);
	}

	return linkage;
}
|
1400
|
+
|
1401
|
+
/**
 * Return the index of the currently selected sublinkage
 * (the value of linkage->current).
 */
int linkage_get_current_sublinkage(Linkage linkage)
{
	int selected = linkage->current;

	return selected;
}
|
1404
|
+
|
1405
|
+
/**
 * Select which sublinkage subsequent accessor calls refer to.
 *
 * @return 1 if index was in [0, linkage->num_sublinkages) and is now
 *         the current sublinkage; 0 (with no change) otherwise.
 */
int linkage_set_current_sublinkage(Linkage linkage, int index)
{
	int in_range = (0 <= index) && (index < linkage->num_sublinkages);

	if (in_range)
	{
		linkage->current = index;
	}
	return in_range;
}
|
1415
|
+
|
1416
|
+
/**
 * Release the domain-name array held by a PP_info and reset the
 * structure to "no domains".
 *
 * The array is only handed to exfree when num_domains is positive;
 * the PP_info itself is not freed.
 */
static void exfree_pp_info(PP_info *ppi)
{
	if (0 < ppi->num_domains)
	{
		exfree(ppi->domain_name, sizeof(const char *)*ppi->num_domains);
	}

	ppi->num_domains = 0;
	ppi->domain_name = NULL;
}
|
1423
|
+
|
1424
|
+
/**
 * Free a linkage and everything it owns: the word strings, every
 * sublinkage (links, post-processing info and data, violation
 * string), the DIS/CON tree if present, and the linkage itself.
 *
 * A NULL linkage is accepted and ignored.
 */
void linkage_delete(Linkage linkage)
{
	int w, n, k;

	/* Can happen on panic timeout or user error */
	if (linkage == NULL) return;

	for (w = 0; w < linkage->num_words; w++)
	{
		const char *text = linkage->word[w];
		exfree((void *) text, strlen(text)+1);
	}
	exfree(linkage->word, sizeof(char *)*linkage->num_words);

	for (n = 0; n < linkage->num_sublinkages; n++)
	{
		Sublinkage *sub = &(linkage->sublinkage[n]);

		for (k = 0; k < sub->num_links; k++)
		{
			exfree_link(sub->link[k]);
		}
		/* NOTE(review): the size passed here is sizeof(Link), while
		 * unionize_linkage allocates this array with sizeof(Link *);
		 * confirm which one exfree's size argument should match. */
		exfree(sub->link, sizeof(Link)*sub->num_links);

		if (NULL != sub->pp_info)
		{
			for (k = 0; k < sub->num_links; k++)
			{
				exfree_pp_info(&sub->pp_info[k]);
			}
			exfree(sub->pp_info, sizeof(PP_info)*sub->num_links);
			sub->pp_info = NULL;
			post_process_free_data(&sub->pp_data);
		}

		if (NULL != sub->violation)
		{
			exfree((void *) sub->violation, sizeof(char)*(strlen(sub->violation)+1));
		}
	}
	exfree(linkage->sublinkage, sizeof(Sublinkage)*linkage->num_sublinkages);

	if (linkage->dis_con_tree)
	{
		free_DIS_tree(linkage->dis_con_tree);
	}
	exfree(linkage, sizeof(struct Linkage_s));
}
|
1462
|
+
|
1463
|
+
/**
 * Compare two links.
 *
 * @return 1 when both links have the same left word, the same right
 *         word and the same name string; 0 otherwise.
 */
static int links_are_equal(Link *l, Link *m)
{
	if (l->l != m->l) return 0;
	if (l->r != m->r) return 0;

	return (0 == strcmp(l->name, m->name));
}
|
1467
|
+
|
1468
|
+
/**
 * Check whether a link equal to `link` (per links_are_equal) occurs
 * in any of the first `a` sublinkages of the linkage.
 *
 * @return TRUE if such a link is found, FALSE otherwise.
 */
static int link_already_appears(Linkage linkage, Link *link, int a)
{
	int sub_idx;

	for (sub_idx = 0; sub_idx < a; sub_idx++)
	{
		Sublinkage *sub = &linkage->sublinkage[sub_idx];
		int link_idx = 0;

		while (link_idx < sub->num_links)
		{
			if (links_are_equal(sub->link[link_idx], link))
			{
				return TRUE;
			}
			link_idx++;
		}
	}
	return FALSE;
}
|
1479
|
+
|
1480
|
+
/**
 * Make a copy of a PP_info whose domain_name array is freshly
 * allocated with exalloc.
 *
 * Only the array is duplicated; the individual name pointers are
 * copied as-is, so the strings themselves remain shared with the
 * source.
 *
 * When the source has no domains, no array is allocated and
 * domain_name is set to NULL. exfree_pp_info releases the array only
 * when num_domains is positive, so a zero-sized allocation here would
 * never be freed.
 *
 * @param ppi source info (passed by value)
 * @return the copy; release its array with exfree_pp_info.
 */
static PP_info excopy_pp_info(PP_info ppi)
{
	PP_info newppi;
	int i;

	newppi.num_domains = ppi.num_domains;
	newppi.domain_name = NULL;

	if (ppi.num_domains > 0)
	{
		newppi.domain_name =
			(const char **) exalloc(sizeof(const char *)*ppi.num_domains);
		for (i = 0; i < newppi.num_domains; ++i)
		{
			newppi.domain_name[i] = ppi.domain_name[i];
		}
	}
	return newppi;
}
|
1493
|
+
|
1494
|
+
|
1495
|
+
/**
 * Build a new sublinkage containing the union of the links of all
 * existing sublinkages of the linkage.
 *
 * A link is included the first time it is seen: it is skipped when an
 * equal link already appears in a lower-numbered sublinkage. Links and
 * their PP_info entries are copied (excopy_link / excopy_pp_info).
 * The union's violation string is a copy of the violation of the first
 * sublinkage that both contributes a link and has a violation set.
 *
 * @return the new sublinkage, by value; its arrays are allocated with
 *         exalloc.
 */
static Sublinkage unionize_linkage(Linkage linkage)
{
	Sublinkage u;
	int sub_idx, link_idx;
	int total = 0;
	int filled;

	/* First pass: count the distinct links. */
	for (sub_idx = 0; sub_idx < linkage->num_sublinkages; sub_idx++)
	{
		Sublinkage *src = &linkage->sublinkage[sub_idx];

		for (link_idx = 0; link_idx < src->num_links; link_idx++)
		{
			if (link_already_appears(linkage, src->link[link_idx], sub_idx)) continue;
			total++;
		}
	}

	/* num_links is set before zero_sublinkage is called on u. */
	u.link = (Link **) exalloc(sizeof(Link *)*total);
	u.num_links = total;
	zero_sublinkage(&u);

	u.pp_info = (PP_info *) exalloc(sizeof(PP_info)*total);
	u.violation = NULL;
	u.num_links = total;

	/* Second pass: copy each distinct link and its pp_info. */
	filled = 0;
	for (sub_idx = 0; sub_idx < linkage->num_sublinkages; sub_idx++)
	{
		Sublinkage *src = &linkage->sublinkage[sub_idx];

		for (link_idx = 0; link_idx < src->num_links; link_idx++)
		{
			Link *candidate = src->link[link_idx];

			if (link_already_appears(linkage, candidate, sub_idx)) continue;

			u.link[filled] = excopy_link(candidate);
			u.pp_info[filled] = excopy_pp_info(src->pp_info[link_idx]);

			if ((u.violation == NULL) && (src->violation != NULL))
			{
				const char *msg = src->violation;
				char *dup = (char *) exalloc((strlen(msg)+1)*sizeof(char));
				strcpy(dup, msg);
				u.violation = dup;
			}
			filled++;
		}
	}

	return u;
}
|
1538
|
+
|
1539
|
+
/**
 * Append a "union" sublinkage (see unionize_linkage) to the linkage
 * and make it the current sublinkage.
 *
 * @return 0 if the linkage was already unionized (the last sublinkage
 *         is simply made current); 1 otherwise. When there is exactly
 *         one sublinkage nothing is appended and current is left
 *         untouched; the linkage is only flagged as unionized.
 */
int linkage_compute_union(Linkage linkage)
{
	int old_count = linkage->num_sublinkages;
	int n;
	Sublinkage *grown;
	Sublinkage *slot;

	if (linkage->unionized)
	{
		linkage->current = linkage->num_sublinkages-1;
		return 0;
	}

	if (1 == old_count)
	{
		linkage->unionized = TRUE;
		return 1;
	}

	/* Grow the sublinkage array by one slot, carrying over the
	 * existing entries by value. */
	grown = (Sublinkage *) exalloc(sizeof(Sublinkage)*(old_count+1));
	n = 0;
	while (n < old_count)
	{
		grown[n] = linkage->sublinkage[n];
		n++;
	}
	exfree(linkage->sublinkage, sizeof(Sublinkage)*old_count);
	linkage->sublinkage = grown;

	/* Zero out the new sublinkage, then unionize it. */
	slot = &grown[old_count];
	slot->link = NULL;
	slot->num_links = 0;
	zero_sublinkage(slot);
	linkage->sublinkage[old_count] = unionize_linkage(linkage);

	linkage->num_sublinkages++;

	linkage->unionized = TRUE;
	linkage->current = linkage->num_sublinkages-1;
	return 1;
}
|
1575
|
+
|
1576
|
+
/**
 * Return the number of sublinkages stored in the linkage
 * (linkage->num_sublinkages).
 */
int linkage_get_num_sublinkages(Linkage linkage)
{
	int count = linkage->num_sublinkages;

	return count;
}
|
1579
|
+
|
1580
|
+
/**
 * Return the number of words recorded in the linkage
 * (linkage->num_words).
 */
int linkage_get_num_words(Linkage linkage)
{
	int count = linkage->num_words;

	return count;
}
|
1584
|
+
|
1585
|
+
/**
 * Return the number of links in the currently selected sublinkage.
 */
int linkage_get_num_links(Linkage linkage)
{
	return linkage->sublinkage[linkage->current].num_links;
}
|
1590
|
+
|
1591
|
+
/**
 * Check that index names a link of the current sublinkage.
 *
 * @return 1 if 0 <= index < num_links of the current sublinkage,
 *         0 otherwise.
 */
static inline int verify_link_index(Linkage linkage, int index)
{
	return (0 <= index) &&
	       (index < linkage->sublinkage[linkage->current].num_links);
}
|
1600
|
+
|
1601
|
+
/**
 * Return the length of a link in the current sublinkage, not counting
 * words that have no links at all in that sublinkage.
 *
 * The length starts as the distance between the link's right and left
 * word indices and is reduced by one for every word strictly between
 * them that is not an endpoint of any link.
 *
 * @return the adjusted length, or -1 if index is not a valid link
 *         index.
 */
int linkage_get_link_length(Linkage linkage, int index)
{
	int word_has_link[MAX_SENTENCE];
	Sublinkage *sub;
	Link *target;
	int w, n, span;

	if (!verify_link_index(linkage, index)) return -1;

	sub = &linkage->sublinkage[linkage->current];

	/* Clear num_words+1 entries of the table. */
	for (w = 0; w <= linkage->num_words; w++)
	{
		word_has_link[w] = FALSE;
	}

	/* Mark both endpoints of every link in this sublinkage. */
	for (n = 0; n < sub->num_links; n++)
	{
		Link *each = sub->link[n];
		word_has_link[each->l] = TRUE;
		word_has_link[each->r] = TRUE;
	}

	target = sub->link[index];
	span = target->r - target->l;
	for (w = target->l+1; w < target->r; w++)
	{
		if (word_has_link[w]) continue;
		span--;
	}
	return span;
}
|
1627
|
+
|
1628
|
+
/**
 * Return the left word index (the `l` field) of a link in the current
 * sublinkage, or -1 if index is not a valid link index.
 */
int linkage_get_link_lword(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return -1;
	}
	return linkage->sublinkage[linkage->current].link[index]->l;
}
|
1635
|
+
|
1636
|
+
/**
 * Return the right word index (the `r` field) of a link in the current
 * sublinkage, or -1 if index is not a valid link index.
 */
int linkage_get_link_rword(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return -1;
	}
	return linkage->sublinkage[linkage->current].link[index]->r;
}
|
1643
|
+
|
1644
|
+
/**
 * Return the name of a link in the current sublinkage, or NULL if
 * index is not a valid link index. The string is owned by the link.
 */
const char * linkage_get_link_label(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return NULL;
	}
	return linkage->sublinkage[linkage->current].link[index]->name;
}
|
1651
|
+
|
1652
|
+
/**
 * Return the string of the left connector (lc->string) of a link in
 * the current sublinkage, or NULL if index is not a valid link index.
 */
const char * linkage_get_link_llabel(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return NULL;
	}
	return linkage->sublinkage[linkage->current].link[index]->lc->string;
}
|
1659
|
+
|
1660
|
+
/**
 * Return the string of the right connector (rc->string) of a link in
 * the current sublinkage, or NULL if index is not a valid link index.
 */
const char * linkage_get_link_rlabel(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return NULL;
	}
	return linkage->sublinkage[linkage->current].link[index]->rc->string;
}
|
1667
|
+
|
1668
|
+
/**
 * Return the linkage's word array (linkage->word). The array is the
 * linkage's own storage, not a copy.
 */
const char ** linkage_get_words(Linkage linkage)
{
	const char **words = linkage->word;

	return words;
}
|
1672
|
+
|
1673
|
+
/**
 * Return the sentence this linkage was created from (linkage->sent).
 */
Sentence linkage_get_sentence(Linkage linkage)
{
	Sentence owner = linkage->sent;

	return owner;
}
|
1677
|
+
|
1678
|
+
/**
 * Return the disjunct string for word w of the linkage.
 *
 * The per-linkage string table is computed on first use via
 * lg_compute_disjunct_strings. If no disjunct was chosen for word w in
 * the sentence's parse_info, the empty string is returned instead.
 * w is not range-checked here.
 */
const char * linkage_get_disjunct_str(Linkage linkage, int w)
{
	/* Populate the string table on first use. */
	if (linkage->info->disjunct_list_str == NULL)
	{
		lg_compute_disjunct_strings(linkage->sent, linkage->info);
	}

	/* The chosen disjunct is null if the word wasn't used in the parse. */
	if (linkage->sent->parse_info->chosen_disjuncts[w] == NULL)
	{
		return "";
	}
	return linkage->info->disjunct_list_str[w];
}
|
1693
|
+
|
1694
|
+
/**
 * Return the cost of the disjunct chosen for word w, or 0.0 when no
 * disjunct was chosen for that word (it did not participate in the
 * parse). w is not range-checked here.
 */
double linkage_get_disjunct_cost(Linkage linkage, int w)
{
	Disjunct *chosen = linkage->sent->parse_info->chosen_disjuncts[w];

	if (NULL == chosen)
	{
		return 0.0;
	}
	return chosen->cost;
}
|
1702
|
+
|
1703
|
+
/**
 * Return the corpus score of the disjunct chosen for word w, as
 * computed by lg_corpus_disjunct_score.
 *
 * When no disjunct was chosen for the word (it did not participate in
 * the parse), the fixed value 99.999 is returned.
 */
double linkage_get_disjunct_corpus_score(Linkage linkage, int w)
{
	if (linkage->sent->parse_info->chosen_disjuncts[w] != NULL)
	{
		return lg_corpus_disjunct_score(linkage, w);
	}
	return 99.999;
}
|
1712
|
+
|
1713
|
+
/**
 * Return the w-th word string of the linkage (linkage->word[w]).
 * w is not range-checked here.
 */
const char * linkage_get_word(Linkage linkage, int w)
{
	const char *text = linkage->word[w];

	return text;
}
|
1717
|
+
|
1718
|
+
/**
 * Return info->unused_word_cost for the linkage, or 0 when the
 * linkage has no info (the SAT solver currently does not fill it in).
 */
int linkage_unused_word_cost(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->unused_word_cost;
	}
	return 0;
}
|
1724
|
+
|
1725
|
+
/**
 * Return info->disjunct_cost rounded down (floorf) to an int, or 0
 * when the linkage has no info (the SAT solver currently does not
 * fill it in).
 */
int linkage_disjunct_cost(Linkage linkage)
{
	if (linkage->info)
	{
		return (int) floorf(linkage->info->disjunct_cost);
	}
	return 0;
}
|
1731
|
+
|
1732
|
+
/**
 * Return info->fat for the linkage, or 0 when the linkage has no info
 * (the SAT solver currently does not fill it in).
 */
int linkage_is_fat(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->fat;
	}
	return 0;
}
|
1738
|
+
|
1739
|
+
/**
 * Return info->and_cost for the linkage, or 0 when the linkage has no
 * info (the SAT solver currently does not fill it in).
 */
int linkage_and_cost(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->and_cost;
	}
	return 0;
}
|
1745
|
+
|
1746
|
+
/**
 * Return info->link_cost for the linkage, or 0 when the linkage has
 * no info (the SAT solver currently does not fill it in).
 */
int linkage_link_cost(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->link_cost;
	}
	return 0;
}
|
1752
|
+
|
1753
|
+
/**
 * Return info->corpus_cost for the linkage, or 0.0 when the linkage
 * has no info (the SAT solver currently does not fill it in).
 */
double linkage_corpus_cost(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->corpus_cost;
	}
	return 0.0;
}
|
1759
|
+
|
1760
|
+
/**
 * Return the number of post-processing domains recorded for a link of
 * the current sublinkage, or -1 if index is not a valid link index.
 */
int linkage_get_link_num_domains(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return -1;
	}
	return linkage->sublinkage[linkage->current].pp_info[index].num_domains;
}
|
1767
|
+
|
1768
|
+
/**
 * Return the array of post-processing domain names recorded for a
 * link of the current sublinkage, or NULL if index is not a valid
 * link index. The array is the sublinkage's own storage, not a copy.
 */
const char ** linkage_get_link_domain_names(Linkage linkage, int index)
{
	if (!verify_link_index(linkage, index))
	{
		return NULL;
	}
	return linkage->sublinkage[linkage->current].pp_info[index].domain_name;
}
|
1775
|
+
|
1776
|
+
/**
 * Return the violation string of the current sublinkage
 * (its `violation` field, which other code in this file sets to NULL
 * when there is none).
 */
const char * linkage_get_violation_name(Linkage linkage)
{
	const char *name = linkage->sublinkage[linkage->current].violation;

	return name;
}
|
1780
|
+
|
1781
|
+
/**
 * Return info->canonical for the linkage, or TRUE when the linkage
 * has no info (the SAT solver currently does not fill it in).
 */
int linkage_is_canonical(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->canonical;
	}
	return TRUE;
}
|
1787
|
+
|
1788
|
+
/**
 * Return info->improper_fat_linkage for the linkage, or FALSE when
 * the linkage has no info (the SAT solver currently does not fill it
 * in).
 */
int linkage_is_improper(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->improper_fat_linkage;
	}
	return FALSE;
}
|
1794
|
+
|
1795
|
+
/**
 * Return info->inconsistent_domains for the linkage, or FALSE when
 * the linkage has no info (the SAT solver currently does not fill it
 * in).
 */
int linkage_has_inconsistent_domains(Linkage linkage)
{
	if (linkage->info)
	{
		return linkage->info->inconsistent_domains;
	}
	return FALSE;
}
|
1801
|
+
|
1802
|
+
/**
 * (Re)run post-processing on every sublinkage of a linkage and record
 * the results in each sublinkage's pp_info, pp_data and violation.
 *
 * For each sublinkage, any previous pp_info / pp_data / violation is
 * released first and a fresh, empty pp_info array is allocated. The
 * post-processor is skipped when the linkage is flagged as an improper
 * fat linkage. When post_process yields a result, each link gets one
 * single-character domain name per entry of its d_type list (interned
 * in the sentence's string set), pp_data is copied from the
 * post-processor, and the violation string, if any, is duplicated.
 *
 * post_process_close_sentence is called once after all sublinkages
 * have been handled.
 */
void linkage_post_process(Linkage linkage, Postprocessor * postprocessor)
{
	int N_sublinkages = linkage_get_num_sublinkages(linkage);
	Parse_Options opts = linkage->opts;
	Sentence sent = linkage->sent;
	int sub_idx, link_idx;

	for (sub_idx = 0; sub_idx < N_sublinkages; ++sub_idx)
	{
		Sublinkage * subl = &linkage->sublinkage[sub_idx];
		PP_node * pp = NULL;

		/* Drop whatever an earlier post-processing run left behind. */
		if (NULL != subl->pp_info)
		{
			for (link_idx = 0; link_idx < subl->num_links; ++link_idx)
			{
				exfree_pp_info(&subl->pp_info[link_idx]);
			}
			post_process_free_data(&subl->pp_data);
			exfree(subl->pp_info, sizeof(PP_info)*subl->num_links);
		}

		subl->pp_info = (PP_info *) exalloc(sizeof(PP_info)*subl->num_links);
		for (link_idx = 0; link_idx < subl->num_links; ++link_idx)
		{
			subl->pp_info[link_idx].num_domains = 0;
			subl->pp_info[link_idx].domain_name = NULL;
		}

		if (NULL != subl->violation)
		{
			exfree((void *)subl->violation, sizeof(char)*(strlen(subl->violation)+1));
			subl->violation = NULL;
		}

		/* post_process can return NULL, for example if there is no
		 * post-processor. */
		if (!linkage->info->improper_fat_linkage)
		{
			pp = post_process(postprocessor, opts, sent, subl, FALSE);
		}

		if (NULL == pp)
		{
			for (link_idx = 0; link_idx < subl->num_links; ++link_idx)
			{
				subl->pp_info[link_idx].num_domains = 0;
				subl->pp_info[link_idx].domain_name = NULL;
			}
			continue;
		}

		for (link_idx = 0; link_idx < subl->num_links; ++link_idx)
		{
			D_type_list * node;
			int count = 0;

			/* Count the domain types attached to this link. */
			for (node = pp->d_type_array[link_idx]; node != NULL; node = node->next)
			{
				count++;
			}
			subl->pp_info[link_idx].num_domains = count;
			if (0 < count)
			{
				subl->pp_info[link_idx].domain_name = (const char **) exalloc(sizeof(const char *)*count);
			}

			/* Record each domain type as a one-character string. */
			count = 0;
			for (node = pp->d_type_array[link_idx]; node != NULL; node = node->next)
			{
				char buff[5];
				sprintf(buff, "%c", node->type);
				subl->pp_info[link_idx].domain_name[count] = string_set_add (buff, sent->string_set);
				count++;
			}
		}

		subl->pp_data = postprocessor->pp_data;

		if (NULL != pp->violation)
		{
			char * copy = (char *) exalloc(sizeof(char)*(strlen(pp->violation)+1));
			strcpy(copy, pp->violation);
			subl->violation = copy;
		}
	}
	post_process_close_sentence(postprocessor);
}
|
1887
|
+
#endif
|