grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2008, 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
void lg_compute_disjunct_strings(Sentence, Linkage_info *);
|
@@ -0,0 +1,92 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include <stdio.h>
|
15
|
+
#include <stdlib.h>
|
16
|
+
#include <string.h>
|
17
|
+
#include <stdarg.h>
|
18
|
+
|
19
|
+
#ifdef USE_PTHREADS
|
20
|
+
#include <pthread.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include "error.h"
|
24
|
+
#include "structures.h"
|
25
|
+
#include "api-structures.h"
|
26
|
+
|
27
|
+
#ifdef _MSC_VER
|
28
|
+
#define DLLEXPORT __declspec(dllexport)
|
29
|
+
#else
|
30
|
+
#define DLLEXPORT
|
31
|
+
#endif
|
32
|
+
|
33
|
+
/* ============================================================ */
|
34
|
+
|
35
|
+
/**
 * Print a diagnostic on stderr, prefixed with "link-grammar: ".
 *
 * ec:   error context; may be NULL, and its sentence may be NULL, in
 *       which case only the formatted message is printed.
 * sev:  severity of the message. For every severity other than Info,
 *       the words of the context's sentence (when there is one) are
 *       echoed after the message.
 * fmt:  printf-style format string.
 * args: arguments for fmt; the caller is responsible for the matching
 *       va_start()/va_end().
 */
static void verr_msg(err_ctxt *ec, severity sev, const char *fmt, va_list args)
{
	fprintf(stderr, "link-grammar: ");
	vfprintf(stderr, fmt, args);

	/* Check the context itself, not only its sentence, so that a caller
	 * passing a NULL context still gets the message instead of a crash. */
	if ((Info != sev) && (ec != NULL) && (ec->sent != NULL))
	{
		int i;
		fprintf(stderr, "\tFailing sentence was:\n\t");
		for (i=0; i<ec->sent->length; i++)
		{
			fprintf(stderr, "%s ", ec->sent->word[i].string);
		}
		fprintf(stderr, "\n");
	}
}
|
51
|
+
|
52
|
+
/**
 * Report a message of severity sev in the error context ec.
 * The printf-style format and its variadic arguments are packaged
 * into a va_list and handed to verr_msg(), which does the printing.
 */
void err_msg(err_ctxt *ec, severity sev, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verr_msg(ec, sev, fmt, ap);
	va_end(ap);
}
|
59
|
+
|
60
|
+
void prt_error(const char *fmt, ...)
|
61
|
+
{
|
62
|
+
severity sev;
|
63
|
+
err_ctxt ec;
|
64
|
+
va_list args;
|
65
|
+
|
66
|
+
sev = Error;
|
67
|
+
if (0 == strncmp(fmt, "Fatal", 5)) sev = Fatal;
|
68
|
+
if (0 == strncmp(fmt, "Error:", 6)) sev = Error;
|
69
|
+
if (0 == strncmp(fmt, "Warn", 4)) sev = Warn;
|
70
|
+
if (0 == strncmp(fmt, "Info:", 5)) sev = Info;
|
71
|
+
|
72
|
+
ec.sent = NULL;
|
73
|
+
va_start(args, fmt);
|
74
|
+
verr_msg(&ec, sev, fmt, args);
|
75
|
+
va_end(args);
|
76
|
+
}
|
77
|
+
|
78
|
+
/* ============================================================ */
|
79
|
+
/* These are deprecated, obsolete, and unused, but are still here
|
80
|
+
* because these are exported in the public API. Do not use these.
|
81
|
+
*/
|
82
|
+
DLLEXPORT int lperrno = 0;
|
83
|
+
DLLEXPORT char lperrmsg[1];
|
84
|
+
|
85
|
+
extern void lperror_clear(void);
|
86
|
+
void lperror_clear(void)
|
87
|
+
{
|
88
|
+
lperrmsg[0] = 0x0;
|
89
|
+
lperrno = 0;
|
90
|
+
}
|
91
|
+
|
92
|
+
/* ============================================================ */
|
@@ -0,0 +1,35 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
#ifndef _LINK_GRAMMAR_ERROR_H_
|
14
|
+
#define _LINK_GRAMMAR_ERROR_H_
|
15
|
+
|
16
|
+
#include "link-includes.h"
|
17
|
+
|
18
|
+
typedef struct
|
19
|
+
{
|
20
|
+
Sentence sent;
|
21
|
+
} err_ctxt;
|
22
|
+
|
23
|
+
/* Message severity levels, numbered consecutively starting from 1. */
typedef enum
{
	Fatal = 1,
	Error = 2,
	Warn  = 3,
	Info  = 4,
	Debug = 5
} severity;
|
31
|
+
|
32
|
+
void err_msg(err_ctxt *, severity, const char *fmt, ...) GNUC_PRINTF(3,4);
|
33
|
+
|
34
|
+
#endif
|
35
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
/*
|
13
|
+
* expand.c
|
14
|
+
*
|
15
|
+
* Enlarge the range of possible disjuncts to consider while parsing.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#include "api-structures.h"
|
19
|
+
#include "expand.h"
|
20
|
+
#include "disjunct-utils.h"
|
21
|
+
#include "word-utils.h"
|
22
|
+
#include "corpus/cluster.h"
|
23
|
+
|
24
|
+
/* ========================================================= */
|
25
|
+
|
26
|
+
/**
 * Ask the cluster clu for the disjuncts associated with the string of
 * x. When the lookup yields something, a trace line naming the string
 * is written to stdout. Returns the result of the lookup unchanged.
 */
static Disjunct * build_expansion_disjuncts(Cluster *clu, X_node *x)
{
	Disjunct *found = lg_cluster_get_disjuncts(clu, x->string);

	if (found != NULL)
	{
		printf("Expanded %s \n", x->string);
	}
	return found;
}
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Increase the number of disjuncts associated to each word in the
|
36
|
+
* sentence by working with word-clusters. Return true if the number
|
37
|
+
* of disjuncts were expanded, else return false.
|
38
|
+
*/
|
39
|
+
int lg_expand_disjunct_list(Sentence sent)
|
40
|
+
{
|
41
|
+
int w;
|
42
|
+
|
43
|
+
Cluster *clu = lg_cluster_new();
|
44
|
+
|
45
|
+
int expanded = FALSE;
|
46
|
+
for (w = 0; w < sent->length; w++)
|
47
|
+
{
|
48
|
+
X_node * x;
|
49
|
+
Disjunct * d = sent->word[w].d;
|
50
|
+
for (x = sent->word[w].x; x != NULL; x = x->next)
|
51
|
+
{
|
52
|
+
Disjunct *dx = build_expansion_disjuncts(clu, x);
|
53
|
+
if (dx)
|
54
|
+
{
|
55
|
+
int cnt = count_disjuncts(d);
|
56
|
+
d = catenate_disjuncts(dx, d);
|
57
|
+
d = eliminate_duplicate_disjuncts(d);
|
58
|
+
if (cnt < count_disjuncts(d)) expanded = TRUE;
|
59
|
+
}
|
60
|
+
}
|
61
|
+
sent->word[w].d = d;
|
62
|
+
}
|
63
|
+
lg_cluster_delete(clu);
|
64
|
+
|
65
|
+
return expanded;
|
66
|
+
}
|
67
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
int lg_expand_disjunct_list(Sentence sent);
|
@@ -0,0 +1,22 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
/* verbosity global is held in utilities.c */
|
15
|
+
extern int verbosity; /* the verbosity level for error messages */
|
16
|
+
|
17
|
+
/* size of random table for computing the
|
18
|
+
hash functions. must be a power of 2 */
|
19
|
+
#define RTSIZE 256
|
20
|
+
|
21
|
+
extern unsigned int randtable[RTSIZE]; /* random table for hashing */
|
22
|
+
|
@@ -0,0 +1,625 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* Copyright (c) 2010 Linas Vepstas */
|
5
|
+
/* All rights reserved */
|
6
|
+
/* */
|
7
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
8
|
+
/* license set forth in the LICENSE file included with this software, */
|
9
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
10
|
+
/* This license allows free redistribution and use in source and binary */
|
11
|
+
/* forms, with or without modification, subject to certain conditions. */
|
12
|
+
/* */
|
13
|
+
/*************************************************************************/
|
14
|
+
|
15
|
+
#include "api.h"
|
16
|
+
|
17
|
+
/**
|
18
|
+
* The first thing we do is we build a data structure to represent the
|
19
|
+
* result of the entire parse search. There will be a set of nodes
|
20
|
+
* built for each call to the count() function that returned a non-zero
|
21
|
+
* value, AND which is part of a valid linkage. Each of these nodes
|
22
|
+
* represents a valid continuation, and contains pointers to two other
|
23
|
+
* sets (one for the left continuation and one for the right
|
24
|
+
* continuation).
|
25
|
+
*/
|
26
|
+
|
27
|
+
static Parse_set * dummy_set(void)
|
28
|
+
{
|
29
|
+
static Parse_set ds;
|
30
|
+
ds.first = ds.current = NULL;
|
31
|
+
ds.count = 1;
|
32
|
+
return &ds;
|
33
|
+
}
|
34
|
+
|
35
|
+
/** Returns an empty set of parses */
|
36
|
+
static Parse_set * empty_set(void)
|
37
|
+
{
|
38
|
+
Parse_set *s;
|
39
|
+
s = (Parse_set *) xalloc(sizeof(Parse_set));
|
40
|
+
s->first = s->current = NULL;
|
41
|
+
s->count = 0;
|
42
|
+
return s;
|
43
|
+
}
|
44
|
+
|
45
|
+
/**
 * Release a Parse_set together with every Parse_choice on its list.
 * A NULL set is ignored. The sets that the choices point to are not
 * followed or freed here.
 */
static void free_set(Parse_set *s)
{
	Parse_choice *pc;

	if (NULL == s) return;

	pc = s->first;
	while (pc != NULL)
	{
		/* Fetch the successor before the node is released. */
		Parse_choice *following = pc->next;
		xfree((void *)pc, sizeof(*pc));
		pc = following;
	}
	xfree((void *)s, sizeof(*s));
}
|
55
|
+
|
56
|
+
/**
 * Allocate a Parse_choice and fill it in from the arguments.
 *
 * lset, llw, lrw, llc, lrc: stored as set[0] and as the l, r, lc and rc
 *                           fields of link[0].
 * rset, rlw, rrw, rlc, rrc: stored as set[1] and as the l, r, lc and rc
 *                           fields of link[1].
 * ld, md, rd:               stored in the fields of the same names.
 *
 * The next pointer of the returned choice is NULL.
 */
static Parse_choice * make_choice(Parse_set *lset, int llw, int lrw, Connector * llc, Connector * lrc,
                                  Parse_set *rset, int rlw, int rrw, Connector * rlc, Connector * rrc,
                                  Disjunct *ld, Disjunct *md, Disjunct *rd)
{
	Parse_choice *choice = (Parse_choice *) xalloc(sizeof(Parse_choice));

	choice->next = NULL;

	/* First (left) set and link. */
	choice->set[0]     = lset;
	choice->link[0].l  = llw;
	choice->link[0].r  = lrw;
	choice->link[0].lc = llc;
	choice->link[0].rc = lrc;

	/* Second (right) set and link. */
	choice->set[1]     = rset;
	choice->link[1].l  = rlw;
	choice->link[1].r  = rrw;
	choice->link[1].lc = rlc;
	choice->link[1].rc = rrc;

	/* The three disjuncts. */
	choice->ld = ld;
	choice->md = md;
	choice->rd = rd;

	return choice;
}
|
78
|
+
|
79
|
+
/**
|
80
|
+
* Put this parse_choice into a given set. The current pointer is always
|
81
|
+
* left pointing to the end of the list.
|
82
|
+
*/
|
83
|
+
static void put_choice_in_set(Parse_set *s, Parse_choice *pc)
|
84
|
+
{
|
85
|
+
if (s->first == NULL)
|
86
|
+
{
|
87
|
+
s->first = pc;
|
88
|
+
}
|
89
|
+
else
|
90
|
+
{
|
91
|
+
s->current->next = pc;
|
92
|
+
}
|
93
|
+
s->current = pc;
|
94
|
+
pc->next = NULL;
|
95
|
+
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* Allocate the parse info struct
|
99
|
+
*
|
100
|
+
* A piecewise exponential function determines the size of the hash
|
101
|
+
* table. Probably should make use of the actual number of disjuncts,
|
102
|
+
* rather than just the number of words.
|
103
|
+
*/
|
104
|
+
Parse_info parse_info_new(int nwords)
|
105
|
+
{
|
106
|
+
int log2_table_size;
|
107
|
+
Parse_info pi;
|
108
|
+
|
109
|
+
pi = (Parse_info) xalloc(sizeof(struct Parse_info_struct));
|
110
|
+
memset(pi, 0, sizeof(struct Parse_info_struct));
|
111
|
+
pi->N_words = nwords;
|
112
|
+
pi->parse_set = NULL;
|
113
|
+
|
114
|
+
pi->chosen_disjuncts = (Disjunct **) xalloc(nwords * sizeof(Disjunct *));
|
115
|
+
memset(pi->chosen_disjuncts, 0, nwords * sizeof(Disjunct *));
|
116
|
+
|
117
|
+
pi->image_array = (Image_node **) xalloc(nwords * sizeof(Image_node *));
|
118
|
+
memset(pi->image_array, 0, nwords * sizeof(Image_node *));
|
119
|
+
|
120
|
+
pi->has_fat_down = (char *) xalloc(nwords * sizeof(Boolean));
|
121
|
+
memset(pi->has_fat_down, 0, nwords * sizeof(Boolean));
|
122
|
+
|
123
|
+
/* Alloc the x_table */
|
124
|
+
if (nwords >= 10) {
|
125
|
+
log2_table_size = 14;
|
126
|
+
} else if (nwords >= 4) {
|
127
|
+
log2_table_size = nwords;
|
128
|
+
} else {
|
129
|
+
log2_table_size = 4;
|
130
|
+
}
|
131
|
+
pi->log2_x_table_size = log2_table_size;
|
132
|
+
pi->x_table_size = (1 << log2_table_size);
|
133
|
+
|
134
|
+
/*printf("Allocating x_table of size %d\n", x_table_size);*/
|
135
|
+
pi->x_table = (X_table_connector**) xalloc(pi->x_table_size * sizeof(X_table_connector*));
|
136
|
+
memset(pi->x_table, 0, pi->x_table_size * sizeof(X_table_connector*));
|
137
|
+
|
138
|
+
return pi;
|
139
|
+
}
|
140
|
+
|
141
|
+
/**
|
142
|
+
* This is the function that should be used to free the set structure. Since
|
143
|
+
* it's a dag, a recursive free function won't work. Every time we create
|
144
|
+
* a set element, we put it in the hash table, so this is OK.
|
145
|
+
*/
|
146
|
+
void free_parse_info(Parse_info pi)
|
147
|
+
{
|
148
|
+
int i, len;
|
149
|
+
X_table_connector *t, *x;
|
150
|
+
|
151
|
+
len = pi->N_words;
|
152
|
+
xfree(pi->chosen_disjuncts, len * sizeof(Disjunct *));
|
153
|
+
xfree(pi->image_array, len * sizeof(Image_node*));
|
154
|
+
xfree(pi->has_fat_down, len * sizeof(Boolean));
|
155
|
+
|
156
|
+
for (i=0; i<pi->x_table_size; i++)
|
157
|
+
{
|
158
|
+
for(t = pi->x_table[i]; t!= NULL; t=x)
|
159
|
+
{
|
160
|
+
x = t->next;
|
161
|
+
free_set(t->set);
|
162
|
+
xfree((void *) t, sizeof(X_table_connector));
|
163
|
+
}
|
164
|
+
}
|
165
|
+
pi->parse_set = NULL;
|
166
|
+
|
167
|
+
/*printf("Freeing x_table of size %d\n", x_table_size);*/
|
168
|
+
xfree((void *) pi->x_table, pi->x_table_size * sizeof(X_table_connector*));
|
169
|
+
pi->x_table_size = 0;
|
170
|
+
pi->x_table = NULL;
|
171
|
+
|
172
|
+
xfree((void *) pi, sizeof(struct Parse_info_struct));
|
173
|
+
}
|
174
|
+
|
175
|
+
/**
|
176
|
+
* Returns the pointer to this info, NULL if not there.
|
177
|
+
*/
|
178
|
+
static X_table_connector * x_table_pointer(int lw, int rw, Connector *le, Connector *re,
|
179
|
+
int cost, Parse_info pi)
|
180
|
+
{
|
181
|
+
X_table_connector *t;
|
182
|
+
t = pi->x_table[pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost)];
|
183
|
+
for (; t != NULL; t = t->next) {
|
184
|
+
if ((t->lw == lw) && (t->rw == rw) && (t->le == le) && (t->re == re) && (t->cost == cost)) return t;
|
185
|
+
}
|
186
|
+
return NULL;
|
187
|
+
}
|
188
|
+
|
189
|
+
#if DEAD_CODE
|
190
|
+
/**
 * Look up the parse set stored in the x_table for the quintuple
 * (lw, rw, le, re, cost).
 *
 * Returns the stored set, or NULL when the table holds no entry for
 * this key. (The function returns a pointer, so "not found" has to be
 * a pointer value; the integer -1 is not a valid Parse_set pointer.)
 */
Parse_set * x_table_lookup(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_info pi)
{
    X_table_connector *t = x_table_pointer(lw, rw, le, re, cost, pi);

    if (t == NULL) return NULL;
    return t->set;
}
|
197
|
+
#endif
|
198
|
+
|
199
|
+
/**
|
200
|
+
* Stores the value in the x_table. Assumes it's not already there.
|
201
|
+
*/
|
202
|
+
static X_table_connector * x_table_store(int lw, int rw, Connector *le, Connector *re,
|
203
|
+
int cost, Parse_set * set, Parse_info pi)
|
204
|
+
{
|
205
|
+
X_table_connector *t, *n;
|
206
|
+
int h;
|
207
|
+
|
208
|
+
n = (X_table_connector *) xalloc(sizeof(X_table_connector));
|
209
|
+
n->set = set;
|
210
|
+
n->lw = lw; n->rw = rw; n->le = le; n->re = re; n->cost = cost;
|
211
|
+
h = pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost);
|
212
|
+
t = pi->x_table[h];
|
213
|
+
n->next = t;
|
214
|
+
pi->x_table[h] = n;
|
215
|
+
return n;
|
216
|
+
}
|
217
|
+
|
218
|
+
#ifdef UNUSED_FUNCTION
|
219
|
+
/**
 * Replace the set stored in the x_table for (lw, rw, le, re, cost).
 * Unlike x_table_store(), the entry is assumed to exist already; this
 * is enforced with an assert.
 */
static void x_table_update(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_set * set, Parse_info pi)
{
    X_table_connector *entry;

    entry = x_table_pointer(lw, rw, le, re, cost, pi);
    assert(entry != NULL, "This entry is supposed to be in the x_table.");

    entry->set = set;
}
|
227
|
+
#endif
|
228
|
+
|
229
|
+
|
230
|
+
/**
|
231
|
+
* returns NULL if there are no ways to parse, or returns a pointer
|
232
|
+
* to a set structure representing all the ways to parse.
|
233
|
+
*
|
234
|
+
* This code is similar to code in count.c
|
235
|
+
* (grep for end_word in these files).
|
236
|
+
*/
|
237
|
+
static Parse_set * parse_set(Sentence sent,
|
238
|
+
Disjunct *ld, Disjunct *rd, int lw, int rw,
|
239
|
+
Connector *le, Connector *re, int cost,
|
240
|
+
int islands_ok, Parse_info pi)
|
241
|
+
{
|
242
|
+
Disjunct * d, * dis;
|
243
|
+
int start_word, end_word, w;
|
244
|
+
int lcost, rcost, Lmatch, Rmatch;
|
245
|
+
int i, j;
|
246
|
+
Parse_set *ls[4], *rs[4], *lset, *rset;
|
247
|
+
Parse_choice * a_choice;
|
248
|
+
|
249
|
+
Match_node * m, *m1;
|
250
|
+
X_table_connector *xt;
|
251
|
+
s64 count;
|
252
|
+
|
253
|
+
assert(cost >= 0, "parse_set() called with cost < 0.");
|
254
|
+
|
255
|
+
count = table_lookup(sent, lw, rw, le, re, cost);
|
256
|
+
|
257
|
+
/*
|
258
|
+
assert(count >= 0, "parse_set() called on params that were not in the table.");
|
259
|
+
Actually, we can't assert this, because of the pseudocount technique that's
|
260
|
+
used in count(). It's not the case that every call to parse_set() has already
|
261
|
+
been put into the table.
|
262
|
+
*/
|
263
|
+
|
264
|
+
if ((count == 0) || (count == -1)) return NULL;
|
265
|
+
|
266
|
+
xt = x_table_pointer(lw, rw, le, re, cost, pi);
|
267
|
+
|
268
|
+
if (xt != NULL) return xt->set; /* we've already computed it */
|
269
|
+
|
270
|
+
/* Start it out with the empty set of options. */
|
271
|
+
/* This entry must be updated before we return. */
|
272
|
+
xt = x_table_store(lw, rw, le, re, cost, empty_set(), pi);
|
273
|
+
|
274
|
+
xt->set->count = count; /* the count we already computed */
|
275
|
+
/* this count is non-zero */
|
276
|
+
|
277
|
+
if (rw == 1 + lw) return xt->set;
|
278
|
+
|
279
|
+
if ((le == NULL) && (re == NULL))
|
280
|
+
{
|
281
|
+
if (!islands_ok && (lw != -1)) return xt->set;
|
282
|
+
|
283
|
+
if (cost == 0) return xt->set;
|
284
|
+
|
285
|
+
w = lw + 1;
|
286
|
+
for (dis = sent->word[w].d; dis != NULL; dis = dis->next)
|
287
|
+
{
|
288
|
+
if (dis->left == NULL)
|
289
|
+
{
|
290
|
+
rs[0] = parse_set(sent, dis, NULL, w, rw, dis->right,
|
291
|
+
NULL, cost-1, islands_ok, pi);
|
292
|
+
if (rs[0] == NULL) continue;
|
293
|
+
a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
|
294
|
+
rs[0], w, rw, NULL, NULL,
|
295
|
+
NULL, NULL, NULL);
|
296
|
+
put_choice_in_set(xt->set, a_choice);
|
297
|
+
}
|
298
|
+
}
|
299
|
+
rs[0] = parse_set(sent, NULL, NULL, w, rw, NULL, NULL,
|
300
|
+
cost-1, islands_ok, pi);
|
301
|
+
if (rs[0] != NULL)
|
302
|
+
{
|
303
|
+
a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
|
304
|
+
rs[0], w, rw, NULL, NULL,
|
305
|
+
NULL, NULL, NULL);
|
306
|
+
put_choice_in_set(xt->set, a_choice);
|
307
|
+
}
|
308
|
+
return xt->set;
|
309
|
+
}
|
310
|
+
|
311
|
+
if (le == NULL)
|
312
|
+
{
|
313
|
+
start_word = lw + 1;
|
314
|
+
}
|
315
|
+
else
|
316
|
+
{
|
317
|
+
start_word = le->word;
|
318
|
+
}
|
319
|
+
|
320
|
+
if (re == NULL)
|
321
|
+
{
|
322
|
+
end_word = rw;
|
323
|
+
}
|
324
|
+
else
|
325
|
+
{
|
326
|
+
end_word = re->word + 1;
|
327
|
+
}
|
328
|
+
|
329
|
+
for (w = start_word; w < end_word; w++)
|
330
|
+
{
|
331
|
+
m1 = m = form_match_list(sent, w, le, lw, re, rw);
|
332
|
+
for (; m!=NULL; m=m->next)
|
333
|
+
{
|
334
|
+
d = m->d;
|
335
|
+
for (lcost = 0; lcost <= cost; lcost++)
|
336
|
+
{
|
337
|
+
rcost = cost-lcost;
|
338
|
+
/* now lcost and rcost are the costs we're assigning to
|
339
|
+
* those parts respectively */
|
340
|
+
|
341
|
+
/* Now, we determine if (based on table only) we can see that
|
342
|
+
the current range is not parsable. */
|
343
|
+
|
344
|
+
Lmatch = (le != NULL) && (d->left != NULL) && do_match(sent, le, d->left, lw, w);
|
345
|
+
Rmatch = (d->right != NULL) && (re != NULL) && do_match(sent, d->right, re, w, rw);
|
346
|
+
for (i=0; i<4; i++) {ls[i] = rs[i] = NULL;}
|
347
|
+
if (Lmatch)
|
348
|
+
{
|
349
|
+
ls[0] = parse_set(sent, ld, d, lw, w, le->next, d->left->next, lcost, islands_ok, pi);
|
350
|
+
if (le->multi) ls[1] = parse_set(sent, ld, d, lw, w, le, d->left->next, lcost, islands_ok, pi);
|
351
|
+
if (d->left->multi) ls[2] = parse_set(sent, ld, d, lw, w, le->next, d->left, lcost, islands_ok, pi);
|
352
|
+
if (le->multi && d->left->multi) ls[3] = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
|
353
|
+
}
|
354
|
+
if (Rmatch)
|
355
|
+
{
|
356
|
+
rs[0] = parse_set(sent, d, rd, w, rw, d->right->next, re->next, rcost, islands_ok, pi);
|
357
|
+
if (d->right->multi) rs[1] = parse_set(sent, d, rd, w,rw,d->right,re->next, rcost, islands_ok, pi);
|
358
|
+
if (re->multi) rs[2] = parse_set(sent, d, rd, w, rw, d->right->next, re, rcost, islands_ok, pi);
|
359
|
+
if (d->right->multi && re->multi) rs[3] = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
|
360
|
+
}
|
361
|
+
|
362
|
+
for (i=0; i<4; i++)
|
363
|
+
{
|
364
|
+
/* this ordering is probably not consistent with that
|
365
|
+
* needed to use list_links */
|
366
|
+
if (ls[i] == NULL) continue;
|
367
|
+
for (j=0; j<4; j++)
|
368
|
+
{
|
369
|
+
if (rs[j] == NULL) continue;
|
370
|
+
a_choice = make_choice(ls[i], lw, w, le, d->left,
|
371
|
+
rs[j], w, rw, d->right, re,
|
372
|
+
ld, d, rd);
|
373
|
+
put_choice_in_set(xt->set, a_choice);
|
374
|
+
}
|
375
|
+
}
|
376
|
+
|
377
|
+
if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL)
|
378
|
+
{
|
379
|
+
/* evaluate using the left match, but not the right */
|
380
|
+
rset = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
|
381
|
+
if (rset != NULL)
|
382
|
+
{
|
383
|
+
for (i=0; i<4; i++)
|
384
|
+
{
|
385
|
+
if (ls[i] == NULL) continue;
|
386
|
+
/* this ordering is probably not consistent with
|
387
|
+
* that needed to use list_links */
|
388
|
+
a_choice = make_choice(ls[i], lw, w, le, d->left,
|
389
|
+
rset, w, rw, NULL /* d->right */,
|
390
|
+
re, /* the NULL indicates no link*/
|
391
|
+
ld, d, rd);
|
392
|
+
put_choice_in_set(xt->set, a_choice);
|
393
|
+
}
|
394
|
+
}
|
395
|
+
}
|
396
|
+
if ((le == NULL) && (rs[0] != NULL ||
|
397
|
+
rs[1] != NULL || rs[2] != NULL || rs[3] != NULL))
|
398
|
+
{
|
399
|
+
/* evaluate using the right match, but not the left */
|
400
|
+
lset = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
|
401
|
+
|
402
|
+
if (lset != NULL)
|
403
|
+
{
|
404
|
+
for (i=0; i<4; i++)
|
405
|
+
{
|
406
|
+
if (rs[i] == NULL) continue;
|
407
|
+
/* this ordering is probably not consistent with
|
408
|
+
* that needed to use list_links */
|
409
|
+
a_choice = make_choice(lset, lw, w, NULL /* le */,
|
410
|
+
d->left, /* NULL indicates no link */
|
411
|
+
rs[i], w, rw, d->right, re,
|
412
|
+
ld, d, rd);
|
413
|
+
put_choice_in_set(xt->set, a_choice);
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
}
|
418
|
+
}
|
419
|
+
put_match_list(sent, m1);
|
420
|
+
}
|
421
|
+
xt->set->current = xt->set->first;
|
422
|
+
return xt->set;
|
423
|
+
}
|
424
|
+
|
425
|
+
/**
|
426
|
+
* return TRUE if and only if overflow in the number of parses occured.
|
427
|
+
* Use a 64-bit int for counting.
|
428
|
+
*/
|
429
|
+
static int verify_set_node(Parse_set *set)
|
430
|
+
{
|
431
|
+
Parse_choice *pc;
|
432
|
+
s64 n;
|
433
|
+
if (set == NULL || set->first == NULL) return FALSE;
|
434
|
+
n = 0;
|
435
|
+
for (pc = set->first; pc != NULL; pc = pc->next)
|
436
|
+
{
|
437
|
+
n += pc->set[0]->count * pc->set[1]->count;
|
438
|
+
if (PARSE_NUM_OVERFLOW < n) return TRUE;
|
439
|
+
}
|
440
|
+
return FALSE;
|
441
|
+
}
|
442
|
+
|
443
|
+
/**
 * Run verify_set_node() on the set of every x_table entry.
 * Returns TRUE as soon as one of them reports overflow, FALSE if
 * none does. The x_table must have been allocated.
 */
static int verify_set(Parse_info pi)
{
    X_table_connector *entry;
    int bucket;

    assert(pi->x_table != NULL, "called verify_set when x_table==NULL");

    bucket = 0;
    while (bucket < pi->x_table_size)
    {
        entry = pi->x_table[bucket];
        while (entry != NULL)
        {
            if (verify_set_node(entry->set)) return TRUE;
            entry = entry->next;
        }
        bucket++;
    }
    return FALSE;
}
|
458
|
+
|
459
|
+
/**
|
460
|
+
* This is the top level call that computes the whole parse_set. It
|
461
|
+
* points whole_set at the result. It creates the necessary hash
|
462
|
+
* table (x_table) which will be freed at the same time the
|
463
|
+
* whole_set is freed.
|
464
|
+
*
|
465
|
+
* It also assumes that count() has been run, and that hash table is
|
466
|
+
* filled with the values thus computed. Therefore this function
|
467
|
+
* must be structured just like parse() (the main function for
|
468
|
+
* count()).
|
469
|
+
*
|
470
|
+
* If the number of linkages gets huge, then the counts can overflow.
|
471
|
+
* We check if this has happened when verifying the parse set.
|
472
|
+
* This routine returns TRUE iff overflowed occurred.
|
473
|
+
*/
|
474
|
+
|
475
|
+
int build_parse_set(Sentence sent, int cost, Parse_Options opts)
|
476
|
+
{
|
477
|
+
Parse_set * whole_set;
|
478
|
+
|
479
|
+
whole_set =
|
480
|
+
parse_set(sent, NULL, NULL, -1, sent->length, NULL, NULL, cost+1,
|
481
|
+
opts->islands_ok, sent->parse_info);
|
482
|
+
|
483
|
+
if ((whole_set != NULL) && (whole_set->current != NULL)) {
|
484
|
+
whole_set->current = whole_set->first;
|
485
|
+
}
|
486
|
+
|
487
|
+
sent->parse_info->parse_set = whole_set;
|
488
|
+
|
489
|
+
return verify_set(sent->parse_info);
|
490
|
+
}
|
491
|
+
|
492
|
+
/**
 * Reset the link-extraction state: clear the chosen disjunct of every
 * word and set the number of issued links back to zero.
 */
static void initialize_links(Parse_info pi)
{
    memset(pi->chosen_disjuncts, 0, pi->N_words * sizeof(Disjunct *));
    pi->N_links = 0;
}
|
497
|
+
|
498
|
+
/**
 * Record one link: append it to pi->link_array and note ld and rd as
 * the chosen disjuncts of the link's left and right words.
 * Asserts that there is still room for another link (MAX_LINKS).
 */
static void issue_link(Parse_info pi, Disjunct * ld, Disjunct * rd, Link link)
{
    assert(pi->N_links <= MAX_LINKS-1, "Too many links");

    pi->link_array[pi->N_links++] = link;

    /* Left word first, then right word. */
    pi->chosen_disjuncts[link.l] = ld;
    pi->chosen_disjuncts[link.r] = rd;
}
|
507
|
+
|
508
|
+
/**
 * Issue the links carried by a parse choice. Each of the two links is
 * issued only if its left connector is non-NULL; a NULL left connector
 * means there is no link to generate for that half.
 */
static void issue_links_for_choice(Parse_info pi, Parse_choice *pc)
{
    /* Left half: joins the left and the middle disjunct. */
    if (pc->link[0].lc != NULL)
    {
        issue_link(pi, pc->ld, pc->md, pc->link[0]);
    }

    /* Right half: joins the middle and the right disjunct. */
    if (pc->link[1].lc != NULL)
    {
        issue_link(pi, pc->md, pc->rd, pc->link[1]);
    }
}
|
517
|
+
|
518
|
+
#ifdef NOT_USED_ANYWHERE
|
519
|
+
/**
 * Issue the links of the set's "current" choice, then descend into
 * the left and the right sub-set of that choice. Does nothing for a
 * NULL set or for a set without a current choice.
 */
static void build_current_linkage_recursive(Parse_info pi, Parse_set *set)
{
    if ((set == NULL) || (set->current == NULL)) return;

    issue_links_for_choice(pi, set->current);
    build_current_linkage_recursive(pi, set->current->set[0]);
    build_current_linkage_recursive(pi, set->current->set[1]);
}
|
528
|
+
|
529
|
+
/**
|
530
|
+
* This function takes the "current" point in the given set and
|
531
|
+
* generates the linkage that it represents.
|
532
|
+
*/
|
533
|
+
void build_current_linkage(Parse_info pi)
|
534
|
+
{
|
535
|
+
initialize_links(pi);
|
536
|
+
build_current_linkage_recursive(pi, pi->parse_set);
|
537
|
+
}
|
538
|
+
|
539
|
+
/**
|
540
|
+
* Advance the "current" linkage to the next one
|
541
|
+
* return 1 if there's a "carry" from this node,
|
542
|
+
* which indicates that the scan of this node has
|
543
|
+
* just been completed, and it's now back to it's
|
544
|
+
* starting state.
|
545
|
+
*/
|
546
|
+
static int advance_linkage(Parse_info pi, Parse_set * set)
|
547
|
+
{
|
548
|
+
if (set == NULL) return 1; /* probably can't happen */
|
549
|
+
if (set->first == NULL) return 1; /* the empty set */
|
550
|
+
if (advance_linkage(pi, set->current->set[0]) == 1) {
|
551
|
+
if (advance_linkage(pi, set->current->set[1]) == 1) {
|
552
|
+
if (set->current->next == NULL) {
|
553
|
+
set->current = set->first;
|
554
|
+
return 1;
|
555
|
+
}
|
556
|
+
set->current = set->current->next;
|
557
|
+
}
|
558
|
+
}
|
559
|
+
return 0;
|
560
|
+
}
|
561
|
+
|
562
|
+
/**
 * Step the parse set held in pi to its next linkage. The carry value
 * returned by advance_linkage() is discarded.
 */
static void advance_parse_set(Parse_info pi)
{
    (void) advance_linkage(pi, pi->parse_set);
}
|
566
|
+
#endif
|
567
|
+
|
568
|
+
/**
 * Issue the links of the index'th parse represented by the set.
 *
 * The choices are walked in list order; each one accounts for
 * (left count * right count) parses, and the first choice whose share
 * contains index is selected. Within it, the remaining index modulo
 * the left count is passed to the left sub-set and the remaining
 * index divided by the left count to the right sub-set. Does nothing
 * for a NULL or empty set; asserts if index lies beyond the last
 * choice.
 */
static void list_links(Parse_info pi, Parse_set * set, int index)
{
    Parse_choice *choice;
    s64 share;

    if (set == NULL || set->first == NULL) return;

    choice = set->first;
    while (choice != NULL)
    {
        share = choice->set[0]->count * choice->set[1]->count;
        if (index < share) break;
        index -= share;
        choice = choice->next;
    }

    assert(choice != NULL, "walked off the end in list_links");
    issue_links_for_choice(pi, choice);
    list_links(pi, choice->set[0], index % choice->set[0]->count);
    list_links(pi, choice->set[1], index / choice->set[0]->count);
}
|
584
|
+
|
585
|
+
/**
 * Issue the links of a pseudo-randomly chosen parse from the set.
 *
 * One choice is picked with rand_r() driven by pi->rand_state, each
 * choice in the list being one candidate regardless of how many
 * parses it stands for; the same is then done recursively for the
 * left and the right sub-set of that choice. Does nothing for a NULL
 * or empty set.
 */
static void list_random_links(Parse_info pi, Parse_set * set)
{
    Parse_choice *choice;
    int n_choices, pick, position;

    if (set == NULL || set->first == NULL) return;

    /* Count the choices available in this set. */
    n_choices = 0;
    choice = set->first;
    while (choice != NULL)
    {
        n_choices++;
        choice = choice->next;
    }

    pick = rand_r(&pi->rand_state) % n_choices;

    /* Walk to the choice at position "pick". */
    position = 0;
    choice = set->first;
    while (choice != NULL && position != pick)
    {
        position++;
        choice = choice->next;
    }

    assert(choice != NULL, "Couldn't get a random parse choice");
    issue_links_for_choice(pi, choice);
    list_random_links(pi, choice->set[0]);
    list_random_links(pi, choice->set[1]);
}
|
609
|
+
|
610
|
+
/**
|
611
|
+
* Generate the list of all links of the index'th parsing of the
|
612
|
+
* sentence. For this to work, you must have already called parse, and
|
613
|
+
* already built the whole_set.
|
614
|
+
*/
|
615
|
+
void extract_links(int index, int cost, Parse_info pi)
|
616
|
+
{
|
617
|
+
initialize_links(pi);
|
618
|
+
pi->rand_state = index;
|
619
|
+
if (index < 0) {
|
620
|
+
list_random_links(pi, pi->parse_set);
|
621
|
+
}
|
622
|
+
else {
|
623
|
+
list_links(pi, pi->parse_set, index);
|
624
|
+
}
|
625
|
+
}
|