grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
Parse Ranking and Word Sense Statistics
|
3
|
+
---------------------------------------
|
4
|
+
|
5
|
+
This directory contains code that computes a parse ranking, as well
|
6
|
+
as a word-sense probability (based on WordNet 3.0) by looking up
|
7
|
+
frequency statistics from an SQL database. The SQLite database engine
|
8
|
+
is used because it is "administration-free" for the user, and because
|
9
|
+
its license is compatible with the current link-grammar license.
|
10
|
+
|
11
|
+
This directory also contains code for "broadening" word linkages.
|
12
|
+
See data/sql/README for more info.
|
13
|
+
|
14
|
+
This directory contains one administrative tool, "cluster-pop",
|
15
|
+
which is not built by default because users do not need this tool.
|
16
|
+
See the Makefile.am for notes on how to build it.
|
17
|
+
|
@@ -0,0 +1,286 @@
|
|
1
|
+
/*
|
2
|
+
* cluster.c
|
3
|
+
*
|
4
|
+
* Data for related-word clusters. Meant to expand disjunct coverage
|
5
|
+
* for the case where a parse cannot be completed without omitting
|
6
|
+
* a word.
|
7
|
+
*
|
8
|
+
* Copyright (c) 2009 Linas Vepstas <linasvepstas@gmail.com>
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
13
|
+
#include <string.h>
|
14
|
+
#include <sqlite3.h>
|
15
|
+
#include "cluster.h"
|
16
|
+
#include "../build-disjuncts.h"
|
17
|
+
#include "../disjunct-utils.h"
|
18
|
+
#include "../structures.h"
|
19
|
+
#include "../utilities.h"
|
20
|
+
|
21
|
+
struct cluster_s
|
22
|
+
{
|
23
|
+
char * dbname;
|
24
|
+
sqlite3 *dbconn;
|
25
|
+
sqlite3_stmt *clu_query;
|
26
|
+
sqlite3_stmt *dj_query;
|
27
|
+
char *errmsg;
|
28
|
+
int rc;
|
29
|
+
};
|
30
|
+
|
31
|
+
/* ========================================================= */
|
32
|
+
|
33
|
+
static void * db_file_open(const char * dbname, void * user_data)
|
34
|
+
{
|
35
|
+
Cluster *c = (Cluster *) user_data;
|
36
|
+
int rc;
|
37
|
+
sqlite3 *dbconn;
|
38
|
+
c->rc = sqlite3_open_v2(dbname, &dbconn, SQLITE_OPEN_READONLY, NULL);
|
39
|
+
if (c->rc)
|
40
|
+
{
|
41
|
+
sqlite3_close(dbconn);
|
42
|
+
return NULL;
|
43
|
+
}
|
44
|
+
|
45
|
+
c->dbname = strdup(dbname);
|
46
|
+
return dbconn;
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Initialize the cluster statistics subsystem.
|
52
|
+
*/
|
53
|
+
Cluster * lg_cluster_new(void)
|
54
|
+
{
|
55
|
+
int rc;
|
56
|
+
|
57
|
+
Cluster *c = (Cluster *) malloc(sizeof(Cluster));
|
58
|
+
c->clu_query = NULL;
|
59
|
+
c->dj_query = NULL;
|
60
|
+
c->errmsg = NULL;
|
61
|
+
c->dbname = NULL;
|
62
|
+
|
63
|
+
/* dbname = "/link-grammar/data/en/sql/clusters.db"; */
|
64
|
+
#define DBNAME "sql/clusters.db"
|
65
|
+
c->dbconn = object_open(DBNAME, db_file_open, c);
|
66
|
+
if (NULL == c->dbconn)
|
67
|
+
{
|
68
|
+
/* Very weird .. but if the database is not found, then sqlite
|
69
|
+
* reports an "out of memory" error! So hide this misleading
|
70
|
+
* error message.
|
71
|
+
*/
|
72
|
+
if (SQLITE_CANTOPEN == c->rc)
|
73
|
+
{
|
74
|
+
prt_error("Warning: Can't open database: File not found\n"
|
75
|
+
"\tWas looking for: " DBNAME);
|
76
|
+
}
|
77
|
+
else
|
78
|
+
{
|
79
|
+
prt_error("Warning: Can't open database: %s\n"
|
80
|
+
"\tWas looking for: " DBNAME,
|
81
|
+
sqlite3_errmsg(c->dbconn));
|
82
|
+
}
|
83
|
+
return c;
|
84
|
+
}
|
85
|
+
|
86
|
+
/* Now prepare the statements we plan to use */
|
87
|
+
rc = sqlite3_prepare_v2(c->dbconn,
|
88
|
+
"SELECT cluster_name FROM ClusterMembers "
|
89
|
+
"WHERE inflected_word = ?;",
|
90
|
+
-1, &c->clu_query, NULL);
|
91
|
+
if (rc != SQLITE_OK)
|
92
|
+
{
|
93
|
+
prt_error("Error: Can't prepare the cluster member statment: %s\n",
|
94
|
+
sqlite3_errmsg(c->dbconn));
|
95
|
+
}
|
96
|
+
|
97
|
+
rc = sqlite3_prepare_v2(c->dbconn,
|
98
|
+
"SELECT disjunct, cost FROM ClusterDisjuncts "
|
99
|
+
"WHERE cluster_name = ?;",
|
100
|
+
-1, &c->dj_query, NULL);
|
101
|
+
if (rc != SQLITE_OK)
|
102
|
+
{
|
103
|
+
prt_error("Error: Can't prepare the disjunct statment: %s\n",
|
104
|
+
sqlite3_errmsg(c->dbconn));
|
105
|
+
}
|
106
|
+
|
107
|
+
prt_error("Info: Cluster grouping database found at %s\n", c->dbname);
|
108
|
+
return c;
|
109
|
+
}
|
110
|
+
|
111
|
+
/**
|
112
|
+
* lg_cluster_delete -- shut down the cluster statistics subsystem.
|
113
|
+
*/
|
114
|
+
void lg_cluster_delete(Cluster *c)
|
115
|
+
{
|
116
|
+
if (NULL == c) return;
|
117
|
+
|
118
|
+
if (c->clu_query)
|
119
|
+
{
|
120
|
+
sqlite3_finalize(c->clu_query);
|
121
|
+
c->clu_query = NULL;
|
122
|
+
}
|
123
|
+
|
124
|
+
if (c->dj_query)
|
125
|
+
{
|
126
|
+
sqlite3_finalize(c->dj_query);
|
127
|
+
c->dj_query = NULL;
|
128
|
+
}
|
129
|
+
|
130
|
+
if (c->dbconn)
|
131
|
+
{
|
132
|
+
sqlite3_close(c->dbconn);
|
133
|
+
c->dbconn = NULL;
|
134
|
+
}
|
135
|
+
|
136
|
+
if (c->dbname)
|
137
|
+
{
|
138
|
+
free(c->dbname);
|
139
|
+
c->dbname = NULL;
|
140
|
+
}
|
141
|
+
free(c);
|
142
|
+
}
|
143
|
+
|
144
|
+
/* ========================================================= */
|
145
|
+
|
146
|
+
static Exp * make_exp(const char *djstr, double cost)
|
147
|
+
{
|
148
|
+
Exp *e = (Exp *) malloc(sizeof(Exp));
|
149
|
+
e->multi = 0;
|
150
|
+
e->dir = ' ';
|
151
|
+
e->cost = cost;
|
152
|
+
|
153
|
+
/* If its just a single connector, then do just that */
|
154
|
+
char *sp = strchr (djstr, ' ');
|
155
|
+
if (NULL == sp || 0x0 == sp[1])
|
156
|
+
{
|
157
|
+
e->type = CONNECTOR_type;
|
158
|
+
if ('@' == djstr[0]) { e->multi = 1; djstr++; }
|
159
|
+
size_t len = strlen(djstr) - 1;
|
160
|
+
if (sp) len--;
|
161
|
+
e->u.string = strndup(djstr, len);
|
162
|
+
e->dir = djstr[len];
|
163
|
+
return e;
|
164
|
+
}
|
165
|
+
|
166
|
+
/* If there are multiple connectors, and them together */
|
167
|
+
size_t len = sp - djstr;
|
168
|
+
char * tmp = strndup(djstr, len);
|
169
|
+
Exp *p1 = make_exp(tmp, 0.0);
|
170
|
+
free (tmp);
|
171
|
+
Exp *p2 = make_exp(sp+1, 0.0);
|
172
|
+
|
173
|
+
E_list *l;
|
174
|
+
E_list *lhead = NULL;
|
175
|
+
|
176
|
+
l = (E_list *) malloc(sizeof(E_list));
|
177
|
+
l->next = lhead;
|
178
|
+
l->e = p2;
|
179
|
+
lhead = l;
|
180
|
+
|
181
|
+
l = (E_list *) malloc(sizeof(E_list));
|
182
|
+
l->next = lhead;
|
183
|
+
l->e = p1;
|
184
|
+
lhead = l;
|
185
|
+
|
186
|
+
e->type = AND_type;
|
187
|
+
e->u.l = lhead;
|
188
|
+
|
189
|
+
return e;
|
190
|
+
}
|
191
|
+
|
192
|
+
#if NOT_NEEDED
/**
 * Combine two expressions under a new OR_type node whose list holds
 * p1 followed by p2. When p2 is NULL nothing is allocated and p1 is
 * returned as-is.
 */
static Exp * or_exp(Exp *p1, Exp *p2)
{
	if (NULL == p2) return p1;

	Exp *node = (Exp *) malloc(sizeof(Exp));
	node->type = OR_type;
	node->cost = 0.0;
	node->dir = ' ';
	node->multi = 0;

	/* Two-element list: p1 first, then p2. */
	E_list *second = (E_list *) malloc(sizeof(E_list));
	second->e = p2;
	second->next = NULL;

	E_list *first = (E_list *) malloc(sizeof(E_list));
	first->e = p1;
	first->next = second;

	node->u.l = first;
	return node;
}
#endif
|
220
|
+
|
221
|
+
/**
 * Recursively release an expression tree built by make_exp().
 *
 * For a connector node the name string is freed; for any other node
 * every list cell and the sub-expression it holds are freed. In both
 * cases the node itself is then freed. NULL is accepted and ignored.
 */
static void free_exp(Exp *e)
{
	if (NULL == e) return;

	if (CONNECTOR_type != e->type)
	{
		E_list *l = e->u.l;
		while (l)
		{
			E_list *ln = l->next;
			free_exp(l->e);
			free(l);
			l = ln;
		}
	}
	else
	{
		free((char *) e->u.string);
	}

	/* The node itself must go in both cases; returning early for
	 * list nodes would leak them. */
	free(e);
}
|
239
|
+
|
240
|
+
/**
 * Look up the extra disjuncts available to `wrd` through its cluster.
 *
 * The word is looked up in ClusterMembers; if a cluster is found, each
 * of that cluster's (disjunct, cost) rows is turned into an expression
 * and expanded with build_disjuncts_for_X_node(). The results are
 * chained together with catenate_disjuncts().
 *
 * Returns the resulting disjunct list, or NULL when there is no
 * cluster subsystem, the statements were never prepared, or the word
 * belongs to no cluster.
 */
Disjunct * lg_cluster_get_disjuncts(Cluster *c, const char * wrd)
{
	Disjunct *djl = NULL;
	int rc;

	/* lg_cluster_new() leaves the statements NULL when the database is
	 * missing or a prepare failed; there is nothing to query then. */
	if (NULL == c || NULL == c->clu_query || NULL == c->dj_query)
		return NULL;

	/* Look for a cluster containing this word */
	rc = sqlite3_bind_text(c->clu_query, 1, wrd, -1, SQLITE_STATIC);
	if (rc != SQLITE_OK) goto noclust;
	rc = sqlite3_step(c->clu_query);
	if (rc != SQLITE_ROW) goto noclust;

	/* Get the cluster name, and look for the disjuncts. The name is
	 * bound without copying; it stays valid because clu_query is only
	 * reset after the loop below has finished. */
	const char * cluname = (const char *) sqlite3_column_text(c->clu_query,0);
	rc = sqlite3_bind_text(c->dj_query, 1, cluname, -1, SQLITE_STATIC);

	while (SQLITE_OK == rc)
	{
		if (SQLITE_ROW != sqlite3_step(c->dj_query)) break;

		const char * djs = (const char *) sqlite3_column_text(c->dj_query,0);
		double cost = sqlite3_column_double(c->dj_query,1);

		/* A NULL column value has no disjunct to parse. */
		if (NULL == djs) continue;

		/* Shift the stored cost down by 6.0, never going below zero.
		 * NOTE(review): the reason for this offset is not stated in
		 * this file. */
		cost -= 6.0;
		if (cost < 0.0) cost = 0.0;

		/* Building expressions */
		Exp *e = make_exp(djs, cost);
		X_node x;
		memset(&x, 0, sizeof(x)); /* leave no member indeterminate */
		x.exp = e;
		x.string = wrd;
		Disjunct *dj = build_disjuncts_for_X_node(&x, MAX_CONNECTOR_COST);
		djl = catenate_disjuncts(dj, djl);
		free_exp(e);
	}

	sqlite3_reset(c->dj_query);
	sqlite3_clear_bindings(c->dj_query);

noclust:
	sqlite3_reset(c->clu_query);
	sqlite3_clear_bindings(c->clu_query);
	return djl;
}
|
284
|
+
|
285
|
+
|
286
|
+
/* ======================= END OF FILE ===================== */
|
@@ -0,0 +1,32 @@
|
|
1
|
+
/*
|
2
|
+
* cluster.h
|
3
|
+
*
|
4
|
+
* Data for related-word clusters. Meant to expand disjunct coverage
|
5
|
+
* for the case where a parse cannot be completed without omitting
|
6
|
+
* a word.
|
7
|
+
*
|
8
|
+
* Copyright (c) 2009 Linas Vepstas <linasvepstas@gmail.com>
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef _LINKGRAMMAR_CLUSTER_H
|
12
|
+
#define _LINKGRAMMAR_CLUSTER_H
|
13
|
+
|
14
|
+
#ifdef USE_CORPUS
|
15
|
+
|
16
|
+
#include "../api-types.h"
|
17
|
+
#include "../link-includes.h"
|
18
|
+
|
19
|
+
Cluster * lg_cluster_new(void);
|
20
|
+
void lg_cluster_delete(Cluster *);
|
21
|
+
|
22
|
+
Disjunct * lg_cluster_get_disjuncts(Cluster *, const char * wrd);
|
23
|
+
|
24
|
+
#else /* USE_CORPUS */
|
25
|
+
|
26
|
+
static inline Cluster * lg_cluster_new(void) { return NULL; }
|
27
|
+
static inline void lg_cluster_delete(Cluster *c) {}
|
28
|
+
static inline Disjunct * lg_cluster_get_disjuncts(Cluster *c, const char * wrd) { return NULL; }
|
29
|
+
|
30
|
+
#endif /* USE_CORPUS */
|
31
|
+
|
32
|
+
#endif /* _LINKGRAMMAR_CLUSTER_H */
|
@@ -0,0 +1,483 @@
|
|
1
|
+
/*
|
2
|
+
* corpus.c
|
3
|
+
*
|
4
|
+
* Data for corpus statistics, used to provide a parse ranking
|
5
|
+
* to drive the SAT solver, as well as parse ranking with the
|
6
|
+
* ordinary solver.
|
7
|
+
*
|
8
|
+
* Copyright (c) 2008, 2009 Linas Vepstas <linasvepstas@gmail.com>
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
13
|
+
#include <string.h>
|
14
|
+
#include <sqlite3.h>
|
15
|
+
#include "corpus.h"
|
16
|
+
#include "../api-structures.h"
|
17
|
+
#include "../disjuncts.h"
|
18
|
+
#include "../utilities.h"
|
19
|
+
|
20
|
+
struct corpus_s
|
21
|
+
{
|
22
|
+
char * dbname;
|
23
|
+
sqlite3 *dbconn;
|
24
|
+
sqlite3_stmt *rank_query;
|
25
|
+
sqlite3_stmt *sense_query;
|
26
|
+
char *errmsg;
|
27
|
+
int rc;
|
28
|
+
};
|
29
|
+
|
30
|
+
struct sense_s
|
31
|
+
{
|
32
|
+
int word;
|
33
|
+
const char * inflected_word;
|
34
|
+
const char * disjunct;
|
35
|
+
char * sense;
|
36
|
+
double score;
|
37
|
+
Sense *next;
|
38
|
+
};
|
39
|
+
|
40
|
+
/* ========================================================= */
|
41
|
+
|
42
|
+
static void * db_file_open(const char * dbname, void * user_data)
|
43
|
+
{
|
44
|
+
Corpus *c = (Corpus *) user_data;
|
45
|
+
int rc;
|
46
|
+
sqlite3 *dbconn;
|
47
|
+
c->rc = sqlite3_open_v2(dbname, &dbconn, SQLITE_OPEN_READONLY, NULL);
|
48
|
+
if (c->rc)
|
49
|
+
{
|
50
|
+
sqlite3_close(dbconn);
|
51
|
+
return NULL;
|
52
|
+
}
|
53
|
+
|
54
|
+
c->dbname = strdup(dbname);
|
55
|
+
return dbconn;
|
56
|
+
}
|
57
|
+
|
58
|
+
|
59
|
+
/**
|
60
|
+
* Initialize the corpus statistics subsystem.
|
61
|
+
*/
|
62
|
+
Corpus * lg_corpus_new(void)
|
63
|
+
{
|
64
|
+
int rc;
|
65
|
+
|
66
|
+
Corpus *c = (Corpus *) malloc(sizeof(Corpus));
|
67
|
+
c->rank_query = NULL;
|
68
|
+
c->sense_query = NULL;
|
69
|
+
c->errmsg = NULL;
|
70
|
+
c->dbname = NULL;
|
71
|
+
|
72
|
+
/* dbname = "/link-grammar/data/en/sql/disjuncts.db"; */
|
73
|
+
#define DBNAME "sql/disjuncts.db"
|
74
|
+
c->dbconn = object_open(DBNAME, db_file_open, c);
|
75
|
+
if (NULL == c->dbconn)
|
76
|
+
{
|
77
|
+
/* Very weird .. but if the database is not found, then sqlite
|
78
|
+
* reports an "out of memory" error! So hide this misleading
|
79
|
+
* error message.
|
80
|
+
*/
|
81
|
+
if (SQLITE_CANTOPEN == c->rc)
|
82
|
+
{
|
83
|
+
prt_error("Warning: Can't open database: File not found\n"
|
84
|
+
"\tWas looking for: " DBNAME);
|
85
|
+
}
|
86
|
+
else
|
87
|
+
{
|
88
|
+
prt_error("Warning: Can't open database: %s\n"
|
89
|
+
"\tWas looking for: " DBNAME,
|
90
|
+
sqlite3_errmsg(c->dbconn));
|
91
|
+
}
|
92
|
+
return c;
|
93
|
+
}
|
94
|
+
|
95
|
+
/* Now prepare the statements we plan to use */
|
96
|
+
rc = sqlite3_prepare_v2(c->dbconn,
|
97
|
+
"SELECT log_cond_probability FROM Disjuncts "
|
98
|
+
"WHERE inflected_word = ? AND disjunct = ?;",
|
99
|
+
-1, &c->rank_query, NULL);
|
100
|
+
if (rc != SQLITE_OK)
|
101
|
+
{
|
102
|
+
prt_error("Error: Can't prepare the ranking statment: %s\n",
|
103
|
+
sqlite3_errmsg(c->dbconn));
|
104
|
+
}
|
105
|
+
|
106
|
+
/* Results are returned in sorted order .. would it be faster
|
107
|
+
* to sort locally? Don't know ... */
|
108
|
+
rc = sqlite3_prepare_v2(c->dbconn,
|
109
|
+
"SELECT word_sense, log_cond_probability FROM DisjunctSenses "
|
110
|
+
"WHERE inflected_word = ? AND disjunct = ? "
|
111
|
+
"ORDER BY log_cond_probability ASC;",
|
112
|
+
-1, &c->sense_query, NULL);
|
113
|
+
if (rc != SQLITE_OK)
|
114
|
+
{
|
115
|
+
prt_error("Error: Can't prepare the sense statment: %s\n",
|
116
|
+
sqlite3_errmsg(c->dbconn));
|
117
|
+
}
|
118
|
+
|
119
|
+
prt_error("Info: Corpus statistics database found at %s\n", c->dbname);
|
120
|
+
return c;
|
121
|
+
}
|
122
|
+
|
123
|
+
/**
|
124
|
+
* lg_corpus_delete -- shut down the corpus statistics subsystem.
|
125
|
+
*/
|
126
|
+
void lg_corpus_delete(Corpus *c)
|
127
|
+
{
|
128
|
+
if (NULL == c) return;
|
129
|
+
|
130
|
+
if (c->rank_query)
|
131
|
+
{
|
132
|
+
sqlite3_finalize(c->rank_query);
|
133
|
+
c->rank_query = NULL;
|
134
|
+
}
|
135
|
+
|
136
|
+
if (c->sense_query)
|
137
|
+
{
|
138
|
+
sqlite3_finalize(c->sense_query);
|
139
|
+
c->sense_query = NULL;
|
140
|
+
}
|
141
|
+
|
142
|
+
if (c->dbconn)
|
143
|
+
{
|
144
|
+
sqlite3_close(c->dbconn);
|
145
|
+
c->dbconn = NULL;
|
146
|
+
}
|
147
|
+
|
148
|
+
if (c->dbname)
|
149
|
+
{
|
150
|
+
free(c->dbname);
|
151
|
+
c->dbname = NULL;
|
152
|
+
}
|
153
|
+
free(c);
|
154
|
+
}
|
155
|
+
|
156
|
+
/* ========================================================= */
|
157
|
+
|
158
|
+
/* LOW_SCORE is what is assumed if a disjunct-word pair is not found
|
159
|
+
* in the dictionary. It is meant to be -log_2(prob(d|w)) where
|
160
|
+
* prob(d|w) is the conditional probability of seeing the disjunct d
|
161
|
+
* given the word w. A value of 17 is about equal to 1 in 100,000.
|
162
|
+
*/
|
163
|
+
#define LOW_SCORE 17.0
|
164
|
+
|
165
|
+
/**
|
166
|
+
* get_disjunct_score -- get log probability of observing disjunt.
|
167
|
+
*
|
168
|
+
* Given an "inflected" word and a disjunct, thris routine returns the
|
169
|
+
* -log_2 conditional probability prob(d|w) of seeing the disjunct 'd'
|
170
|
+
* given that the word 'w' was observed. Here, "inflected word" means
|
171
|
+
* the link-grammar dictionary entry, complete with its trailing period
|
172
|
+
* and tag -- e.g. run.v or running.g -- everything after the dot is the
|
173
|
+
* "inflection".
|
174
|
+
*/
|
175
|
+
static double get_disjunct_score(Corpus *corp,
|
176
|
+
const char * inflected_word,
|
177
|
+
const char * disjunct)
|
178
|
+
{
|
179
|
+
double val;
|
180
|
+
int rc;
|
181
|
+
|
182
|
+
/* Look up the disjunct in the database */
|
183
|
+
rc = sqlite3_bind_text(corp->rank_query, 1,
|
184
|
+
inflected_word, -1, SQLITE_STATIC);
|
185
|
+
if (rc != SQLITE_OK)
|
186
|
+
{
|
187
|
+
prt_error("Error: SQLite can't bind word: rc=%d \n", rc);
|
188
|
+
return LOW_SCORE;
|
189
|
+
}
|
190
|
+
|
191
|
+
rc = sqlite3_bind_text(corp->rank_query, 2,
|
192
|
+
disjunct, -1, SQLITE_STATIC);
|
193
|
+
if (rc != SQLITE_OK)
|
194
|
+
{
|
195
|
+
prt_error("Error: SQLite can't bind disjunct: rc=%d \n", rc);
|
196
|
+
return LOW_SCORE;
|
197
|
+
}
|
198
|
+
|
199
|
+
rc = sqlite3_step(corp->rank_query);
|
200
|
+
if (rc != SQLITE_ROW)
|
201
|
+
{
|
202
|
+
val = LOW_SCORE;
|
203
|
+
#ifdef DEBUG
|
204
|
+
printf ("Word=%s dj=%s not found in dict, assume score=%f\n",
|
205
|
+
inflected_word, disjunct, val);
|
206
|
+
#endif
|
207
|
+
}
|
208
|
+
else
|
209
|
+
{
|
210
|
+
val = sqlite3_column_double(corp->rank_query, 0);
|
211
|
+
if (LOW_SCORE < val) val = LOW_SCORE;
|
212
|
+
#ifdef DEBUG
|
213
|
+
printf ("Word=%s dj=%s score=%f\n", inflected_word, disjunct, val);
|
214
|
+
#endif
|
215
|
+
}
|
216
|
+
|
217
|
+
/* Failure to do both a reset *and* a clear will cause subsequent
|
218
|
+
* binds tp fail. */
|
219
|
+
sqlite3_reset(corp->rank_query);
|
220
|
+
sqlite3_clear_bindings(corp->rank_query);
|
221
|
+
return val;
|
222
|
+
}
|
223
|
+
|
224
|
+
/* ========================================================= */
|
225
|
+
|
226
|
+
/**
|
227
|
+
* lg_corpus_score -- compute parse-ranking score for sentence.
|
228
|
+
*
|
229
|
+
* Given a parsed sentence, this routine will compute a parse ranking
|
230
|
+
* score, based on the probabilites of observing the indicated set of
|
231
|
+
* disjuncts in the statistics database.
|
232
|
+
*
|
233
|
+
* The score is stored in the Linkage_info->corpus_cost struct member.
|
234
|
+
*
|
235
|
+
* The score is currently computed as the average -log_2 conditional
|
236
|
+
* probability p(d|w) of observing disjunct 'd', given word 'w'.
|
237
|
+
* Lower scores are better -- they indicate more likely parses.
|
238
|
+
*/
|
239
|
+
void lg_corpus_score(Sentence sent, Linkage_info *lifo)
|
240
|
+
{
|
241
|
+
const char *infword, *djstr;
|
242
|
+
double tot_score = 0.0f;
|
243
|
+
Corpus *corp = sent->dict->corpus;
|
244
|
+
int nwords = sent->length;
|
245
|
+
int w;
|
246
|
+
|
247
|
+
/* No-op if the database is not open */
|
248
|
+
if (NULL == corp->dbconn) return;
|
249
|
+
|
250
|
+
lg_compute_disjunct_strings(sent, lifo);
|
251
|
+
|
252
|
+
/* Decrement nwords, so as to ignore the RIGHT-WALL */
|
253
|
+
nwords --;
|
254
|
+
|
255
|
+
/* Loop over each word in the sentence (skipping LEFT-WALL, which is
|
256
|
+
* word 0. */
|
257
|
+
for (w=1; w<nwords; w++)
|
258
|
+
{
|
259
|
+
Disjunct *disj = sent->parse_info->chosen_disjuncts[w];
|
260
|
+
|
261
|
+
/* disj is NULL if word did not participate in parse */
|
262
|
+
if (NULL == disj)
|
263
|
+
{
|
264
|
+
tot_score += LOW_SCORE;
|
265
|
+
continue;
|
266
|
+
}
|
267
|
+
infword = disj->string;
|
268
|
+
djstr = lifo->disjunct_list_str[w];
|
269
|
+
tot_score += get_disjunct_score(corp, infword, djstr);
|
270
|
+
}
|
271
|
+
|
272
|
+
/* Decrement nwords, so as to ignore the LEFT-WALL */
|
273
|
+
--nwords;
|
274
|
+
tot_score /= nwords;
|
275
|
+
lifo->corpus_cost = tot_score;
|
276
|
+
}
|
277
|
+
|
278
|
+
/**
 * lg_corpus_disjunct_score -- score of the disjunct chosen for one word.
 *
 * Looks up, for word number 'w' of the linkage's sentence, the score of
 * the disjunct that was chosen for it (see get_disjunct_score).
 *
 * Returns LOW_SCORE when there is no corpus handle, when the database
 * is not open, when 'w' is not a valid word index for the sentence, or
 * when the word did not participate in the parse.
 */
double lg_corpus_disjunct_score(Linkage linkage, int w)
{
	const char *infword, *djstr;
	Sentence sent = linkage->sent;
	Linkage_info *lifo = linkage->info;
	Corpus *corp = sent->dict->corpus;
	int nwords = sent->length;
	Disjunct *disj;

	/* No-op if there is no corpus, or the database is not open */
	if (NULL == corp || NULL == corp->dbconn) return LOW_SCORE;

	/* Refuse indexes outside the sentence rather than reading past the
	 * per-word arrays. */
	if (w < 0 || w >= nwords) return LOW_SCORE;

	/* disj is NULL if word did not participate in parse */
	disj = sent->parse_info->chosen_disjuncts[w];
	if (NULL == disj) return LOW_SCORE;

	lg_compute_disjunct_strings(sent, lifo);

	infword = disj->string;
	djstr = lifo->disjunct_list_str[w];
	return get_disjunct_score(corp, infword, djstr);
}
|
302
|
+
|
303
|
+
/* ========================================================= */
|
304
|
+
|
305
|
+
/**
|
306
|
+
* lg_corpus_senses -- Given word and disjunct, look up senses.
|
307
|
+
*
|
308
|
+
* Given a particular disjunct for a word, look up its most
|
309
|
+
* likely sense assignments from the database.
|
310
|
+
*/
|
311
|
+
|
312
|
+
static Sense * lg_corpus_senses(Corpus *corp,
|
313
|
+
const char * inflected_word,
|
314
|
+
const char * disjunct,
|
315
|
+
int wrd)
|
316
|
+
{
|
317
|
+
double log_prob;
|
318
|
+
const unsigned char *sense;
|
319
|
+
Sense *sns, *head = NULL;
|
320
|
+
int rc;
|
321
|
+
|
322
|
+
/* Look up the disjunct in the database */
|
323
|
+
rc = sqlite3_bind_text(corp->sense_query, 1,
|
324
|
+
inflected_word, -1, SQLITE_STATIC);
|
325
|
+
if (rc != SQLITE_OK)
|
326
|
+
{
|
327
|
+
prt_error("Error: SQLite can't bind word in sense query: rc=%d \n", rc);
|
328
|
+
return NULL;
|
329
|
+
}
|
330
|
+
|
331
|
+
rc = sqlite3_bind_text(corp->sense_query, 2,
|
332
|
+
disjunct, -1, SQLITE_STATIC);
|
333
|
+
if (rc != SQLITE_OK)
|
334
|
+
{
|
335
|
+
prt_error("Error: SQLite can't bind disjunct in sense query: rc=%d \n", rc);
|
336
|
+
return NULL;
|
337
|
+
}
|
338
|
+
|
339
|
+
rc = sqlite3_step(corp->sense_query);
|
340
|
+
while (SQLITE_ROW == rc)
|
341
|
+
{
|
342
|
+
sense = sqlite3_column_text(corp->sense_query, 0);
|
343
|
+
log_prob = sqlite3_column_double(corp->sense_query, 1);
|
344
|
+
// printf ("Word=%s dj=%s sense=%s score=%f\n",
|
345
|
+
// inflected_word, disjunct, sense, log_prob);
|
346
|
+
|
347
|
+
sns = (Sense *) malloc(sizeof(Sense));
|
348
|
+
sns->next = head;
|
349
|
+
head = sns;
|
350
|
+
|
351
|
+
sns->inflected_word = inflected_word;
|
352
|
+
sns->disjunct = disjunct;
|
353
|
+
sns->sense = strdup(sense);
|
354
|
+
sns->score = log_prob;
|
355
|
+
sns->word = wrd;
|
356
|
+
|
357
|
+
/* Get the next row, if any */
|
358
|
+
rc = sqlite3_step(corp->sense_query);
|
359
|
+
}
|
360
|
+
|
361
|
+
/* Failure to do both a reset *and* a clear will cause subsequent
|
362
|
+
* binds tp fail. */
|
363
|
+
sqlite3_reset(corp->sense_query);
|
364
|
+
sqlite3_clear_bindings(corp->sense_query);
|
365
|
+
|
366
|
+
return head;
|
367
|
+
}
|
368
|
+
|
369
|
+
/* ========================================================= */
|
370
|
+
|
371
|
+
/**
|
372
|
+
* lg_corpus_linkage_senses -- Given a linkage, look up senses.
|
373
|
+
*
|
374
|
+
* Given a particular linakge, look up the most likely sense
|
375
|
+
* assignments from the database.
|
376
|
+
*
|
377
|
+
* This function is not used to guide the parsing process; it is
|
378
|
+
* only an informational look-up.
|
379
|
+
*/
|
380
|
+
|
381
|
+
void lg_corpus_linkage_senses(Linkage linkage)
|
382
|
+
{
|
383
|
+
const char * infword;
|
384
|
+
Sentence sent = linkage->sent;
|
385
|
+
Dictionary dict = sent->dict;
|
386
|
+
Corpus *corp = dict->corpus;
|
387
|
+
int nwords = sent->length;
|
388
|
+
Linkage_info *lifo = linkage->info;
|
389
|
+
int w;
|
390
|
+
|
391
|
+
if (lifo->sense_list) return;
|
392
|
+
|
393
|
+
/* Set up the disjunct strings first */
|
394
|
+
lg_compute_disjunct_strings(sent, lifo);
|
395
|
+
|
396
|
+
lifo->nwords = nwords;
|
397
|
+
lifo->sense_list = (Sense **) malloc(nwords * sizeof (Sense *));
|
398
|
+
memset(lifo->sense_list, 0, nwords * sizeof (Sense *));
|
399
|
+
|
400
|
+
/* Decrement nwords, so as to ignore the RIGHT-WALL */
|
401
|
+
nwords --;
|
402
|
+
|
403
|
+
/* Loop over each word in the sentence (skipping LEFT-WALL, which is
|
404
|
+
* word 0. */
|
405
|
+
for (w=1; w<nwords; w++)
|
406
|
+
{
|
407
|
+
Disjunct *disj = sent->parse_info->chosen_disjuncts[w];
|
408
|
+
|
409
|
+
/* disj is NULL if word did not participate in parse */
|
410
|
+
if (NULL == disj)
|
411
|
+
{
|
412
|
+
continue;
|
413
|
+
}
|
414
|
+
infword = disj->string;
|
415
|
+
|
416
|
+
lifo->sense_list[w] = lg_corpus_senses(corp, infword,
|
417
|
+
lifo->disjunct_list_str[w], w);
|
418
|
+
}
|
419
|
+
}
|
420
|
+
|
421
|
+
/* ========================================================= */
|
422
|
+
/* Return bits and pieces of the sense assignments */
|
423
|
+
|
424
|
+
/**
 * lg_get_word_sense -- return the head of the sense list of a word.
 *
 * Returns NULL if no sense list has been set up in 'lifo', or if 'word'
 * is negative or not below the recorded number of words. The returned
 * entry may itself be NULL when no senses were stored for that word.
 */
Sense * lg_get_word_sense(Linkage_info *lifo, int word)
{
	if (!lifo->sense_list) return NULL;

	/* A negative index would read before the start of the table */
	if (word < 0) return NULL;
	if (lifo->nwords <= word) return NULL;
	return lifo->sense_list[word];
}
|
430
|
+
|
431
|
+
/**
 * lg_sense_next -- return the node that follows 'sns' in its list.
 * 'sns' must not be NULL; it is dereferenced unconditionally.
 */
Sense * lg_sense_next(Sense *sns)
{
	Sense *following = sns->next;

	return following;
}
|
435
|
+
|
436
|
+
/**
 * lg_sense_get_index -- return the word index stored in 'sns'.
 * 'sns' must not be NULL; it is dereferenced unconditionally.
 */
int lg_sense_get_index(Sense *sns)
{
	int word_index = sns->word;

	return word_index;
}
|
440
|
+
|
441
|
+
/**
 * lg_sense_get_subscripted_word -- return the inflected word stored in
 * 'sns'. 'sns' must not be NULL; it is dereferenced unconditionally.
 */
const char * lg_sense_get_subscripted_word(Sense *sns)
{
	const char *word_str = sns->inflected_word;

	return word_str;
}
|
445
|
+
|
446
|
+
/**
 * lg_sense_get_disjunct -- return the disjunct string stored in 'sns'.
 * 'sns' must not be NULL; it is dereferenced unconditionally.
 */
const char * lg_sense_get_disjunct(Sense *sns)
{
	const char *dj_str = sns->disjunct;

	return dj_str;
}
|
450
|
+
|
451
|
+
/**
 * lg_sense_get_sense -- return the sense string stored in 'sns'.
 * 'sns' must not be NULL; it is dereferenced unconditionally.
 */
const char * lg_sense_get_sense(Sense *sns)
{
	const char *sense_str = sns->sense;

	return sense_str;
}
|
455
|
+
|
456
|
+
/**
 * lg_sense_get_score -- return the score stored in 'sns'.
 * 'sns' must not be NULL; it is dereferenced unconditionally.
 */
double lg_sense_get_score(Sense *sns)
{
	double stored = sns->score;

	return stored;
}
|
460
|
+
|
461
|
+
/**
 * lg_sense_delete -- free all sense lists held by 'lifo'.
 *
 * Walks each of the lifo->nwords per-word lists, freeing every node
 * together with its sense string, then frees the table itself and
 * resets lifo->sense_list to NULL. Does nothing if no table is present.
 */
void lg_sense_delete(Linkage_info *lifo)
{
	size_t count = lifo->nwords;
	size_t idx;

	if (!lifo->sense_list) return;

	for (idx = 0; idx < count; idx++)
	{
		Sense *cur, *following;

		/* Remember the successor before the node is released. */
		for (cur = lifo->sense_list[idx]; cur != NULL; cur = following)
		{
			following = cur->next;
			free(cur->sense);
			free(cur);
		}
	}

	free(lifo->sense_list);
	lifo->sense_list = NULL;
}
|
482
|
+
|
483
|
+
/* ======================= END OF FILE ===================== */
|