grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,849 @@
|
|
1
|
+
/***************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* Copyright (c) 2008 Linas Vepstas */
|
5
|
+
/* All rights reserved */
|
6
|
+
/* */
|
7
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
8
|
+
/* license set forth in the LICENSE file included with this software, */
|
9
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
10
|
+
/* This license allows free redistribution and use in source and binary */
|
11
|
+
/* forms, with or without modification, subject to certain conditions. */
|
12
|
+
/* */
|
13
|
+
/***************************************************************************/
|
14
|
+
|
15
|
+
/****************************************************************************
|
16
|
+
*
|
17
|
+
* This is a simple example of the link parser API. It simulates most of
|
18
|
+
* the functionality of the original link grammar parser, allowing sentences
|
19
|
+
* to be typed in either interactively or in "batch" mode (if -batch is
|
20
|
+
* specified on the command line, and stdin is redirected to a file).
|
21
|
+
* The program:
|
22
|
+
* Opens up a dictionary
|
23
|
+
* Iterates:
|
24
|
+
* 1. Reads from stdin to get an input string to parse
|
25
|
+
* 2. Tokenizes the string to form a Sentence
|
26
|
+
* 3. Tries to parse it with cost 0
|
27
|
+
* 4. Tries to parse with increasing cost
|
28
|
+
* When a parse is found:
|
29
|
+
* 1. Extracts each Linkage
|
30
|
+
* 2. Passes it to process_some_linkages()
|
31
|
+
* 3. Deletes linkage
|
32
|
+
* After parsing each Sentence is deleted by making a call to
|
33
|
+
* sentence_delete.
|
34
|
+
*
|
35
|
+
****************************************************************************/
|
36
|
+
|
37
|
+
#include <errno.h>
|
38
|
+
#include <locale.h>
|
39
|
+
#include <stdio.h>
|
40
|
+
#include <stdlib.h>
|
41
|
+
#include <string.h>
|
42
|
+
#include <wchar.h>
|
43
|
+
|
44
|
+
/* Used for terminal resizing */
|
45
|
+
#ifndef _WIN32
|
46
|
+
#include <termios.h>
|
47
|
+
#include <sys/ioctl.h>
|
48
|
+
#include <fcntl.h>
|
49
|
+
#include <unistd.h>
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#ifdef HAVE_EDITLINE
|
53
|
+
#include <editline/readline.h>
|
54
|
+
#endif
|
55
|
+
|
56
|
+
#ifdef _MSC_VER
|
57
|
+
#define LINK_GRAMMAR_DLL_EXPORT 0
|
58
|
+
#endif
|
59
|
+
|
60
|
+
#include <link-grammar/link-includes.h>
|
61
|
+
#include <link-grammar/structures.h>
|
62
|
+
#include <link-grammar/error.h>
|
63
|
+
|
64
|
+
#include "command-line.h"
|
65
|
+
#include "expand.h"
|
66
|
+
|
67
|
+
#define MAX_INPUT 1024
|
68
|
+
#define DISPLAY_MAX 1024
|
69
|
+
#define COMMENT_CHAR '%' /* input lines beginning with this are ignored */
|
70
|
+
|
71
|
+
static int batch_errors = 0;
|
72
|
+
static int input_pending=FALSE;
|
73
|
+
static Parse_Options opts;
|
74
|
+
static Parse_Options panic_parse_opts;
|
75
|
+
static int verbosity = 0;
|
76
|
+
|
77
|
+
typedef enum
|
78
|
+
{
|
79
|
+
UNGRAMMATICAL='*',
|
80
|
+
PARSE_WITH_DISJUNCT_COST_GT_0=':', /* Not used anywhere, currently ... */
|
81
|
+
NO_LABEL=' '
|
82
|
+
} Label;
|
83
|
+
|
84
|
+
static char *
|
85
|
+
fget_input_string(FILE *in, FILE *out, Parse_Options opts)
|
86
|
+
{
|
87
|
+
#ifdef HAVE_EDITLINE
|
88
|
+
static char * pline = NULL;
|
89
|
+
const char * prompt = "linkparser> ";
|
90
|
+
|
91
|
+
if (in != stdin)
|
92
|
+
{
|
93
|
+
static char input_string[MAX_INPUT];
|
94
|
+
input_pending = FALSE;
|
95
|
+
if (fgets(input_string, MAX_INPUT, in)) return input_string;
|
96
|
+
return NULL;
|
97
|
+
}
|
98
|
+
|
99
|
+
if (input_pending && pline != NULL)
|
100
|
+
{
|
101
|
+
input_pending = FALSE;
|
102
|
+
return pline;
|
103
|
+
}
|
104
|
+
if (parse_options_get_batch_mode(opts) ||
|
105
|
+
(verbosity == 0) ||
|
106
|
+
input_pending)
|
107
|
+
{
|
108
|
+
prompt = "";
|
109
|
+
}
|
110
|
+
input_pending = FALSE;
|
111
|
+
if (pline) free(pline);
|
112
|
+
pline = readline(prompt);
|
113
|
+
|
114
|
+
/* Save non-blank lines */
|
115
|
+
if (pline && *pline)
|
116
|
+
{
|
117
|
+
if (*pline) add_history(pline);
|
118
|
+
}
|
119
|
+
return pline;
|
120
|
+
|
121
|
+
#else
|
122
|
+
static char input_string[MAX_INPUT];
|
123
|
+
|
124
|
+
if ((!parse_options_get_batch_mode(opts)) &&
|
125
|
+
(verbosity > 0) &&
|
126
|
+
(!input_pending))
|
127
|
+
{
|
128
|
+
fprintf(out, "linkparser> ");
|
129
|
+
fflush(out);
|
130
|
+
}
|
131
|
+
input_pending = FALSE;
|
132
|
+
|
133
|
+
/* For UTF-8 input, I think its still technically correct to
|
134
|
+
* use fgets() and not fgetws() at this point. */
|
135
|
+
if (fgets(input_string, MAX_INPUT, in)) return input_string;
|
136
|
+
else return NULL;
|
137
|
+
#endif
|
138
|
+
}
|
139
|
+
|
140
|
+
/**
 * Look at the first character of the user's next input without
 * consuming the rest of it.
 *
 * With HAVE_EDITLINE a whole line is fetched through fget_input_string();
 * if it is non-empty it is marked as pending so that the next
 * fget_input_string() call returns that same line. Without HAVE_EDITLINE
 * a single character is read with fgetc(); anything other than a newline
 * is pushed back onto the stream and marked as pending.
 *
 * @param in    Stream to read from.
 * @param out   Stream the prompt is written to.
 * @param opts  Parse options; consulted for the batch-mode flag.
 * @return The first character as an unsigned char converted to int,
 *         '\n' for an empty line, or EOF when no input is available.
 */
static int fget_input_char(FILE *in, FILE *out, Parse_Options opts)
{
#ifdef HAVE_EDITLINE
	char * pline = fget_input_string(in, out, opts);
	if (NULL == pline) return EOF;
	if (*pline)
	{
		input_pending = TRUE;
		/* Convert through unsigned char, as fgetc() does, so that a
		 * byte with the high bit set can never compare equal to EOF. */
		return (unsigned char) *pline;
	}
	return '\n';

#else
	int c;

	if (!parse_options_get_batch_mode(opts) && (verbosity > 0))
		fprintf(out, "linkparser> ");
	fflush(out);

	/* For UTF-8 input, it should still be technically correct to
	 * use fgetc() and not fgetwc() at this point. */
	c = fgetc(in);

	/* Only a real character can be pushed back; at EOF nothing is
	 * pending, so leave input_pending alone. */
	if (c != '\n' && c != EOF)
	{
		ungetc(c, in);
		input_pending = TRUE;
	}
	return c;
#endif
}
|
170
|
+
|
171
|
+
/**************************************************************************
|
172
|
+
*
|
173
|
+
* This procedure displays a linkage graphically. Since the diagrams
|
174
|
+
* are passed as character strings, they need to be deleted with a
|
175
|
+
* call to free.
|
176
|
+
*
|
177
|
+
**************************************************************************/
|
178
|
+
|
179
|
+
static void process_linkage(Linkage linkage, Parse_Options opts)
|
180
|
+
{
|
181
|
+
char * string;
|
182
|
+
int j, mode, first_sublinkage;
|
183
|
+
int nlink;
|
184
|
+
|
185
|
+
if (!linkage) return; /* Can happen in timeout mode */
|
186
|
+
|
187
|
+
if (parse_options_get_use_fat_links(opts) &&
|
188
|
+
parse_options_get_display_union(opts))
|
189
|
+
{
|
190
|
+
linkage_compute_union(linkage);
|
191
|
+
first_sublinkage = linkage_get_num_sublinkages(linkage)-1;
|
192
|
+
}
|
193
|
+
else
|
194
|
+
{
|
195
|
+
first_sublinkage = 0;
|
196
|
+
}
|
197
|
+
|
198
|
+
nlink = linkage_get_num_sublinkages(linkage);
|
199
|
+
for (j=first_sublinkage; j<nlink; ++j)
|
200
|
+
{
|
201
|
+
linkage_set_current_sublinkage(linkage, j);
|
202
|
+
if (parse_options_get_display_on(opts))
|
203
|
+
{
|
204
|
+
string = linkage_print_diagram(linkage);
|
205
|
+
fprintf(stdout, "%s", string);
|
206
|
+
linkage_free_diagram(string);
|
207
|
+
}
|
208
|
+
if (parse_options_get_display_links(opts))
|
209
|
+
{
|
210
|
+
string = linkage_print_links_and_domains(linkage);
|
211
|
+
fprintf(stdout, "%s", string);
|
212
|
+
linkage_free_links_and_domains(string);
|
213
|
+
}
|
214
|
+
if (parse_options_get_display_senses(opts))
|
215
|
+
{
|
216
|
+
string = linkage_print_senses(linkage);
|
217
|
+
fprintf(stdout, "%s", string);
|
218
|
+
linkage_free_senses(string);
|
219
|
+
}
|
220
|
+
if (parse_options_get_display_disjuncts(opts))
|
221
|
+
{
|
222
|
+
string = linkage_print_disjuncts(linkage);
|
223
|
+
fprintf(stdout, "%s", string);
|
224
|
+
linkage_free_disjuncts(string);
|
225
|
+
}
|
226
|
+
if (parse_options_get_display_postscript(opts))
|
227
|
+
{
|
228
|
+
string = linkage_print_postscript(linkage, FALSE);
|
229
|
+
fprintf(stdout, "%s\n", string);
|
230
|
+
linkage_free_postscript(string);
|
231
|
+
}
|
232
|
+
}
|
233
|
+
if ((mode = parse_options_get_display_constituents(opts)))
|
234
|
+
{
|
235
|
+
string = linkage_print_constituent_tree(linkage, mode);
|
236
|
+
if (string != NULL)
|
237
|
+
{
|
238
|
+
fprintf(stdout, "%s\n", string);
|
239
|
+
linkage_free_constituent_tree_str(string);
|
240
|
+
}
|
241
|
+
else
|
242
|
+
{
|
243
|
+
fprintf(stderr, "Can't generate constituents.\n");
|
244
|
+
fprintf(stderr, "Constituent processing has been turned off.\n");
|
245
|
+
}
|
246
|
+
}
|
247
|
+
}
|
248
|
+
|
249
|
+
/**
 * Print a one-line summary of how many linkages were found for `sent`.
 *
 * Nothing is printed when no linkages were found. When more linkages
 * were found than the linkage limit allows, the line reports how many
 * of the post-processed linkages had no post-processing violations;
 * otherwise it reports the post-processed count and the number without
 * violations. A non-zero null count is appended to the line.
 *
 * @param sent  Parsed sentence whose counters are reported.
 * @param opts  Parse options; consulted for the linkage limit.
 */
static void print_parse_statistics(Sentence sent, Parse_Options opts)
{
	int found = sentence_num_linkages_found(sent);
	const char * plural;

	if (found <= 0) return;

	plural = (found == 1) ? "" : "s";

	if (found > parse_options_get_linkage_limit(opts))
	{
		fprintf(stdout, "Found %d linkage%s (%d of %d random "
		                "linkages had no P.P. violations)",
		        found,
		        plural,
		        sentence_num_valid_linkages(sent),
		        sentence_num_linkages_post_processed(sent));
	}
	else
	{
		/* The count shown here is the post-processed one, while the
		 * plural suffix is still chosen from the found count. */
		fprintf(stdout, "Found %d linkage%s (%d had no P.P. violations)",
		        sentence_num_linkages_post_processed(sent),
		        plural,
		        sentence_num_valid_linkages(sent));
	}

	if (sentence_null_count(sent) > 0)
	{
		fprintf(stdout, " at null count %d", sentence_null_count(sent));
	}
	fprintf(stdout, "\n");
}
|
277
|
+
|
278
|
+
|
279
|
+
static int process_some_linkages(Sentence sent, Parse_Options opts)
|
280
|
+
{
|
281
|
+
int c;
|
282
|
+
int i, num_to_query, num_to_display, num_displayed;
|
283
|
+
Linkage linkage;
|
284
|
+
double corpus_cost;
|
285
|
+
|
286
|
+
if (verbosity > 0) print_parse_statistics(sent, opts);
|
287
|
+
num_to_query = MIN(sentence_num_linkages_post_processed(sent),
|
288
|
+
DISPLAY_MAX);
|
289
|
+
if (!parse_options_get_display_bad(opts))
|
290
|
+
{
|
291
|
+
num_to_display = MIN(sentence_num_valid_linkages(sent),
|
292
|
+
DISPLAY_MAX);
|
293
|
+
}
|
294
|
+
else
|
295
|
+
{
|
296
|
+
num_to_display = MIN(sentence_num_linkages_post_processed(sent),
|
297
|
+
DISPLAY_MAX);
|
298
|
+
}
|
299
|
+
|
300
|
+
for (i=0, num_displayed=0; i<num_to_query; i++)
|
301
|
+
{
|
302
|
+
if ((sentence_num_violations(sent, i) > 0) &&
|
303
|
+
(!parse_options_get_display_bad(opts)))
|
304
|
+
{
|
305
|
+
continue;
|
306
|
+
}
|
307
|
+
|
308
|
+
linkage = linkage_create(i, sent, opts);
|
309
|
+
|
310
|
+
/* Currently, sat solver returns NULL when there ain't no more */
|
311
|
+
if (!linkage) break;
|
312
|
+
|
313
|
+
if (verbosity > 0)
|
314
|
+
{
|
315
|
+
if ((sentence_num_valid_linkages(sent) == 1) &&
|
316
|
+
(!parse_options_get_display_bad(opts)))
|
317
|
+
{
|
318
|
+
fprintf(stdout, " Unique linkage, ");
|
319
|
+
}
|
320
|
+
else if ((parse_options_get_display_bad(opts)) &&
|
321
|
+
(sentence_num_violations(sent, i) > 0))
|
322
|
+
{
|
323
|
+
fprintf(stdout, " Linkage %d (bad), ", num_displayed+1);
|
324
|
+
}
|
325
|
+
else
|
326
|
+
{
|
327
|
+
fprintf(stdout, " Linkage %d, ", num_displayed+1);
|
328
|
+
}
|
329
|
+
|
330
|
+
if (!linkage_is_canonical(linkage))
|
331
|
+
{
|
332
|
+
fprintf(stdout, "non-canonical, ");
|
333
|
+
}
|
334
|
+
if (linkage_is_improper(linkage))
|
335
|
+
{
|
336
|
+
fprintf(stdout, "improper fat linkage, ");
|
337
|
+
}
|
338
|
+
if (linkage_has_inconsistent_domains(linkage))
|
339
|
+
{
|
340
|
+
fprintf(stdout, "inconsistent domains, ");
|
341
|
+
}
|
342
|
+
|
343
|
+
corpus_cost = linkage_corpus_cost(linkage);
|
344
|
+
if (corpus_cost < 0.0f)
|
345
|
+
{
|
346
|
+
fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d FAT=%d AND=%d LEN=%d)\n",
|
347
|
+
linkage_unused_word_cost(linkage),
|
348
|
+
linkage_disjunct_cost(linkage),
|
349
|
+
linkage_is_fat(linkage),
|
350
|
+
linkage_and_cost(linkage),
|
351
|
+
linkage_link_cost(linkage));
|
352
|
+
}
|
353
|
+
else
|
354
|
+
{
|
355
|
+
fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%d FAT=%d AND=%d LEN=%d)\n",
|
356
|
+
corpus_cost,
|
357
|
+
linkage_unused_word_cost(linkage),
|
358
|
+
linkage_disjunct_cost(linkage),
|
359
|
+
linkage_is_fat(linkage),
|
360
|
+
linkage_and_cost(linkage),
|
361
|
+
linkage_link_cost(linkage));
|
362
|
+
}
|
363
|
+
}
|
364
|
+
|
365
|
+
process_linkage(linkage, opts);
|
366
|
+
linkage_delete(linkage);
|
367
|
+
|
368
|
+
if (++num_displayed < num_to_display)
|
369
|
+
{
|
370
|
+
if (verbosity > 0)
|
371
|
+
{
|
372
|
+
fprintf(stdout, "Press RETURN for the next linkage.\n");
|
373
|
+
}
|
374
|
+
c = fget_input_char(stdin, stdout, opts);
|
375
|
+
if (c != '\n') return c;
|
376
|
+
}
|
377
|
+
else
|
378
|
+
{
|
379
|
+
break;
|
380
|
+
}
|
381
|
+
}
|
382
|
+
return 'x';
|
383
|
+
}
|
384
|
+
|
385
|
+
/**
 * Compare the outcome of parsing `sent` against the label the batch file
 * attached to it.
 *
 * Returns UNGRAMMATICAL when the presence or absence of valid linkages
 * contradicts the label, PARSE_WITH_DISJUNCT_COST_GT_0 when the sentence
 * was labelled as such but sentence_disjunct_cost(sent, 0) is zero, and
 * FALSE when nothing contradicts the label.  batch_errors is bumped once
 * for every mismatch reported.  The opts argument is not used.
 */
static int there_was_an_error(Label label, Sentence sent, Parse_Options opts)
{
    /* No valid linkage at all: only acceptable for an UNGRAMMATICAL label. */
    if (sentence_num_valid_linkages(sent) <= 0)
    {
        if (label == UNGRAMMATICAL) return FALSE;

        batch_errors++;
        return UNGRAMMATICAL;
    }

    /* A valid linkage exists, yet the sentence was labelled ungrammatical. */
    if (label == UNGRAMMATICAL)
    {
        batch_errors++;
        return UNGRAMMATICAL;
    }

    /* Labelled as needing a non-zero disjunct cost, but the cost is zero. */
    if ((sentence_disjunct_cost(sent, 0) == 0) &&
        (label == PARSE_WITH_DISJUNCT_COST_GT_0))
    {
        batch_errors++;
        return PARSE_WITH_DISJUNCT_COST_GT_0;
    }

    return FALSE;
}
|
405
|
+
|
406
|
+
static void batch_process_some_linkages(Label label,
|
407
|
+
Sentence sent,
|
408
|
+
Parse_Options opts)
|
409
|
+
{
|
410
|
+
Linkage linkage;
|
411
|
+
|
412
|
+
if (there_was_an_error(label, sent, opts)) {
|
413
|
+
/* Note: sentence_num_linkages_found returns total linkages
|
414
|
+
* not valid linkages. So the printed linkage might be bad...
|
415
|
+
*/
|
416
|
+
if (sentence_num_linkages_found(sent) > 0) {
|
417
|
+
linkage = linkage_create(0, sent, opts);
|
418
|
+
process_linkage(linkage, opts);
|
419
|
+
linkage_delete(linkage);
|
420
|
+
}
|
421
|
+
fprintf(stdout, "+++++ error %d\n", batch_errors);
|
422
|
+
}
|
423
|
+
}
|
424
|
+
|
425
|
+
/**
 * Decide whether an input line is something other than a sentence.
 *
 * Returns TRUE for a line starting with a newline, with COMMENT_CHAR,
 * or with '!'; FALSE otherwise.  A '!' line is forwarded to
 * issue_special_command(): text after "!panic_" is applied to
 * panic_parse_opts, anything else after the '!' is applied to opts.
 */
static int special_command(char *input_string, Dictionary dict)
{
    const char lead = input_string[0];

    if (lead == '\n') return TRUE;
    if (lead == COMMENT_CHAR) return TRUE;
    if (lead != '!') return FALSE;

    if (strncmp(input_string, "!panic_", 7) == 0)
    {
        /* Skip the whole "!panic_" prefix. */
        issue_special_command(input_string + 7, panic_parse_opts, dict);
    }
    else
    {
        /* Skip just the leading '!'. */
        issue_special_command(input_string + 1, opts, dict);
    }
    return TRUE;
}
|
441
|
+
|
442
|
+
static Label strip_off_label(char * input_string)
|
443
|
+
{
|
444
|
+
Label c;
|
445
|
+
|
446
|
+
c = (Label) input_string[0];
|
447
|
+
switch(c) {
|
448
|
+
case UNGRAMMATICAL:
|
449
|
+
case PARSE_WITH_DISJUNCT_COST_GT_0:
|
450
|
+
input_string[0] = ' ';
|
451
|
+
return c;
|
452
|
+
case NO_LABEL:
|
453
|
+
default:
|
454
|
+
return NO_LABEL;
|
455
|
+
}
|
456
|
+
}
|
457
|
+
|
458
|
+
/**
 * Load the fixed option values used for the "panic" parse options
 * object.  Only the setters below are touched; the call order is kept
 * exactly as it was.
 */
static void setup_panic_parse_options(Parse_Options opts)
{
    /* Cost and null-count limits. */
    parse_options_set_disjunct_costf(opts, 3.0f);
    parse_options_set_min_null_count(opts, 1);
    parse_options_set_max_null_count(opts, MAX_SENTENCE);

    /* Resource limit. */
    parse_options_set_max_parse_time(opts, 60);

    /* Linkage shape switches. */
    parse_options_set_use_fat_links(opts, 0);
    parse_options_set_islands_ok(opts, 1);
    parse_options_set_short_length(opts, 6);
    parse_options_set_all_short_connectors(opts, 1);

    /* Result cap and spell guessing. */
    parse_options_set_linkage_limit(opts, 100);
    parse_options_set_spell_guess(opts, FALSE);
}
|
471
|
+
|
472
|
+
static void print_usage(char *str) {
|
473
|
+
fprintf(stderr,
|
474
|
+
"Usage: %s [language|dictionary location]\n"
|
475
|
+
" [-<special \"!\" command>]\n"
|
476
|
+
" [--version]\n", str);
|
477
|
+
|
478
|
+
fprintf(stderr, "\nSpecial commands are:\n");
|
479
|
+
opts = parse_options_create();
|
480
|
+
issue_special_command("var", opts, NULL);
|
481
|
+
exit(-1);
|
482
|
+
}
|
483
|
+
|
484
|
+
/**
|
485
|
+
* On Unix, this checks for the current window size,
|
486
|
+
* and sets the output screen width accordingly.
|
487
|
+
* Not sure how MS Windows does this.
|
488
|
+
*/
|
489
|
+
static void check_winsize(Parse_Options popts)
|
490
|
+
{
|
491
|
+
/* Neither windows nor MSYS have the ioctl support needed for this. */
|
492
|
+
#ifdef _WIN32
|
493
|
+
parse_options_set_screen_width(popts, 79);
|
494
|
+
#else
|
495
|
+
struct winsize ws;
|
496
|
+
int fd = open("/dev/tty", O_RDWR);
|
497
|
+
|
498
|
+
if (0 != ioctl(fd, TIOCGWINSZ, &ws))
|
499
|
+
{
|
500
|
+
perror("ioctl(/dev/tty, TIOCGWINSZ)");
|
501
|
+
close(fd);
|
502
|
+
return;
|
503
|
+
}
|
504
|
+
close(fd);
|
505
|
+
|
506
|
+
/* printf("rows %i\n", ws.ws_row); */
|
507
|
+
/* printf("cols %i\n", ws.ws_col); */
|
508
|
+
|
509
|
+
/* Set the screen width only if the returned value seems
|
510
|
+
* rational: its positive and not insanely tiny.
|
511
|
+
*/
|
512
|
+
if ((10 < ws.ws_col) && (16123 > ws.ws_col))
|
513
|
+
{
|
514
|
+
parse_options_set_screen_width(popts, ws.ws_col - 1);
|
515
|
+
}
|
516
|
+
#endif /* _WIN32 */
|
517
|
+
}
|
518
|
+
|
519
|
+
int main(int argc, char * argv[])
|
520
|
+
{
|
521
|
+
FILE *input_fh = stdin;
|
522
|
+
Dictionary dict;
|
523
|
+
const char *language="en"; /* default to english, and not locale */
|
524
|
+
int num_linkages, i;
|
525
|
+
Label label = NO_LABEL;
|
526
|
+
const char *codeset;
|
527
|
+
|
528
|
+
#if LATER
|
529
|
+
/* Try to catch the SIGNWINCH ... except this is not working. */
|
530
|
+
struct sigaction winch_act;
|
531
|
+
winch_act.sa_handler = winch_handler;
|
532
|
+
winch_act.sa_sigaction = NULL;
|
533
|
+
sigemptyset (&winch_act.sa_mask);
|
534
|
+
winch_act.sa_flags = 0;
|
535
|
+
sigaction (SIGWINCH, &winch_act, NULL);
|
536
|
+
#endif
|
537
|
+
|
538
|
+
i = 1;
|
539
|
+
if ((argc > 1) && (argv[1][0] != '-')) {
|
540
|
+
/* the dictionary is the first argument if it doesn't begin with "-" */
|
541
|
+
language = argv[1];
|
542
|
+
i++;
|
543
|
+
}
|
544
|
+
|
545
|
+
/* Get the locale from the environment...
|
546
|
+
* perhaps we should someday get it from the dictionary ??
|
547
|
+
*/
|
548
|
+
setlocale(LC_ALL, "");
|
549
|
+
|
550
|
+
/* Check to make sure the current locale is UTF8; if its not,
|
551
|
+
* then force-set this to the english utf8 locale
|
552
|
+
*/
|
553
|
+
codeset = nl_langinfo(CODESET);
|
554
|
+
if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
|
555
|
+
{
|
556
|
+
fprintf(stderr,
|
557
|
+
"%s: Warning: locale %s was not UTF-8; force-setting to en_US.UTF-8\n",
|
558
|
+
argv[0], codeset);
|
559
|
+
setlocale(LC_CTYPE, "en_US.UTF-8");
|
560
|
+
}
|
561
|
+
|
562
|
+
for (; i<argc; i++)
|
563
|
+
{
|
564
|
+
if (argv[i][0] == '-')
|
565
|
+
{
|
566
|
+
if (strcmp("--version", argv[i])==0)
|
567
|
+
{
|
568
|
+
printf("Version: %s\n", linkgrammar_get_version());
|
569
|
+
exit(0);
|
570
|
+
}
|
571
|
+
/* TBD remove these in version 5.0 */
|
572
|
+
else if ((strcmp("-ppoff", argv[i])==0) ||
|
573
|
+
(strcmp("-coff", argv[i])==0) ||
|
574
|
+
(strcmp("-aoff", argv[i])==0))
|
575
|
+
{
|
576
|
+
fprintf(stderr, "%s: Warning: %s flag ignored\n", argv[0], argv[i]);
|
577
|
+
}
|
578
|
+
}
|
579
|
+
else
|
580
|
+
{
|
581
|
+
print_usage(argv[0]);
|
582
|
+
}
|
583
|
+
}
|
584
|
+
|
585
|
+
opts = parse_options_create();
|
586
|
+
if (opts == NULL)
|
587
|
+
{
|
588
|
+
fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]);
|
589
|
+
exit(-1);
|
590
|
+
}
|
591
|
+
|
592
|
+
panic_parse_opts = parse_options_create();
|
593
|
+
if (panic_parse_opts == NULL)
|
594
|
+
{
|
595
|
+
fprintf(stderr, "%s: Fatal error: unable to create panic parse options\n", argv[0]);
|
596
|
+
exit(-1);
|
597
|
+
}
|
598
|
+
setup_panic_parse_options(panic_parse_opts);
|
599
|
+
parse_options_set_max_sentence_length(opts, 170);
|
600
|
+
parse_options_set_panic_mode(opts, TRUE);
|
601
|
+
parse_options_set_max_parse_time(opts, 30);
|
602
|
+
parse_options_set_linkage_limit(opts, 1000);
|
603
|
+
parse_options_set_short_length(opts, 10);
|
604
|
+
parse_options_set_disjunct_costf(opts, 2.0f);
|
605
|
+
parse_options_set_min_null_count(opts, 0);
|
606
|
+
parse_options_set_max_null_count(opts, 0);
|
607
|
+
|
608
|
+
if (language && *language)
|
609
|
+
dict = dictionary_create_lang(language);
|
610
|
+
else
|
611
|
+
dict = dictionary_create_default_lang();
|
612
|
+
|
613
|
+
if (dict == NULL)
|
614
|
+
{
|
615
|
+
fprintf(stderr, "%s: Fatal error: Unable to open dictionary.\n", argv[0]);
|
616
|
+
exit(-1);
|
617
|
+
}
|
618
|
+
|
619
|
+
/* Process the command line like commands */
|
620
|
+
for (i=1; i<argc; i++)
|
621
|
+
{
|
622
|
+
/* TBD remove these in version 5.0 */
|
623
|
+
if ((strcmp("-pp", argv[i]) == 0) ||
|
624
|
+
(strcmp("-c", argv[i]) == 0) ||
|
625
|
+
(strcmp("-a", argv[i]) == 0) ||
|
626
|
+
(strcmp("-ppoff", argv[i]) == 0) ||
|
627
|
+
(strcmp("-coff", argv[i]) == 0) ||
|
628
|
+
(strcmp("-aoff", argv[i]) == 0))
|
629
|
+
{
|
630
|
+
i++;
|
631
|
+
}
|
632
|
+
else if (argv[i][0] == '-')
|
633
|
+
{
|
634
|
+
int rc;
|
635
|
+
if (argv[i][1] == '!')
|
636
|
+
rc = issue_special_command(argv[i]+2, opts, dict);
|
637
|
+
else
|
638
|
+
rc = issue_special_command(argv[i]+1, opts, dict);
|
639
|
+
|
640
|
+
if (rc)
|
641
|
+
print_usage(argv[0]);
|
642
|
+
}
|
643
|
+
}
|
644
|
+
|
645
|
+
verbosity = parse_options_get_verbosity(opts);
|
646
|
+
check_winsize(opts);
|
647
|
+
|
648
|
+
prt_error("Info: Dictionary version %s.\n",
|
649
|
+
linkgrammar_get_dict_version(dict));
|
650
|
+
prt_error("Info: Library version %s. Enter \"!help\" for help.\n",
|
651
|
+
linkgrammar_get_version());
|
652
|
+
|
653
|
+
/* Main input loop */
|
654
|
+
while (1)
|
655
|
+
{
|
656
|
+
char *input_string;
|
657
|
+
Sentence sent = NULL;
|
658
|
+
|
659
|
+
input_string = fget_input_string(input_fh, stdout, opts);
|
660
|
+
check_winsize(opts);
|
661
|
+
|
662
|
+
if (NULL == input_string)
|
663
|
+
{
|
664
|
+
if (input_fh == stdin) break;
|
665
|
+
fclose (input_fh);
|
666
|
+
input_fh = stdin;
|
667
|
+
continue;
|
668
|
+
}
|
669
|
+
|
670
|
+
if ((strcmp(input_string, "!quit") == 0) ||
|
671
|
+
(strcmp(input_string, "!exit") == 0)) break;
|
672
|
+
|
673
|
+
/* We have to handle the !file command inline; its too hairy
|
674
|
+
* otherwise ... */
|
675
|
+
if (strncmp(input_string, "!file", 5) == 0)
|
676
|
+
{
|
677
|
+
char * filename = &input_string[6];
|
678
|
+
input_fh = fopen(filename, "r");
|
679
|
+
if (NULL == input_fh)
|
680
|
+
{
|
681
|
+
int perr = errno;
|
682
|
+
fprintf(stderr, "Error: %s (%d) %s\n",
|
683
|
+
filename, perr, strerror(perr));
|
684
|
+
input_fh = stdin;
|
685
|
+
continue;
|
686
|
+
}
|
687
|
+
continue;
|
688
|
+
}
|
689
|
+
|
690
|
+
if (special_command(input_string, dict)) continue;
|
691
|
+
if (parse_options_get_echo_on(opts))
|
692
|
+
{
|
693
|
+
printf("%s ", input_string);
|
694
|
+
}
|
695
|
+
|
696
|
+
if (parse_options_get_batch_mode(opts))
|
697
|
+
{
|
698
|
+
label = strip_off_label(input_string);
|
699
|
+
}
|
700
|
+
|
701
|
+
sent = sentence_create(input_string, dict);
|
702
|
+
|
703
|
+
if (sent == NULL) continue;
|
704
|
+
|
705
|
+
if (sentence_length(sent) > parse_options_get_max_sentence_length(opts))
|
706
|
+
{
|
707
|
+
if (verbosity > 0)
|
708
|
+
{
|
709
|
+
fprintf(stdout,
|
710
|
+
"Sentence length (%d words) exceeds maximum allowable (%d words)\n",
|
711
|
+
sentence_length(sent), parse_options_get_max_sentence_length(opts));
|
712
|
+
}
|
713
|
+
sentence_delete(sent);
|
714
|
+
sent = NULL;
|
715
|
+
continue;
|
716
|
+
}
|
717
|
+
|
718
|
+
/* First parse with cost 0 or 1 and no null links */
|
719
|
+
// parse_options_set_disjunct_costf(opts, 2.0f);
|
720
|
+
parse_options_set_min_null_count(opts, 0);
|
721
|
+
parse_options_set_max_null_count(opts, 0);
|
722
|
+
parse_options_reset_resources(opts);
|
723
|
+
|
724
|
+
num_linkages = sentence_parse(sent, opts);
|
725
|
+
if (num_linkages < 0)
|
726
|
+
{
|
727
|
+
sentence_delete(sent);
|
728
|
+
sent = NULL;
|
729
|
+
continue;
|
730
|
+
}
|
731
|
+
|
732
|
+
#if 0
|
733
|
+
/* Try again, this time ommitting the requirement for
|
734
|
+
* definite articles, etc. This should allow for the parsing
|
735
|
+
* of newspaper headlines and other clipped speech.
|
736
|
+
*
|
737
|
+
* XXX Unfortunately, this also allows for the parsing of
|
738
|
+
* all sorts of ungrammatical sentences which should not
|
739
|
+
* parse, and leads to bad parses of many other unparsable
|
740
|
+
* but otherwise grammatical sentences. Thus, this trick
|
741
|
+
* pretty much fails; we leave it here to document the
|
742
|
+
* experiment.
|
743
|
+
*/
|
744
|
+
if (num_linkages == 0)
|
745
|
+
{
|
746
|
+
parse_options_set_disjunct_costf(opts, 3.5f);
|
747
|
+
num_linkages = sentence_parse(sent, opts);
|
748
|
+
if (num_linkages < 0) continue;
|
749
|
+
}
|
750
|
+
#endif
|
751
|
+
|
752
|
+
/* Try using a larger list of disjuncts */
|
753
|
+
if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts))
|
754
|
+
{
|
755
|
+
int expanded;
|
756
|
+
if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n");
|
757
|
+
parse_options_set_disjunct_costf(opts, 2.9f);
|
758
|
+
expanded = lg_expand_disjunct_list(sent);
|
759
|
+
if (expanded)
|
760
|
+
{
|
761
|
+
num_linkages = sentence_parse(sent, opts);
|
762
|
+
}
|
763
|
+
if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n");
|
764
|
+
}
|
765
|
+
|
766
|
+
/* If asked to show bad linkages, then show them. */
|
767
|
+
if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts)))
|
768
|
+
{
|
769
|
+
if (parse_options_get_display_bad(opts))
|
770
|
+
{
|
771
|
+
num_linkages = sentence_num_linkages_found(sent);
|
772
|
+
}
|
773
|
+
}
|
774
|
+
|
775
|
+
/* Now parse with null links */
|
776
|
+
if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts)))
|
777
|
+
{
|
778
|
+
if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");
|
779
|
+
|
780
|
+
if (parse_options_get_allow_null(opts))
|
781
|
+
{
|
782
|
+
/* XXX should use expanded disjunct list here too */
|
783
|
+
parse_options_set_min_null_count(opts, 1);
|
784
|
+
parse_options_set_max_null_count(opts, sentence_length(sent));
|
785
|
+
num_linkages = sentence_parse(sent, opts);
|
786
|
+
}
|
787
|
+
}
|
788
|
+
|
789
|
+
if (parse_options_timer_expired(opts))
|
790
|
+
{
|
791
|
+
if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
|
792
|
+
}
|
793
|
+
if (parse_options_memory_exhausted(opts))
|
794
|
+
{
|
795
|
+
if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n");
|
796
|
+
}
|
797
|
+
|
798
|
+
if ((num_linkages == 0) &&
|
799
|
+
parse_options_resources_exhausted(opts) &&
|
800
|
+
parse_options_get_panic_mode(opts))
|
801
|
+
{
|
802
|
+
/* print_total_time(opts); */
|
803
|
+
batch_errors++;
|
804
|
+
if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
|
805
|
+
parse_options_reset_resources(panic_parse_opts);
|
806
|
+
parse_options_set_verbosity(panic_parse_opts, verbosity);
|
807
|
+
num_linkages = sentence_parse(sent, panic_parse_opts);
|
808
|
+
if (parse_options_timer_expired(panic_parse_opts)) {
|
809
|
+
if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
|
810
|
+
}
|
811
|
+
}
|
812
|
+
|
813
|
+
/* print_total_time(opts); */
|
814
|
+
|
815
|
+
if (parse_options_get_batch_mode(opts))
|
816
|
+
{
|
817
|
+
batch_process_some_linkages(label, sent, opts);
|
818
|
+
}
|
819
|
+
else
|
820
|
+
{
|
821
|
+
int c = process_some_linkages(sent, opts);
|
822
|
+
if (c == EOF)
|
823
|
+
{
|
824
|
+
sentence_delete(sent);
|
825
|
+
sent = NULL;
|
826
|
+
break;
|
827
|
+
}
|
828
|
+
}
|
829
|
+
fflush(stdout);
|
830
|
+
|
831
|
+
sentence_delete(sent);
|
832
|
+
sent = NULL;
|
833
|
+
}
|
834
|
+
|
835
|
+
if (parse_options_get_batch_mode(opts))
|
836
|
+
{
|
837
|
+
/* print_time(opts, "Total"); */
|
838
|
+
fprintf(stderr,
|
839
|
+
"%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
|
840
|
+
}
|
841
|
+
|
842
|
+
/* Free stuff, so that mem-leak detectors don't commplain. */
|
843
|
+
parse_options_delete(panic_parse_opts);
|
844
|
+
parse_options_delete(opts);
|
845
|
+
dictionary_delete(dict);
|
846
|
+
|
847
|
+
printf ("Bye.\n");
|
848
|
+
return 0;
|
849
|
+
}
|