grammar_cop 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
link_private void free_AND_tables(Sentence sent);
|
14
|
+
link_private void print_AND_statistics(Sentence sent);
|
15
|
+
link_private void init_andable_hash_table(Dictionary dict);
|
16
|
+
link_private void free_andable_hash_table(Dictionary dict);
|
17
|
+
link_private void initialize_conjunction_tables(Sentence sent);
|
18
|
+
link_private int is_canonical_linkage(Sentence sent);
|
19
|
+
link_private Disjunct * build_AND_disjunct_list(Sentence sent, char *);
|
20
|
+
link_private Disjunct * build_COMMA_disjunct_list(Sentence sent);
|
21
|
+
link_private Disjunct * explode_disjunct_list(Sentence sent, Disjunct *);
|
22
|
+
link_private void build_conjunction_tables(Sentence);
|
23
|
+
link_private void compute_pp_link_array_connectors(Sentence sent, Sublinkage *sublinkage);
|
24
|
+
|
25
|
+
/* Following need to be visible to sat solver, can't be private */
|
26
|
+
int set_has_fat_down(Sentence sent);
|
27
|
+
const char * intersect_strings(Sentence sent, const char * s, const char * t);
|
@@ -0,0 +1,362 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
/*****************************************************************************
|
15
|
+
*
|
16
|
+
* NOTE: There are five basic "types" in the link parser API. These are:
|
17
|
+
*
|
18
|
+
* Dictionary, Parse_Options, Sentence, Linkage, PostProcessor
|
19
|
+
*
|
20
|
+
* To make the use of the API simpler, each of these is typedef'ed as a pointer
|
21
|
+
* to a data structure. As a result, some of the code may look a little funny,
|
22
|
+
* since it uses pointers in a way that is syntactically inconsistent. After
|
23
|
+
* working with these basic types for a while, this should not be confusing.
|
24
|
+
*
|
25
|
+
******************************************************************************/
|
26
|
+
|
27
|
+
#ifndef _API_STRUCTURESH_
|
28
|
+
#define _API_STRUCTURESH_
|
29
|
+
|
30
|
+
#include <wchar.h>
|
31
|
+
#include "api-types.h"
|
32
|
+
#include "structures.h" /* for definition of Link */
|
33
|
+
#include "corpus/corpus.h"
|
34
|
+
#include "error.h"
|
35
|
+
|
36
|
+
typedef enum
|
37
|
+
{
|
38
|
+
VDAL=1, /* Sort by Violations, Disjunct cost, And cost, Link cost */
|
39
|
+
CORPUS, /* Sort by Corpus cost */
|
40
|
+
} Cost_Model_type;
|
41
|
+
|
42
|
+
struct Cost_Model_s
|
43
|
+
{
|
44
|
+
Cost_Model_type type;
|
45
|
+
int (*compare_fn)(Linkage_info *, Linkage_info *);
|
46
|
+
};
|
47
|
+
|
48
|
+
struct Resources_s
|
49
|
+
{
|
50
|
+
int max_parse_time; /* in seconds */
|
51
|
+
size_t max_memory; /* in bytes */
|
52
|
+
double time_when_parse_started;
|
53
|
+
size_t space_when_parse_started;
|
54
|
+
double when_created;
|
55
|
+
double when_last_called;
|
56
|
+
double cumulative_time;
|
57
|
+
int memory_exhausted;
|
58
|
+
int timer_expired;
|
59
|
+
};
|
60
|
+
|
61
|
+
struct Parse_Options_s
|
62
|
+
{
|
63
|
+
int verbosity; /* Level of detail to give about the computation 0 */
|
64
|
+
int use_sat_solver; /* Use the Boolean SAT based parser */
|
65
|
+
int linkage_limit; /* The maximum number of linkages processed 100 */
|
66
|
+
float disjunct_cost; /* Max disjunct cost to allow */
|
67
|
+
int use_fat_links; /* Look for fat linkages */
|
68
|
+
int min_null_count; /* The minimum number of null links to allow */
|
69
|
+
int max_null_count; /* The maximum number of null links to allow */
|
70
|
+
int null_block; /* consecutive blocks of this many words are
|
71
|
+
considered as one null link (default=1) */
|
72
|
+
int islands_ok; /* If TRUE, then linkages with islands
|
73
|
+
(separate component of the link graph)
|
74
|
+
will be generated (default=FALSE) */
|
75
|
+
int twopass_length; /* min length for two-pass post processing */
|
76
|
+
int max_sentence_length;
|
77
|
+
int short_length; /* Links that are limited in length can be
|
78
|
+
* no longer than this. Default = 6 */
|
79
|
+
int all_short; /* If true, there can be no connectors that are exempt */
|
80
|
+
int use_spell_guess; /* Perform spell-guessing of unknown words. */
|
81
|
+
Cost_Model cost_model; /* For sorting linkages in post_processing */
|
82
|
+
Resources resources; /* For deciding when to abort the parsing */
|
83
|
+
|
84
|
+
/* Flags governing the command-line client; not used by parser */
|
85
|
+
int display_short;
|
86
|
+
int display_word_subscripts; /* as in "dog.n" as opposed to "dog" */
|
87
|
+
int display_link_subscripts; /* as in "Ss" as opposed to "S" */
|
88
|
+
int display_walls;
|
89
|
+
int display_union; /* print squashed version of linkage with conjunction? */
|
90
|
+
int allow_null; /* true if we allow null links in parsing */
|
91
|
+
int use_cluster_disjuncts; /* if true, attempt using a broader list of disjuncts */
|
92
|
+
int echo_on; /* true if we should echo the input sentence */
|
93
|
+
int batch_mode; /* if true, process sentences non-interactively */
|
94
|
+
int panic_mode; /* if true, parse in "panic mode" after all else fails */
|
95
|
+
int screen_width; /* width of screen for displaying linkages */
|
96
|
+
int display_on; /* if true, output graphical linkage diagram */
|
97
|
+
int display_postscript; /* if true, output postscript linkage */
|
98
|
+
int display_constituents; /* if true, output treebank-style constituent structure */
|
99
|
+
int display_bad; /* if true, bad linkages are displayed */
|
100
|
+
int display_disjuncts; /* if true, print disjuncts that were used */
|
101
|
+
int display_links; /* if true, a list o' links is printed out */
|
102
|
+
int display_senses; /* if true, sense candidates are printed out */
|
103
|
+
};
|
104
|
+
|
105
|
+
struct Connector_set_s
|
106
|
+
{
|
107
|
+
Connector ** hash_table;
|
108
|
+
int table_size;
|
109
|
+
int is_defined; /* if 0 then there is no such set */
|
110
|
+
};
|
111
|
+
|
112
|
+
struct Dictionary_s
|
113
|
+
{
|
114
|
+
Dict_node * root;
|
115
|
+
Regex_node * regex_root;
|
116
|
+
const char * name;
|
117
|
+
const char * lang;
|
118
|
+
|
119
|
+
int use_unknown_word;
|
120
|
+
int unknown_word_defined;
|
121
|
+
|
122
|
+
/* If not null, then use spelling guesser for unknown words */
|
123
|
+
void * spell_checker; /* spell checker handle */
|
124
|
+
#if USE_CORPUS
|
125
|
+
Corpus * corpus; /* Statistics database */
|
126
|
+
#endif
|
127
|
+
|
128
|
+
#if DONT_USE_REGEX_GUESSING
|
129
|
+
/* English language morphology bits
|
130
|
+
* replaced by regex-based morpho guesser
|
131
|
+
* Dead code, remove at leisure.
|
132
|
+
*/
|
133
|
+
int capitalized_word_defined;
|
134
|
+
int pl_capitalized_word_defined;
|
135
|
+
int hyphenated_word_defined;
|
136
|
+
int number_word_defined;
|
137
|
+
int ing_word_defined;
|
138
|
+
int s_word_defined;
|
139
|
+
int ed_word_defined;
|
140
|
+
int ly_word_defined;
|
141
|
+
#endif /* DONT_USE_REGEX_GUESSING */
|
142
|
+
|
143
|
+
int left_wall_defined;
|
144
|
+
int right_wall_defined;
|
145
|
+
|
146
|
+
/* Affixes are used during the tokenization stage. */
|
147
|
+
Dictionary affix_table;
|
148
|
+
int r_strippable; /* right */
|
149
|
+
int l_strippable; /* left */
|
150
|
+
int u_strippable; /* units on left */
|
151
|
+
int s_strippable; /* generic suffix */
|
152
|
+
int p_strippable; /* generic prefix */
|
153
|
+
const char ** strip_left;
|
154
|
+
const char ** strip_right;
|
155
|
+
const char ** strip_units;
|
156
|
+
const char ** prefix;
|
157
|
+
const char ** suffix;
|
158
|
+
|
159
|
+
Postprocessor * postprocessor;
|
160
|
+
Postprocessor * constituent_pp;
|
161
|
+
int andable_defined;
|
162
|
+
Connector_set * andable_connector_set; /* NULL=everything is andable */
|
163
|
+
Connector_set * unlimited_connector_set; /* NULL=everything is unlimited */
|
164
|
+
int max_cost;
|
165
|
+
String_set * string_set; /* Set of link names constructed during parsing */
|
166
|
+
int num_entries;
|
167
|
+
Word_file * word_file_header;
|
168
|
+
|
169
|
+
/* exp_list links together all the Exp structs that are allocated
|
170
|
+
* in reading this dictionary. Needed for freeing the dictionary
|
171
|
+
*/
|
172
|
+
Exp * exp_list;
|
173
|
+
|
174
|
+
/* Private data elements that come in play only while the
|
175
|
+
* dictionary is being read, and are not otherwise used.
|
176
|
+
*/
|
177
|
+
FILE * fp;
|
178
|
+
char token[MAX_TOKEN_LENGTH];
|
179
|
+
int is_special; /* boolean */
|
180
|
+
wint_t already_got_it;
|
181
|
+
int line_number;
|
182
|
+
int recursive_error; /* boolean */
|
183
|
+
mbstate_t mbss; /* multi-byte shift state */
|
184
|
+
};
|
185
|
+
|
186
|
+
struct Label_node_s
|
187
|
+
{
|
188
|
+
int label;
|
189
|
+
Label_node * next;
|
190
|
+
};
|
191
|
+
|
192
|
+
#define HT_SIZE (1<<10)
|
193
|
+
|
194
|
+
struct And_data_s
|
195
|
+
{
|
196
|
+
int LT_bound;
|
197
|
+
int LT_size;
|
198
|
+
Disjunct ** label_table;
|
199
|
+
Label_node * hash_table[HT_SIZE];
|
200
|
+
|
201
|
+
/* keeping statistics */
|
202
|
+
int STAT_N_disjuncts;
|
203
|
+
int STAT_calls_to_equality_test;
|
204
|
+
};
|
205
|
+
|
206
|
+
struct Parse_info_struct
|
207
|
+
{
|
208
|
+
int x_table_size;
|
209
|
+
int log2_x_table_size;
|
210
|
+
X_table_connector ** x_table;
|
211
|
+
Parse_set * parse_set;
|
212
|
+
int N_words;
|
213
|
+
Disjunct ** chosen_disjuncts;
|
214
|
+
int N_links;
|
215
|
+
Link link_array[MAX_LINKS];
|
216
|
+
|
217
|
+
/* Points to the image structure for each word.
|
218
|
+
* NULL if not a fat word. */
|
219
|
+
Image_node ** image_array;
|
220
|
+
|
221
|
+
/* Array of boolean flags, one per word. Set to TRUE if this
|
222
|
+
* word has a fat down link. FALSE otherwise */
|
223
|
+
Boolean *has_fat_down;
|
224
|
+
|
225
|
+
/* thread-safe random number state */
|
226
|
+
unsigned int rand_state;
|
227
|
+
};
|
228
|
+
|
229
|
+
struct Sentence_s
|
230
|
+
{
|
231
|
+
Dictionary dict; /* words are defined from this dictionary */
|
232
|
+
const char *orig_sentence; /* Copy of original sentence */
|
233
|
+
int length; /* number of words */
|
234
|
+
Word word[MAX_SENTENCE]; /* array of words after tokenization */
|
235
|
+
char * is_conjunction; /* Array of flags, one per word; set to
|
236
|
+
TRUE if conjunction, as defined by dictionary */
|
237
|
+
char** deletable; /* deletable regions in a sentence with conjunction */
|
238
|
+
char** dptr; /* private pointer for mem management only */
|
239
|
+
char** effective_dist;
|
240
|
+
int num_linkages_found; /* total number before postprocessing. This
|
241
|
+
is returned by the count() function */
|
242
|
+
int num_linkages_alloced;/* total number of linkages allocated.
|
243
|
+
the number post-processed might be fewer
|
244
|
+
because some are non-canonical */
|
245
|
+
int num_linkages_post_processed;
|
246
|
+
/* The number of linkages that are actually
|
247
|
+
put into the array that was alloced.
|
248
|
+
This is not the same as num alloced
|
249
|
+
because some may be non-canonical. */
|
250
|
+
int num_valid_linkages; /* number with no pp violations */
|
251
|
+
int num_thin_linkages; /* valid linkages which are not fat */
|
252
|
+
int null_links; /* null links allowed */
|
253
|
+
int null_count; /* number of null links in linkages */
|
254
|
+
Parse_info parse_info; /* set of parses for the sentence */
|
255
|
+
Linkage_info * link_info; /* array of valid and invalid linkages (sorted) */
|
256
|
+
String_set * string_set; /* used for word names, not connectors */
|
257
|
+
And_data and_data; /* used to keep track of fat disjuncts */
|
258
|
+
char q_pruned_rules; /* don't prune rules more than once in p.p. */
|
259
|
+
int post_quote[MAX_SENTENCE]; /* Used only by tokenizer. */
|
260
|
+
|
261
|
+
analyze_context_t * analyze_ctxt; /* private state used for analyzing */
|
262
|
+
count_context_t * count_ctxt; /* private state info used for counting */
|
263
|
+
match_context_t * match_ctxt; /* private state info used for matching */
|
264
|
+
/* thread-safe random number state */
|
265
|
+
unsigned int rand_state;
|
266
|
+
|
267
|
+
/* Hook for the SAT solver */
|
268
|
+
void *hook;
|
269
|
+
};
|
270
|
+
|
271
|
+
/*********************************************************
|
272
|
+
*
|
273
|
+
* Post processing
|
274
|
+
*
|
275
|
+
**********************************************************/
|
276
|
+
|
277
|
+
/* One post-processing domain: a named, typed record tied to the link that
 * started it. PP_data_s stores these by value in domain_array.
 * NOTE(review): comments marked "presumably" are inferred from field names
 * and types only -- confirm against the post-processor implementation. */
struct Domain_s
|
278
|
+
{
|
279
|
+
const char * string; /* presumably the domain's name string -- TODO confirm */
|
280
|
+
int size; /* size of the domain; PP_data_s.domain_array is kept sorted by size */
|
281
|
+
List_o_links * lol; /* list of links; presumably the links in this domain -- confirm */
|
282
|
+
int start_link; /* the link that started this domain */
|
283
|
+
int type; /* one letter name */
|
284
|
+
DTreeLeaf * child; /* a DTreeLeaf under this domain; leaves chain via DTreeLeaf_s.next */
|
285
|
+
Domain * parent; /* parent domain; presumably the enclosing one -- confirm */
|
286
|
+
};
|
287
|
+
|
288
|
+
|
289
|
+
/* Leaf record of the domain tree: associates one link with a parent Domain
 * and points to a further leaf through `next`. */
struct DTreeLeaf_s
|
290
|
+
{
|
291
|
+
Domain * parent; /* domain this leaf hangs under */
|
292
|
+
int link; /* presumably an index identifying the link -- TODO confirm */
|
293
|
+
DTreeLeaf * next; /* next leaf in the chain */
|
294
|
+
};
|
295
|
+
|
296
|
+
/* Post-processing working data: per-word link lists plus the array of
 * domains. Embedded by value in both Postprocessor_s and Sublinkage_s, so
 * its size (MAX_SENTENCE pointers + MAX_LINKS Domain records) is paid by
 * each of those structs. */
struct PP_data_s
|
297
|
+
{
|
298
|
+
int N_domains; /* presumably the number of entries in use in domain_array -- confirm */
|
299
|
+
List_o_links * word_links[MAX_SENTENCE]; /* one link-list pointer per word slot */
|
300
|
+
List_o_links * links_to_ignore; /* list of links to ignore; exact criteria not visible here */
|
301
|
+
Domain domain_array[MAX_LINKS]; /* the domains, sorted by size */
|
302
|
+
int length; /* length of current sentence */
|
303
|
+
};
|
304
|
+
|
305
|
+
/* Post-processing info for a link: a count and an array of domain-name
 * strings. Sublinkage_s.pp_info points at these ("PP info for each link"). */
struct PP_info_s
|
306
|
+
{
|
307
|
+
int num_domains; /* presumably the number of entries in domain_name -- confirm */
|
308
|
+
const char ** domain_name; /* array of domain name strings */
|
309
|
+
};
|
310
|
+
|
311
|
+
/* Post-processor state: the rule knowledge, two diagnostic counters, link
 * sets gathered over a sentence's linkages, lists of relevant rule indices,
 * and scratch state that lives for the duration of a post_process() call. */
struct Postprocessor_s
|
312
|
+
{
|
313
|
+
pp_knowledge *knowledge; /* internal representation of the actual rules */
|
314
|
+
int n_global_rules_firing; /* this & the next are diagnostic */
|
315
|
+
int n_local_rules_firing; /* diagnostic counter (see previous field) */
|
316
|
+
pp_linkset *set_of_links_of_sentence; /* seen in *any* linkage of sent */
|
317
|
+
pp_linkset *set_of_links_in_an_active_rule;/*used in *some* linkage of sent*/
|
318
|
+
int *relevant_contains_one_rules; /* -1-terminated list of indices */
|
319
|
+
int *relevant_contains_none_rules; /* presumably also a -1-terminated index list -- confirm */
|
320
|
+
/* the following maintain state during a call to post_process() */
|
321
|
+
String_set *sentence_link_name_set; /* link names seen for sentence */
|
322
|
+
int visited[MAX_SENTENCE]; /* for the depth-first search */
|
323
|
+
PP_node *pp_node; /* NOTE(review): PP_node is not declared in the visible code; role unconfirmed */
|
324
|
+
PP_data pp_data; /* working data, embedded by value (not a pointer) */
|
325
|
+
};
|
326
|
+
|
327
|
+
|
328
|
+
/*********************************************************
|
329
|
+
*
|
330
|
+
* Linkages
|
331
|
+
*
|
332
|
+
**********************************************************/
|
333
|
+
|
334
|
+
/* One sublinkage: an array of links together with its post-processing
 * results. Linkage_s owns an array of these -- one for a thin linkage,
 * several for a fat one (a parse with conjunctions). */
struct Sublinkage_s
|
335
|
+
{
|
336
|
+
int num_links; /* Number of links in array */
|
337
|
+
Link ** link; /* Array of links */
|
338
|
+
PP_info * pp_info; /* PP info for each link */
|
339
|
+
const char * violation; /* Name of violation, if any */
|
340
|
+
PP_data pp_data; /* post-processing data, embedded by value (not a pointer) */
|
341
|
+
};
|
342
|
+
|
343
|
+
typedef struct DIS_node_struct DIS_node; /* forward declaration; Linkage_s only holds a pointer to it */
|
344
|
+
|
345
|
+
/* A linkage: the tokenized words, index/cost information, the array of
 * sublinkages with a user-selectable current one, and handles to the
 * sentence, the parse options and the disjunction-conjunction tree. */
struct Linkage_s
|
346
|
+
{
|
347
|
+
int num_words; /* number of (tokenized) words */
|
348
|
+
const char * * word; /* array of word spellings */
|
349
|
+
Linkage_info* info; /* index and cost information */
|
350
|
+
int num_sublinkages; /* One for thin linkages, bigger for fat */
|
351
|
+
int current; /* Allows user to select particular sublinkage */
|
352
|
+
Sublinkage * sublinkage; /* A parse with conjunctions will have several */
|
353
|
+
int unionized; /* if TRUE, union of links has been computed */
|
354
|
+
Sentence sent; /* presumably the sentence this linkage belongs to -- TODO confirm */
|
355
|
+
Parse_Options opts; /* presumably the parse options in effect -- TODO confirm */
|
356
|
+
DIS_node * dis_con_tree; /* Disjunction-conjunction tree */
|
357
|
+
};
|
358
|
+
|
359
|
+
|
360
|
+
|
361
|
+
#endif
|
362
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#ifndef _API_TYPES_H_
|
15
|
+
#define _API_TYPES_H_
|
16
|
+
|
17
|
+
#define MAX_TOKEN_LENGTH 50 /* maximum number of chars in a token */
|
18
|
+
|
19
|
+
/* MAX_SENTENCE cannot be more than 254, because word MAX_SENTENCE+1 is
|
20
|
+
 * used to indicate that nothing can connect to this connector, and this
|
21
|
+
 * should fit in one byte (if the word field of a connector is an
|
22
|
+
 * unsigned char). With the limit of 254 the sentinel is at most 255.
|
23
|
+
 */
|
24
|
+
#define MAX_SENTENCE 250 /* maximum number of words in a sentence; also sizes fixed per-word arrays */
|
25
|
+
#define MAX_LINKS (2*MAX_SENTENCE-3) /* maximum number of links allowed (497 when MAX_SENTENCE is 250) */
|
26
|
+
|
27
|
+
|
28
|
+
/* "public" typedefs: short names for struct tags. This header only
 * forward-declares the types; no struct bodies are defined here.
 * Parse_info and Resources differ from the rest: they are pointer typedefs. */
|
29
|
+
typedef struct And_data_s And_data;
|
30
|
+
typedef struct Connector_struct Connector;
|
31
|
+
typedef struct Cost_Model_s Cost_Model;
|
32
|
+
typedef struct Dict_node_struct Dict_node;
|
33
|
+
typedef struct Domain_s Domain;
|
34
|
+
typedef struct DTreeLeaf_s DTreeLeaf;
|
35
|
+
typedef struct Image_node_struct Image_node;
|
36
|
+
typedef struct Label_node_s Label_node;
|
37
|
+
typedef struct Linkage_info_struct Linkage_info;
|
38
|
+
typedef struct Parse_info_struct *Parse_info; /* pointer typedef: Parse_info is already a pointer */
|
39
|
+
typedef struct Postprocessor_s Postprocessor;
|
40
|
+
typedef struct PP_data_s PP_data;
|
41
|
+
typedef struct PP_info_s PP_info;
|
42
|
+
typedef struct Regex_node_s Regex_node;
|
43
|
+
typedef struct Resources_s * Resources; /* pointer typedef: Resources is already a pointer */
|
44
|
+
typedef struct Sublinkage_s Sublinkage;
|
45
|
+
|
46
|
+
/* Some private typedefs: a one-byte Boolean plus forward declarations of
 * internal struct types (none of the struct bodies are defined here). */
|
47
|
+
typedef char Boolean; /* flag type stored in a single char */
|
48
|
+
typedef struct analyze_context_s analyze_context_t; /* context types: only forward-declared here */
|
49
|
+
typedef struct count_context_s count_context_t;
|
50
|
+
typedef struct match_context_s match_context_t;
|
51
|
+
|
52
|
+
typedef struct Connector_set_s Connector_set;
|
53
|
+
typedef struct Disjunct_struct Disjunct;
|
54
|
+
typedef struct Exp_struct Exp;
|
55
|
+
typedef struct E_list_struct E_list;
|
56
|
+
typedef struct Link_s Link;
|
57
|
+
typedef struct List_o_links_struct List_o_links;
|
58
|
+
typedef struct Parse_set_struct Parse_set;
|
59
|
+
typedef struct String_set_s String_set;
|
60
|
+
typedef struct Word_struct Word;
|
61
|
+
typedef struct Word_file_struct Word_file;
|
62
|
+
typedef struct X_table_connector_struct X_table_connector;
|
63
|
+
|
64
|
+
|
65
|
+
typedef struct pp_knowledge_s pp_knowledge; /* type of Postprocessor_s.knowledge */
|
66
|
+
|
67
|
+
typedef struct corpus_s Corpus;
|
68
|
+
typedef struct sense_s Sense;
|
69
|
+
typedef struct cluster_s Cluster;
|
70
|
+
|
71
|
+
#endif
|
72
|
+
|