grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* Copyright (c) 2009 Vikas N. Kumar */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#ifndef SPELLCHECK_HUN_C
|
15
|
+
/* Guard macro tested by the #ifndef directly above; a bare "#define" with no
 * macro name is rejected by the preprocessor. */
#define SPELLCHECK_HUN_C
|
16
|
+
|
17
|
+
#include <stdio.h>
|
18
|
+
#include <stdlib.h>
|
19
|
+
#include "link-includes.h"
|
20
|
+
#include "spellcheck.h"
|
21
|
+
#include "utilities.h" /* For Win32 compatibility */
|
22
|
+
|
23
|
+
#ifdef HAVE_HUNSPELL
|
24
|
+
|
25
|
+
#ifndef HUNSPELL_DICT_DIR
|
26
|
+
#define HUNSPELL_DICT_DIR (char *)0
|
27
|
+
#endif /* HUNSPELL_DICT_DIR */
|
28
|
+
|
29
|
+
/* Directories probed, in this order, for hunspell .aff/.dic files.
 * The last entry is HUNSPELL_DICT_DIR, which is a NULL pointer unless it
 * was defined at build time; spellcheck_create() skips NULL entries. */
static const char *hunspell_dict_dirs[] = {
|
30
|
+
"/usr/share/myspell/dicts",
|
31
|
+
"/usr/share/hunspell/dicts",
|
32
|
+
"/usr/local/share/myspell/dicts",
|
33
|
+
"/usr/local/share/hunspell/dicts",
|
34
|
+
"/usr/share/myspell",
|
35
|
+
"/usr/share/hunspell",
|
36
|
+
"/usr/local/share/myspell",
|
37
|
+
"/usr/local/share/hunspell",
|
38
|
+
HUNSPELL_DICT_DIR
|
39
|
+
};
|
40
|
+
|
41
|
+
/* Flat list of (link-grammar language code, hunspell dictionary base name)
 * pairs: even indices hold the language code, the following odd index holds
 * the file name stem. spellcheck_create() walks it two entries at a time. */
static const char *spellcheck_lang_mapping[] = {
|
42
|
+
"en" /* link-grammar language */, "en-US" /* hunspell filename */,
|
43
|
+
"en" /* link-grammar language */, "en_US" /* hunspell filename */
|
44
|
+
};
|
45
|
+
|
46
|
+
#define FPATHLEN 256
|
47
|
+
static char hunspell_aff_file[FPATHLEN];
|
48
|
+
static char hunspell_dic_file[FPATHLEN];
|
49
|
+
|
50
|
+
#include <hunspell.h>
|
51
|
+
#include <string.h>
|
52
|
+
|
53
|
+
void * spellcheck_create(const char * lang)
|
54
|
+
{
|
55
|
+
size_t i = 0, j = 0;
|
56
|
+
Hunhandle *h = NULL;
|
57
|
+
|
58
|
+
memset(hunspell_aff_file, 0, FPATHLEN);
|
59
|
+
memset(hunspell_dic_file, 0, FPATHLEN);
|
60
|
+
for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
|
61
|
+
{
|
62
|
+
if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
|
63
|
+
|
64
|
+
/* check in each hunspell_dict_dir if the files exist */
|
65
|
+
for (j = 0; j < sizeof(hunspell_dict_dirs)/sizeof(char *); ++j)
|
66
|
+
{
|
67
|
+
FILE *fh;
|
68
|
+
/* if the directory name is NULL then ignore */
|
69
|
+
if (hunspell_dict_dirs[j] == NULL) continue;
|
70
|
+
|
71
|
+
snprintf(hunspell_aff_file, FPATHLEN, "%s/%s.aff", hunspell_dict_dirs[j],
|
72
|
+
spellcheck_lang_mapping[i+1]);
|
73
|
+
snprintf(hunspell_dic_file, FPATHLEN, "%s/%s.dic", hunspell_dict_dirs[j],
|
74
|
+
spellcheck_lang_mapping[i+1]);
|
75
|
+
|
76
|
+
/* Some versions of Hunspell_create() will succeed even if
|
77
|
+
* there are no dictionary files. So test for permissions.
|
78
|
+
*/
|
79
|
+
fh = fopen(hunspell_aff_file, "r");
|
80
|
+
if (fh) fclose (fh);
|
81
|
+
else continue;
|
82
|
+
|
83
|
+
fh = fopen(hunspell_dic_file, "r");
|
84
|
+
if (fh) fclose (fh);
|
85
|
+
else continue;
|
86
|
+
|
87
|
+
h = Hunspell_create(hunspell_aff_file, hunspell_dic_file);
|
88
|
+
/* if hunspell handle was created break from loop */
|
89
|
+
if (h != NULL)
|
90
|
+
break;
|
91
|
+
}
|
92
|
+
/* if hunspell handle was created break from loop */
|
93
|
+
if (h != NULL) break;
|
94
|
+
}
|
95
|
+
return h;
|
96
|
+
}
|
97
|
+
|
98
|
+
void spellcheck_destroy(void * chk)
|
99
|
+
{
|
100
|
+
Hunhandle *h = (Hunhandle *) chk;
|
101
|
+
Hunspell_destroy(h);
|
102
|
+
}
|
103
|
+
|
104
|
+
/**
|
105
|
+
* Return boolean: 1 if spelling looks good, else zero
|
106
|
+
*/
|
107
|
+
int spellcheck_test(void * chk, const char * word)
|
108
|
+
{
|
109
|
+
if (NULL == chk)
|
110
|
+
{
|
111
|
+
prt_error("Error: no spell-check handle specified!\n");
|
112
|
+
return 0;
|
113
|
+
}
|
114
|
+
|
115
|
+
return Hunspell_spell((Hunhandle *)chk, word);
|
116
|
+
}
|
117
|
+
|
118
|
+
int spellcheck_suggest(void * chk, char ***sug, const char * word)
|
119
|
+
{
|
120
|
+
if (NULL == chk)
|
121
|
+
{
|
122
|
+
prt_error("Error: no spell-check handle specified!\n");
|
123
|
+
return 0;
|
124
|
+
}
|
125
|
+
|
126
|
+
return Hunspell_suggest((Hunhandle *)chk, sug, word);
|
127
|
+
}
|
128
|
+
|
129
|
+
/**
 * Free a suggestion list produced by spellcheck_suggest().
 *
 * Releases each of the `size` suggestion strings and then the array
 * itself. Freeing only the array (as was done before) leaks every string.
 *
 * NOTE(review): assumes the strings were allocated with malloc() by
 * hunspell and that callers do not free them individually -- confirm
 * against the call sites.
 *
 * @param sug   array of suggestion strings; NULL is accepted and ignored
 * @param size  number of entries in sug
 */
void spellcheck_free_suggest(char **sug, int size)
{
	int i;

	if (NULL == sug) return;

	for (i = 0; i < size; i++)
	{
		free(sug[i]);
	}
	free(sug);
}
|
133
|
+
|
134
|
+
#endif /* #ifdef HAVE_HUNSPELL */
|
135
|
+
|
136
|
+
#endif
|
@@ -0,0 +1,34 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
#ifndef SPELLCHECK_H
|
14
|
+
#define SPELLCHECK_H
|
15
|
+
#if (defined HAVE_HUNSPELL) || (defined HAVE_ASPELL)
|
16
|
+
|
17
|
+
void * spellcheck_create(const char * lang);
|
18
|
+
void spellcheck_destroy(void *);
|
19
|
+
int spellcheck_test(void *, const char * word);
|
20
|
+
int spellcheck_suggest(void * chk, char ***sug, const char * word);
|
21
|
+
void spellcheck_free_suggest(char **sug, int size);
|
22
|
+
|
23
|
+
#else
|
24
|
+
|
25
|
+
#include "utilities.h" /* For MSVC inline portability */
|
26
|
+
|
27
|
+
/* Stub used when no spell-checker is compiled in: never yields a handle. */
static inline void * spellcheck_create(const char * lang)
{
	(void) lang;
	return NULL;
}
|
28
|
+
/* Stub used when no spell-checker is compiled in: nothing to release. */
static inline void spellcheck_destroy(void * chk)
{
	(void) chk;
}
|
29
|
+
/* Stub used when no spell-checker is compiled in: always returns 0. */
static inline int spellcheck_test(void * chk, const char * word)
{
	(void) chk;
	(void) word;
	return 0;
}
|
30
|
+
/* Stub used when no spell-checker is compiled in: returns 0 and leaves
 * *sug untouched. */
static inline int spellcheck_suggest(void * chk, char ***sug, const char * word)
{
	(void) chk;
	(void) sug;
	(void) word;
	return 0;
}
|
31
|
+
/* Stub used when no spell-checker is compiled in: nothing to free. */
static inline void spellcheck_free_suggest(char **sug, int size)
{
	(void) sug;
	(void) size;
}
|
32
|
+
|
33
|
+
#endif
|
34
|
+
#endif //endif SPELLCHECK_H
|
@@ -0,0 +1,169 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include "api.h"
|
15
|
+
|
16
|
+
/**
|
17
|
+
* Suppose you have a program that generates strings and keeps pointers to them.
|
18
|
+
The program never needs to change these strings once they're generated.
|
19
|
+
If it generates the same string again, then it can reuse the one it
|
20
|
+
generated before. This is what this package supports.
|
21
|
+
|
22
|
+
String_set is the object. The functions are:
|
23
|
+
|
24
|
+
const char * string_set_add(const char * source_string, String_set * ss);
|
25
|
+
This function returns a pointer to a string with the same
|
26
|
+
contents as the source_string. If that string is already
|
27
|
+
in the table, then it uses that copy, otherwise it generates
|
28
|
+
and inserts a new one.
|
29
|
+
|
30
|
+
const char * string_set_lookup(const char * source_string, String_set * ss);
|
31
|
+
This function returns a pointer to a string with the same
|
32
|
+
contents as the source_string. If that string is not already
|
33
|
+
in the table, returns NULL;
|
34
|
+
|
35
|
+
String_set * string_set_create(void);
|
36
|
+
Create a new empty String_set.
|
37
|
+
|
38
|
+
void string_set_delete(String_set *ss);
|
39
|
+
Free all the space associated with this string set.
|
40
|
+
|
41
|
+
The implementation uses probed hashing (i.e. not bucket).
|
42
|
+
*/
|
43
|
+
|
44
|
+
static int hash_string(const char *sa, const String_set *ss)
|
45
|
+
{
|
46
|
+
unsigned char *str = (unsigned char *) sa;
|
47
|
+
unsigned int accum = 0;
|
48
|
+
for (;*str != '\0'; str++) accum = ((256*accum) + (*str)) % (ss->size);
|
49
|
+
return accum;
|
50
|
+
}
|
51
|
+
|
52
|
+
static int stride_hash_string(const char *sa, const String_set *ss)
|
53
|
+
{
|
54
|
+
unsigned char *str = (unsigned char *) sa;
|
55
|
+
/* This is the stride used, so we have to make sure that its value is not 0 */
|
56
|
+
unsigned int accum = 0;
|
57
|
+
for (;*str != '\0'; str++) accum = ((17*accum) + (*str)) % (ss->size);
|
58
|
+
if (accum == 0) accum = 1;
|
59
|
+
return accum;
|
60
|
+
}
|
61
|
+
|
62
|
+
/**
 * Return the smallest odd prime that is >= start.
 *
 * Any start below 3 yields 3. The previous version decided primality by
 * re-testing "start % i" after the trial-division loop, which wrongly
 * rejected 3 (i is still 3 there) and could return the non-prime 1.
 *
 * @param start  lower bound for the search
 * @return an odd prime p with p >= start
 */
static int next_prime_up(int start)
{
	int i;

	if (start < 3) return 3;

	start |= 1; /* even numbers above 2 are never prime */
	for (;; start += 2)
	{
		int composite = 0;

		/* trial division by odd i while i*i <= start (written as a
		 * division so the bound itself cannot overflow) */
		for (i = 3; i <= start / i; i += 2)
		{
			if (start % i == 0)
			{
				composite = 1;
				break;
			}
		}
		if (!composite) return start;
	}
}
|
78
|
+
|
79
|
+
String_set * string_set_create(void)
|
80
|
+
{
|
81
|
+
String_set *ss;
|
82
|
+
int i;
|
83
|
+
ss = (String_set *) xalloc(sizeof(String_set));
|
84
|
+
ss->size = next_prime_up(100);
|
85
|
+
ss->table = (char **) xalloc(ss->size * sizeof(char *));
|
86
|
+
ss->count = 0;
|
87
|
+
for (i=0; i<ss->size; i++) ss->table[i] = NULL;
|
88
|
+
return ss;
|
89
|
+
}
|
90
|
+
|
91
|
+
/**
|
92
|
+
* lookup the given string in the table. Return a pointer
|
93
|
+
* to the place it is, or the place where it should be.
|
94
|
+
*/
|
95
|
+
static int find_place(const char * str, String_set *ss)
|
96
|
+
{
|
97
|
+
int h, s, i;
|
98
|
+
h = hash_string(str, ss);
|
99
|
+
s = stride_hash_string(str, ss);
|
100
|
+
for (i=h; 1; i = (i + s)%(ss->size)) {
|
101
|
+
if ((ss->table[i] == NULL) || (strcmp(ss->table[i], str) == 0)) return i;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
/**
 * Replace the table with a larger one and re-insert every stored string.
 *
 * The new size is next_prime_up(2 * old size). The strings themselves are
 * not copied: only their pointers move to the new table, after which the
 * old pointer array is released.
 */
static void grow_table(String_set *ss)
{
	String_set prev = *ss; /* snapshot of the old table and size */
	int k;

	ss->size = next_prime_up(2 * prev.size); /* at least double the size */
	ss->table = (char **) xalloc(ss->size * sizeof(char *));
	ss->count = 0;
	for (k = 0; k < ss->size; k++)
	{
		ss->table[k] = NULL;
	}

	for (k = 0; k < prev.size; k++)
	{
		char *entry = prev.table[k];

		if (entry == NULL) continue;
		ss->table[find_place(entry, ss)] = entry;
		ss->count++;
	}

	/* Kept from the original; presumably paired with a debug printf
	 * that has since been disabled. */
	fflush(stdout);
	xfree((char *) prev.table, prev.size * sizeof(char *));
}
|
126
|
+
|
127
|
+
/**
 * Return the set's own copy of source_string, inserting one if needed.
 *
 * If an equal string is already stored, that stored pointer is returned.
 * Otherwise the string is duplicated, stored, and the duplicate returned.
 * After an insertion the table is grown once it is more than 3/4 full;
 * growing moves pointers only, so the returned string stays valid.
 *
 * @param source_string  string to intern; must not be NULL (asserted)
 * @param ss             the string set
 */
const char * string_set_add(const char * source_string, String_set * ss)
{
	char * copy;
	int slot, n;

	assert(source_string != NULL, "STRING_SET: Can't insert a null string");

	slot = find_place(source_string, ss);
	copy = ss->table[slot];
	if (copy != NULL) return copy;

	n = strlen(source_string);
	copy = (char *) xalloc(n+1);
	strcpy(copy, source_string);
	ss->table[slot] = copy;
	ss->count++;

	/* "Too full" means more than three quarters of the slots are used. */
	if ((3 * ss->size) < (4 * ss->count))
	{
		grow_table(ss);
	}

	return copy;
}
|
150
|
+
|
151
|
+
/**
 * Return the set's stored copy of source_string, or NULL when no equal
 * string has been added. The set is not modified.
 */
const char * string_set_lookup(const char * source_string, String_set * ss)
{
	return ss->table[find_place(source_string, ss)];
}
|
158
|
+
|
159
|
+
/**
 * Free every stored string, the table, and the String_set itself.
 * A NULL ss is accepted and ignored.
 */
void string_set_delete(String_set *ss)
{
	int k;

	if (NULL == ss) return;

	for (k = 0; k < ss->size; k++)
	{
		char *entry = ss->table[k];

		if (entry == NULL) continue;
		xfree(entry, strlen(entry) + 1);
	}
	xfree((char *) ss->table, ss->size * sizeof(char *));
	xfree((char *) ss, sizeof(String_set));
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
String_set * string_set_create(void);
|
14
|
+
/* Adds source_string (must not be NULL) to ss and returns the set's own
 * stored copy.  If the slot chosen for the string is already occupied,
 * the stored string is returned and nothing is allocated; otherwise a
 * copy is made, and the table is grown once it is more than 3/4 full. */
const char * string_set_add(const char * source_string, String_set * ss);
|
15
|
+
/* Returns the string stored in the slot selected for source_string, or
 * NULL if that slot is empty.  Never inserts into ss. */
const char * string_set_lookup(const char * source_string, String_set * ss);
|
16
|
+
/* Frees every string stored in ss, the table, and ss itself.
 * Does nothing when ss is NULL. */
void string_set_delete(String_set *ss);
|
@@ -0,0 +1,498 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#ifndef _STRUCTURES_H_
|
15
|
+
#define _STRUCTURES_H_
|
16
|
+
|
17
|
+
#include "api-types.h"
|
18
|
+
#include "utilities.h" /* Needed for inline defn in Windows */
|
19
|
+
|
20
|
+
/*
|
21
|
+
Global variable descriptions
|
22
|
+
-- Most of these global variables have been eliminated.
|
23
|
+
I've left this comment here for historical purposes --DS 4/98
|
24
|
+
|
25
|
+
N_words:
|
26
|
+
The number of words in the current sentence. Computed by
|
27
|
+
separate_sentence().
|
28
|
+
|
29
|
+
N_links:
|
30
|
+
The number of links in the current linkage. Computed by
|
31
|
+
extract_linkage().
|
32
|
+
|
33
|
+
sentence[].string:
|
34
|
+
Contains a slightly modified form of the words typed by the user.
|
35
|
+
Computed by separate_sentence().
|
36
|
+
|
37
|
+
sentence[].x:
|
38
|
+
Contains, for each word, a pointer to a list of expressions from the
|
39
|
+
dictionary that match the word in sentence[].string.
|
40
|
+
Computed by build_sentence_expressions().
|
41
|
+
|
42
|
+
sentence[].d
|
43
|
+
Contains for each word, a pointer to a list of disjuncts for this word.
|
44
|
+
Computed by: prepare_to_parse(), but modified by pruning and power
|
45
|
+
pruning.
|
46
|
+
|
47
|
+
link_array[]
|
48
|
+
This is an array of links. These links define the current linkage.
|
49
|
+
It is computed by extract_links(). It is used by analyze_linkage() to
|
50
|
+
compute pp_linkage[]. It may contain fat links.
|
51
|
+
|
52
|
+
pp_link_array[] ** eliminated (ALB)
|
53
|
+
Another array of links. Here all fat links have been expunged.
|
54
|
+
It is computed by analyze_linkage(), and used by post_process() and by
|
55
|
+
print_links();
|
56
|
+
|
57
|
+
chosen_disjuncts[]
|
58
|
+
This is an array of pointers to disjuncts, one for each word, that is
|
59
|
+
computed by extract_links(). It represents the chosen disjuncts for the
|
60
|
+
current linkage. It is used to compute the cost of the linkage, and
|
61
|
+
also by compute_chosen_words() to compute the chosen_words[].
|
62
|
+
|
63
|
+
chosen_words[]
|
64
|
+
An array of pointers to strings. These are the words to be displayed
|
65
|
+
when printing the solution, the links, etc. Computed as a function of
|
66
|
+
chosen_disjuncts[] by compute_chosen_words(). This differs from
|
67
|
+
sentence[].string because it contains the suffixes. It differs from
|
68
|
+
chosen_disjuncts[].string in that the idiom symbols have been removed.
|
69
|
+
|
70
|
+
has_fat_down[]
|
71
|
+
An array of chars, one for each word. TRUE if there is a fat link
|
72
|
+
down from this word, FALSE otherwise. (Only set if there is at least
|
73
|
+
one fat link.) Set by set_has_fat_down_array() and used by
|
74
|
+
analyze_linkage() and is_canonical().
|
75
|
+
|
76
|
+
is_conjunction[]
|
77
|
+
An array of chars, one for each word. TRUE if the word is a conjunction
|
78
|
+
("and", "or", "nor", or "but" at the moment). False otherwise.
|
79
|
+
*/
|
80
|
+
|
81
|
+
|
82
|
+
/* Large negative cost sentinel.  The replacement list is parenthesized so
   the leading minus cannot combine with neighbouring tokens at the point
   of use. */
#define NEGATIVECOST (-1000000)
|
83
|
+
/* This is a hack that allows one to discard disjuncts containing
|
84
|
+
connectors whose cost is greater than a given bound. This number plus
|
85
|
+
the cost of any connectors on a disjunct must remain negative, and
|
86
|
+
this number multiplied by the number of costly connectors on any
|
87
|
+
disjunct must fit into an integer. */
|
88
|
+
|
89
|
+
/* Upper bound on the cost of any connector. */
|
90
|
+
#define MAX_CONNECTOR_COST 1000.0f
|
91
|
+
|
92
|
+
#define LEFT_WALL_DISPLAY ("LEFT-WALL") /* the string to use to show the wall */
|
93
|
+
#define LEFT_WALL_SUPPRESS ("Wd") /* If this connector is used on the wall, */
|
94
|
+
/* then suppress the display of the wall */
|
95
|
+
/* bogus name to prevent ever suppressing */
|
96
|
+
#define RIGHT_WALL_DISPLAY ("RIGHT-WALL") /* the string to use to show the wall */
|
97
|
+
#define RIGHT_WALL_SUPPRESS ("RW") /* If this connector is used on the wall, then suppress the display of the wall */
|
98
|
+
|
99
|
+
/* The following define the names of the special strings in the dictionary. */
|
100
|
+
#define LEFT_WALL_WORD ("LEFT-WALL")
|
101
|
+
#define RIGHT_WALL_WORD ("RIGHT-WALL")
|
102
|
+
#define POSTPROCESS_WORD ("POSTPROCESS")
|
103
|
+
#define ANDABLE_CONNECTORS_WORD ("ANDABLE-CONNECTORS")
|
104
|
+
#define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS")
|
105
|
+
|
106
|
+
#if DONT_USE_REGEX_GUESSING
|
107
|
+
/* English-language-specific morphology guessing */
|
108
|
+
/* Obsolete, replaced by regex-based morphology handler */
|
109
|
+
#define PROPER_WORD ("CAPITALIZED-WORDS")
|
110
|
+
#define PL_PROPER_WORD ("PL-CAPITALIZED-WORDS")
|
111
|
+
#define HYPHENATED_WORD ("HYPHENATED-WORDS")
|
112
|
+
#define NUMBER_WORD ("NUMBERS")
|
113
|
+
#define ING_WORD ("ING-WORDS")
|
114
|
+
#define S_WORD ("S-WORDS")
|
115
|
+
#define ED_WORD ("ED-WORDS")
|
116
|
+
#define LY_WORD ("LY-WORDS")
|
117
|
+
#endif /* DONT_USE_REGEX_GUESSING */
|
118
|
+
|
119
|
+
#define UNKNOWN_WORD ("UNKNOWN-WORD")
|
120
|
+
|
121
|
+
#define MAX_PATH_NAME 200 /* file names (including paths)
|
122
|
+
should not be longer than this */
|
123
|
+
|
124
|
+
/* Some size definitions. Reduce these for small machines */
|
125
|
+
#define MAX_WORD 60 /* maximum number of chars in a word */
|
126
|
+
#define MAX_LINE 1500 /* maximum number of chars in a sentence */
|
127
|
+
#define MAX_DISJUNCT_COST 10000
|
128
|
+
|
129
|
+
/* conditional compiling flags */
|
130
|
+
#define PLURALIZATION
|
131
|
+
/* If defined, turns on the pluralization operation in */
|
132
|
+
/* "and", "or" and "nor" */
|
133
|
+
#define INFIX_NOTATION
|
134
|
+
/* If defined, then we're using infix notation for the dictionary */
|
135
|
+
/* otherwise we're using prefix notation */
|
136
|
+
|
137
|
+
#define DOWN_priority 2
|
138
|
+
#define UP_priority 1
|
139
|
+
#define THIN_priority 0
|
140
|
+
|
141
|
+
#define NORMAL_LABEL (-1) /* used for normal connectors */
|
142
|
+
/* the labels >= 0 are used by fat links */
|
143
|
+
|
144
|
+
#define UNLIMITED_LEN 255
|
145
|
+
#define SHORT_LEN 6
|
146
|
+
#define NO_WORD 255
|
147
|
+
|
148
|
+
#ifndef _MSC_VER
|
149
|
+
typedef long long s64; /* signed 64-bit integer, even on 32-bit cpus */
|
150
|
+
#define PARSE_NUM_OVERFLOW (1LL<<24)
|
151
|
+
#else
|
152
|
+
/* Microsoft Visual C Version 6 doesn't support long long. */
|
153
|
+
typedef signed __int64 s64; /* signed 64-bit integer, even on 32-bit cpus */
|
154
|
+
#define PARSE_NUM_OVERFLOW (((s64)1)<<24)
|
155
|
+
#endif
|
156
|
+
|
157
|
+
struct Connector_struct
|
158
|
+
{
|
159
|
+
short label;
|
160
|
+
short hash;
|
161
|
+
unsigned char word;
|
162
|
+
/* The nearest word to my left (or right) that
|
163
|
+
this could connect to. Computed by power pruning */
|
164
|
+
unsigned char length_limit;
|
165
|
+
/* If this is a length limited connector, this
|
166
|
+
gives the limit of the length of the link
|
167
|
+
that can be used on this connector. Since
|
168
|
+
this is strictly a function of the connector
|
169
|
+
name, efficiency is the only reason to store
|
170
|
+
this. If no limit, the value is set to 255. */
|
171
|
+
char priority;/* one of the three priorities above */
|
172
|
+
char multi; /* TRUE if this is a multi-connector */
|
173
|
+
Connector * next;
|
174
|
+
const char * string;
|
175
|
+
|
176
|
+
/* Hash table next pointer, used only during pruning. */
|
177
|
+
Connector * tableNext;
|
178
|
+
const char * prune_string;
|
179
|
+
};
|
180
|
+
|
181
|
+
/**
 * Point connector c at the string s and reset its hash field to -1.
 * Only the pointer is stored; the string is not copied.
 * NOTE(review): -1 presumably marks the cached hash as not yet
 * computed -- confirm against the code that reads c->hash.
 */
static inline void connector_set_string(Connector *c, const char *s)
{
	c->hash = -1;
	c->string = s;
}
|
186
|
+
/**
 * Return the string pointer currently stored in connector c.
 */
static inline const char * connector_get_string(Connector *c)
{
	const char *name = c->string;

	return name;
}
|
190
|
+
|
191
|
+
struct Disjunct_struct
|
192
|
+
{
|
193
|
+
Disjunct *next;
|
194
|
+
const char * string;
|
195
|
+
Connector *left, *right;
|
196
|
+
float cost;
|
197
|
+
char marked;
|
198
|
+
};
|
199
|
+
|
200
|
+
typedef struct Match_node_struct Match_node;
|
201
|
+
struct Match_node_struct
|
202
|
+
{
|
203
|
+
Match_node * next;
|
204
|
+
Disjunct * d;
|
205
|
+
};
|
206
|
+
|
207
|
+
typedef struct X_node_struct X_node;
|
208
|
+
struct X_node_struct
|
209
|
+
{
|
210
|
+
const char * string; /* the word itself */
|
211
|
+
Exp * exp;
|
212
|
+
X_node *next;
|
213
|
+
};
|
214
|
+
|
215
|
+
struct Word_struct
|
216
|
+
{
|
217
|
+
char string[MAX_WORD+1];
|
218
|
+
X_node * x; /* sentence starts out with these */
|
219
|
+
Disjunct * d; /* eventually these get generated */
|
220
|
+
int firstupper;
|
221
|
+
};
|
222
|
+
|
223
|
+
/**
|
224
|
+
* Types of Exp_struct structures
|
225
|
+
*/
|
226
|
+
#define OR_type 0
|
227
|
+
#define AND_type 1
|
228
|
+
#define CONNECTOR_type 2
|
229
|
+
|
230
|
+
/**
|
231
|
+
* The E_list and Exp structures defined below comprise the expression
|
232
|
+
* trees that are stored in the dictionary. The expression has a type
|
233
|
+
* (OR, AND or CONNECTOR; a connector is a terminal). If it is not a terminal it has a list
|
234
|
+
* (an E_list) of children.
|
235
|
+
*/
|
236
|
+
struct Exp_struct
|
237
|
+
{
|
238
|
+
Exp * next; /* Used only for mem management,for freeing */
|
239
|
+
char type; /* One of three types, see above */
|
240
|
+
char dir; /* '-' means to the left, '+' means to right (for connector) */
|
241
|
+
char multi; /* TRUE if a multi-connector (for connector) */
|
242
|
+
union {
|
243
|
+
E_list * l; /* only needed for non-terminals */
|
244
|
+
const char * string; /* only needed if it's a connector */
|
245
|
+
} u;
|
246
|
+
float cost; /* The cost of using this expression.
|
247
|
+
Only used for non-terminals */
|
248
|
+
};
|
249
|
+
|
250
|
+
struct E_list_struct
|
251
|
+
{
|
252
|
+
E_list * next;
|
253
|
+
Exp * e;
|
254
|
+
};
|
255
|
+
|
256
|
+
/* The structure below stores a list of dictionary word files. */
|
257
|
+
struct Word_file_struct
|
258
|
+
{
|
259
|
+
char file[MAX_PATH_NAME+1]; /* the file name */
|
260
|
+
int changed; /* TRUE if this file has been changed */
|
261
|
+
Word_file * next;
|
262
|
+
};
|
263
|
+
|
264
|
+
/**
|
265
|
+
* The dictionary is stored as a binary tree comprised of the following
|
266
|
+
* nodes. A list of these (via right pointers) is used to return
|
267
|
+
* the result of a dictionary lookup.
|
268
|
+
*/
|
269
|
+
struct Dict_node_struct
|
270
|
+
{
|
271
|
+
const char * string; /* the word itself */
|
272
|
+
Word_file * file; /* the file the word came from (NULL if dict file) */
|
273
|
+
Exp * exp;
|
274
|
+
Dict_node *left, *right;
|
275
|
+
};
|
276
|
+
|
277
|
+
/* The regexs are stored as a linked list of the following nodes. */
|
278
|
+
struct Regex_node_s
|
279
|
+
{
|
280
|
+
char *name; /* The identifying name of the regex */
|
281
|
+
char *pattern; /* The regular expression pattern */
|
282
|
+
void *re; /* The compiled regex. void * to avoid
|
283
|
+
* having re library details invading the
|
284
|
+
* rest of the LG system; regex-morph.c
|
285
|
+
* takes care of all matching.
|
286
|
+
*/
|
287
|
+
Regex_node *next;
|
288
|
+
};
|
289
|
+
|
290
|
+
|
291
|
+
/* The following three structs comprise what is returned by post_process(). */
|
292
|
+
typedef struct D_type_list_struct D_type_list;
|
293
|
+
struct D_type_list_struct
|
294
|
+
{
|
295
|
+
D_type_list * next;
|
296
|
+
int type;
|
297
|
+
};
|
298
|
+
|
299
|
+
typedef struct PP_node_struct PP_node;
|
300
|
+
struct PP_node_struct
|
301
|
+
{
|
302
|
+
D_type_list *d_type_array[MAX_LINKS];
|
303
|
+
const char *violation;
|
304
|
+
};
|
305
|
+
|
306
|
+
/* Davy added these */
|
307
|
+
|
308
|
+
typedef struct Andlist_struct Andlist;
|
309
|
+
struct Andlist_struct
|
310
|
+
{
|
311
|
+
Andlist * next;
|
312
|
+
int conjunction;
|
313
|
+
int num_elements;
|
314
|
+
int element[MAX_SENTENCE];
|
315
|
+
int num_outside_words;
|
316
|
+
int outside_word[MAX_SENTENCE];
|
317
|
+
int cost;
|
318
|
+
};
|
319
|
+
|
320
|
+
/**
|
321
|
+
* This is for building the graphs of links in post-processing and
|
322
|
+
* fat link extraction.
|
323
|
+
*/
|
324
|
+
struct Linkage_info_struct
|
325
|
+
{
|
326
|
+
int index;
|
327
|
+
Boolean fat;
|
328
|
+
Boolean canonical;
|
329
|
+
Boolean improper_fat_linkage;
|
330
|
+
Boolean inconsistent_domains;
|
331
|
+
short N_violations;
|
332
|
+
short null_cost, unused_word_cost, and_cost, link_cost;
|
333
|
+
float disjunct_cost;
|
334
|
+
double corpus_cost;
|
335
|
+
Andlist * andlist;
|
336
|
+
int island[MAX_SENTENCE];
|
337
|
+
size_t nwords;
|
338
|
+
char **disjunct_list_str;
|
339
|
+
#ifdef USE_CORPUS
|
340
|
+
Sense **sense_list;
|
341
|
+
#endif
|
342
|
+
};
|
343
|
+
|
344
|
+
struct List_o_links_struct
|
345
|
+
{
|
346
|
+
int link; /* the link number */
|
347
|
+
int word; /* the word at the other end of this link */
|
348
|
+
int dir; /* 0: undirected, 1: away from me, -1: toward me */
|
349
|
+
List_o_links * next;
|
350
|
+
};
|
351
|
+
|
352
|
+
/* These parameters tell power_pruning whether it is running before
|
353
|
+
* or after generating "and" disjuncts. GENTLE is before; RUTHLESS is
|
354
|
+
* after.
|
355
|
+
*/
|
356
|
+
#define GENTLE 1
|
357
|
+
#define RUTHLESS 0
|
358
|
+
|
359
|
+
typedef struct string_node_struct String_node;
|
360
|
+
struct string_node_struct
|
361
|
+
{
|
362
|
+
char * string;
|
363
|
+
int size;
|
364
|
+
String_node * next;
|
365
|
+
};
|
366
|
+
|
367
|
+
typedef struct Parse_choice_struct Parse_choice;
|
368
|
+
|
369
|
+
struct Link_s
|
370
|
+
{
|
371
|
+
int l, r;
|
372
|
+
Connector * lc, * rc;
|
373
|
+
const char * name; /* spelling of full link name */
|
374
|
+
};
|
375
|
+
|
376
|
+
struct Parse_choice_struct
|
377
|
+
{
|
378
|
+
Parse_choice * next;
|
379
|
+
Parse_set * set[2];
|
380
|
+
Link link[2]; /* the lc fields of these is NULL if there is no link used */
|
381
|
+
Disjunct *ld, *md, *rd; /* the chosen disjuncts for the relevant three words */
|
382
|
+
};
|
383
|
+
|
384
|
+
struct Parse_set_struct
|
385
|
+
{
|
386
|
+
s64 count; /* the number of ways */
|
387
|
+
Parse_choice * first;
|
388
|
+
Parse_choice * current; /* used to enumerate linkages */
|
389
|
+
};
|
390
|
+
|
391
|
+
struct X_table_connector_struct
|
392
|
+
{
|
393
|
+
short lw, rw;
|
394
|
+
short cost;
|
395
|
+
Parse_set *set;
|
396
|
+
Connector *le, *re;
|
397
|
+
X_table_connector *next;
|
398
|
+
};
|
399
|
+
|
400
|
+
/* from string-set.c */
|
401
|
+
struct String_set_s
|
402
|
+
{
|
403
|
+
int size; /* the current size of the table */
|
404
|
+
int count; /* number of things currently in the table */
|
405
|
+
char ** table; /* the table itself */
|
406
|
+
};
|
407
|
+
|
408
|
+
|
409
|
+
/* from pp_linkset.c */
|
410
|
+
typedef struct pp_linkset_node_s
|
411
|
+
{
|
412
|
+
const char *str;
|
413
|
+
struct pp_linkset_node_s *next;
|
414
|
+
} pp_linkset_node;
|
415
|
+
|
416
|
+
typedef struct pp_linkset_s
|
417
|
+
{
|
418
|
+
int hash_table_size;
|
419
|
+
int population;
|
420
|
+
pp_linkset_node **hash_table; /* data actually lives here */
|
421
|
+
} pp_linkset;
|
422
|
+
|
423
|
+
|
424
|
+
/* from pp_lexer.c */
|
425
|
+
#define PP_LEXER_MAX_LABELS 512
|
426
|
+
|
427
|
+
typedef struct pp_label_node_s
|
428
|
+
{
|
429
|
+
/* linked list of strings associated with a label in the table */
|
430
|
+
const char *str;
|
431
|
+
struct pp_label_node_s *next;
|
432
|
+
} pp_label_node; /* next=NULL: end of list */
|
433
|
+
|
434
|
+
|
435
|
+
typedef struct PPLexTable_s
|
436
|
+
{
|
437
|
+
String_set *string_set;
|
438
|
+
const char *labels[PP_LEXER_MAX_LABELS]; /* array of labels */
|
439
|
+
pp_label_node *nodes_of_label[PP_LEXER_MAX_LABELS]; /*str. for each label*/
|
440
|
+
pp_label_node *last_node_of_label[PP_LEXER_MAX_LABELS]; /* efficiency */
|
441
|
+
pp_label_node *current_node_of_active_label;/* state: curr node of label */
|
442
|
+
int idx_of_active_label; /* read state: current label */
|
443
|
+
} PPLexTable;
|
444
|
+
|
445
|
+
/* from pp_knowledge.c */
|
446
|
+
typedef struct StartingLinkAndDomain_s
|
447
|
+
{
|
448
|
+
const char *starting_link;
|
449
|
+
int domain; /* domain which the link belongs to (-1: terminator)*/
|
450
|
+
} StartingLinkAndDomain;
|
451
|
+
|
452
|
+
typedef struct pp_rule_s
|
453
|
+
{
|
454
|
+
/* Holds a single post-processing rule. Since rules come in many
|
455
|
+
flavors, not all fields of the following are always relevant */
|
456
|
+
const char *selector; /* name of link to which rule applies */
|
457
|
+
int domain; /* type of domain to which rule applies */
|
458
|
+
pp_linkset *link_set; /* handle to set of links relevant to rule */
|
459
|
+
int link_set_size; /* size of this set */
|
460
|
+
const char **link_array; /* array holding the spelled-out names */
|
461
|
+
const char *msg; /* explanation (NULL=end sentinel in array)*/
|
462
|
+
} pp_rule;
|
463
|
+
|
464
|
+
struct pp_knowledge_s
|
465
|
+
{
|
466
|
+
PPLexTable *lt; /* Internal rep'n of sets of strings from knowledge file */
|
467
|
+
const char *path; /* Name of file we loaded from */
|
468
|
+
|
469
|
+
/* handles to sets of links specified in knowledge file. These constitute
|
470
|
+
auxiliary data, necessary to implement the rules, below. See comments
|
471
|
+
in post-process.c for a description of these. */
|
472
|
+
pp_linkset *domain_starter_links;
|
473
|
+
pp_linkset *urfl_domain_starter_links;
|
474
|
+
pp_linkset *urfl_only_domain_starter_links;
|
475
|
+
pp_linkset *domain_contains_links;
|
476
|
+
pp_linkset *must_form_a_cycle_links;
|
477
|
+
pp_linkset *restricted_links;
|
478
|
+
pp_linkset *ignore_these_links;
|
479
|
+
pp_linkset *left_domain_starter_links;
|
480
|
+
|
481
|
+
/* arrays of rules specified in knowledge file */
|
482
|
+
pp_rule *connected_rules, *form_a_cycle_rules;
|
483
|
+
pp_rule *contains_one_rules, *contains_none_rules;
|
484
|
+
pp_rule *bounded_rules;
|
485
|
+
|
486
|
+
int n_connected_rules, n_form_a_cycle_rules;
|
487
|
+
int n_contains_one_rules, n_contains_none_rules;
|
488
|
+
int n_bounded_rules;
|
489
|
+
|
490
|
+
pp_linkset *set_of_links_starting_bounded_domain;
|
491
|
+
StartingLinkAndDomain *starting_link_lookup_table;
|
492
|
+
int nStartingLinks;
|
493
|
+
String_set *string_set;
|
494
|
+
};
|
495
|
+
|
496
|
+
|
497
|
+
#endif
|
498
|
+
|