grammar_cop 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,281 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
#ifndef _LINK_GRAMMAR_UTILITIES_H_
|
14
|
+
#define _LINK_GRAMMAR_UTILITIES_H_
|
15
|
+
|
16
|
+
#ifdef __CYGWIN__
|
17
|
+
#define _WIN32 1
|
18
|
+
#endif /* __CYGWIN__ */
|
19
|
+
|
20
|
+
#ifndef _WIN32
|
21
|
+
#include <langinfo.h>
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#include <ctype.h>
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include <string.h>
|
28
|
+
|
29
|
+
#ifndef __CYGWIN__
|
30
|
+
/* I was told that cygwin does not have these files. */
|
31
|
+
#include <wchar.h>
|
32
|
+
#include <wctype.h>
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#if defined(__CYGWIN__) && defined(__MINGW32__)
|
36
|
+
/* Some users have CygWin and MinGW installed!
|
37
|
+
* In this case, use the MinGW versions of UTF-8 support. */
|
38
|
+
#include <wchar.h>
|
39
|
+
#include <wctype.h>
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#include "error.h"
|
43
|
+
|
44
|
+
|
45
|
+
#ifdef _WIN32
|
46
|
+
#include <windows.h>
|
47
|
+
|
48
|
+
#ifdef _MSC_VER
|
49
|
+
/* The Microsoft Visual C compiler doesn't support the "inline" keyword. */
|
50
|
+
#define inline
|
51
|
+
|
52
|
+
/* MS Visual C does not have any function normally found in strings.h */
|
53
|
+
/* In particular, be careful to avoid including strings.h */
|
54
|
+
|
55
|
+
/* MS Visual C uses non-standard string function names */
|
56
|
+
#define snprintf _snprintf
|
57
|
+
#define vsnprintf _vsnprintf
|
58
|
+
#define strcasecmp _stricmp
|
59
|
+
#define strdup _strdup
|
60
|
+
#define strncasecmp(a,b,s) strnicmp((a),(b),(s))
|
61
|
+
|
62
|
+
/* MS Visual C does not support some C99 standard floating-point functions */
|
63
|
+
#define fmaxf(a,b) ((a) > (b) ? (a) : (b))
|
64
|
+
|
65
|
+
#endif /* _MSC_VER */
|
66
|
+
|
67
|
+
/* Apparently, MinGW is also missing a variety of standard functions.
|
68
|
+
* Not surprising, since MinGW is intended for compiling Windows
|
69
|
+
* programs on Windows.
|
70
|
+
* MINGW is also known as MSYS */
|
71
|
+
#if defined(_MSC_VER) || defined(__MINGW32__)
|
72
|
+
|
73
|
+
/* No langinfo in Windows or MinGW */
|
74
|
+
#define nl_langinfo(X) ""
|
75
|
+
|
76
|
+
/* strtok_r is missing in Windows */
|
77
|
+
char * strtok_r (char *s, const char *delim, char **saveptr);
|
78
|
+
|
79
|
+
/* Windows doesn't have a thread-safe rand (???) */
|
80
|
+
/* Surely not, there must be something */
|
81
|
+
/* XXX FIXME -- this breaks thread safety on windows */
|
82
|
+
#define rand_r(seedp) rand()
|
83
|
+
#endif /* _MSC_VER || __MINGW32__ */
|
84
|
+
|
85
|
+
/*
|
86
|
+
* CYGWIN on Windows doesn't have UTF8 support, or wide chars ...
|
87
|
+
* However, MS Visual C apparently does, as does MinGW. Since
|
88
|
+
* some users have both cygwin and MinGW installed, crap out the
|
89
|
+
* UTF8 code only when MinGW is missing.
|
90
|
+
*/
|
91
|
+
#if defined (__CYGWIN__) && !defined(__MINGW32__)
|
92
|
+
#define mbstate_t char
|
93
|
+
#define mbrtowc(w,s,n,x) ({*((char *)(w)) = *(s); 1;})
|
94
|
+
#define wcrtomb(s,w,x) ({*((char *)(s)) = ((char)(w)); 1;})
|
95
|
+
#define iswupper isupper
|
96
|
+
#define iswalpha isalpha
|
97
|
+
#define iswdigit isdigit
|
98
|
+
#define iswspace isspace
|
99
|
+
#define wchar_t char
|
100
|
+
#define wint_t int
|
101
|
+
#define fgetwc fgetc
|
102
|
+
#define WEOF EOF
|
103
|
+
#define towlower tolower
|
104
|
+
#define towupper toupper
|
105
|
+
#endif /* __CYGWIN__ and not __MINGW32__ */
|
106
|
+
|
107
|
+
#endif /* _WIN32 */
|
108
|
+
|
109
|
+
#if defined(__sun__)
|
110
|
+
int strncasecmp(const char *s1, const char *s2, size_t n);
|
111
|
+
/* This does not appear to be in string.h header file in sunos
|
112
|
+
(Or in linux when I compile with -ansi) */
|
113
|
+
#endif
|
114
|
+
|
115
|
+
#ifndef FALSE
|
116
|
+
#define FALSE 0
|
117
|
+
#endif
|
118
|
+
|
119
|
+
#ifndef TRUE
|
120
|
+
#define TRUE 1
|
121
|
+
#endif
|
122
|
+
|
123
|
+
#define assert(ex,string) { \
|
124
|
+
if (!(ex)) { \
|
125
|
+
prt_error("Assertion failed: %s\n", string); \
|
126
|
+
exit(1); \
|
127
|
+
} \
|
128
|
+
}
|
129
|
+
|
130
|
+
#if !defined(MIN)
|
131
|
+
#define MIN(X,Y) ( ((X) < (Y)) ? (X) : (Y))
|
132
|
+
#endif
|
133
|
+
#if !defined(MAX)
|
134
|
+
#define MAX(X,Y) ( ((X) > (Y)) ? (X) : (Y))
|
135
|
+
#endif
|
136
|
+
|
137
|
+
|
138
|
+
/**
 * Convert the wide character wc to its multibyte representation and
 * store it at s, using (and updating) the conversion state ps.
 *
 * Returns the number of bytes written to s.  If the character cannot
 * be represented in the current locale's encoding, an error is printed
 * and the program exits; this function then does not return.
 */
static inline int wctomb_check(char *s, wchar_t wc, mbstate_t *ps)
{
	/* Keep the result in the type wcrtomb() actually returns, so the
	 * error sentinel is compared exactly rather than after a narrowing
	 * conversion to int. */
	size_t nr = wcrtomb(s, wc, ps);

	if (nr == (size_t) -1) {
		prt_error("Fatal Error: unknown character set %s\n", nl_langinfo(CODESET));
		exit(1);
	}
	return (int) nr;
}
|
147
|
+
|
148
|
+
/**
 * Decode the first multibyte character of s (in the current locale's
 * encoding) and test whether it is an upper-case letter.
 *
 * Returns the length in bytes of that character if it is upper-case,
 * and 0 otherwise.  A byte sequence that cannot be decoded is treated
 * as "not upper-case" and also yields 0.
 */
static inline int is_utf8_upper(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On an invalid or incomplete sequence mbrtowc() stores nothing
	 * in c; never classify it, and never report a bogus length. */
	if (nbytes == (size_t) -1 || nbytes == (size_t) -2) return 0;

	if (iswupper(c)) return (int) nbytes;
	return 0;
}
|
159
|
+
|
160
|
+
/**
 * Decode the first multibyte character of s (in the current locale's
 * encoding) and test whether it is alphabetic.
 *
 * Returns the length in bytes of that character if it is alphabetic,
 * and 0 otherwise.  A byte sequence that cannot be decoded is treated
 * as "not alphabetic" and also yields 0.
 */
static inline int is_utf8_alpha(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On an invalid or incomplete sequence mbrtowc() stores nothing
	 * in c; never classify it, and never report a bogus length. */
	if (nbytes == (size_t) -1 || nbytes == (size_t) -2) return 0;

	if (iswalpha(c)) return (int) nbytes;
	return 0;
}
|
171
|
+
|
172
|
+
/**
 * Decode the first multibyte character of s (in the current locale's
 * encoding) and test whether it is a decimal digit.
 *
 * Returns the length in bytes of that character if it is a digit,
 * and 0 otherwise.  A byte sequence that cannot be decoded is treated
 * as "not a digit" and also yields 0.
 */
static inline int is_utf8_digit(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On an invalid or incomplete sequence mbrtowc() stores nothing
	 * in c; never classify it, and never report a bogus length. */
	if (nbytes == (size_t) -1 || nbytes == (size_t) -2) return 0;

	if (iswdigit(c)) return (int) nbytes;
	return 0;
}
|
183
|
+
|
184
|
+
/**
 * Decode the first multibyte character of s (in the current locale's
 * encoding) and test whether it is white space.
 *
 * Returns the length in bytes of that character if it is white space,
 * and 0 otherwise.  A byte sequence that cannot be decoded is treated
 * as "not white space" and also yields 0.
 */
static inline int is_utf8_space(const char *s)
{
	mbstate_t mbs;
	wchar_t c = 0;
	size_t nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);

	/* On an invalid or incomplete sequence mbrtowc() stores nothing
	 * in c; never classify it, and never report a bogus length. */
	if (nbytes == (size_t) -1 || nbytes == (size_t) -2) return 0;

	if (iswspace(c)) return (int) nbytes;
	return 0;
}
|
195
|
+
|
196
|
+
/**
 * Advance past the leading run of upper-case characters in s.
 *
 * Repeatedly asks is_utf8_upper() for the byte length of the character
 * at the current position and steps over it, stopping as soon as that
 * function reports 0.  Returns a pointer to the position reached.
 */
static inline const char * skip_utf8_upper(const char * s)
{
	int step;

	for (step = is_utf8_upper(s); step != 0; step = is_utf8_upper(s))
	{
		s += step;
	}
	return s;
}
|
206
|
+
|
207
|
+
/**
|
208
|
+
* Return true if the intial upper-case letters of the
|
209
|
+
* two input strings match. Comparison stops when
|
210
|
+
* both srings descend to lowercase.
|
211
|
+
*/
|
212
|
+
static inline int utf8_upper_match(const char * s, const char * t)
|
213
|
+
{
|
214
|
+
mbstate_t mbs, mbt;
|
215
|
+
wchar_t ws, wt;
|
216
|
+
int ns, nt;
|
217
|
+
|
218
|
+
memset(&mbs, 0, sizeof(mbs));
|
219
|
+
memset(&mbt, 0, sizeof(mbt));
|
220
|
+
|
221
|
+
ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
|
222
|
+
nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
|
223
|
+
while (iswupper(ws) || iswupper(wt))
|
224
|
+
{
|
225
|
+
if (ws != wt) return FALSE;
|
226
|
+
s += ns;
|
227
|
+
t += nt;
|
228
|
+
ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
|
229
|
+
nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
|
230
|
+
}
|
231
|
+
return TRUE;
|
232
|
+
}
|
233
|
+
|
234
|
+
void downcase_utf8_str(char *to, const char * from, size_t usize);
|
235
|
+
void upcase_utf8_str(char *to, const char * from, size_t usize);
|
236
|
+
|
237
|
+
size_t lg_strlcpy(char * dest, const char *src, size_t size);
|
238
|
+
void safe_strcpy(char *u, const char * v, size_t usize);
|
239
|
+
void safe_strcat(char *u, const char *v, size_t usize);
|
240
|
+
char *safe_strdup(const char *u);
|
241
|
+
|
242
|
+
void left_print_string(FILE* fp, const char *, const char *);
|
243
|
+
|
244
|
+
/* routines for allocating basic objects */
|
245
|
+
void init_memusage(void);
|
246
|
+
void * xalloc(size_t);
|
247
|
+
void * xrealloc(void *, size_t oldsize, size_t newsize);
|
248
|
+
void * exalloc(size_t);
|
249
|
+
|
250
|
+
#define TRACK_SPACE_USAGE
|
251
|
+
#ifdef TRACK_SPACE_USAGE
|
252
|
+
void xfree(void *, size_t);
|
253
|
+
void exfree(void *, size_t);
|
254
|
+
#else /* TRACK_SPACE_USAGE */
|
255
|
+
static inline void xfree(void *p, size_t sz) { free(p); }
|
256
|
+
static inline void exfree(void *p, size_t sz) { free(p); };
|
257
|
+
#endif /* TRACK_SPACE_USAGE */
|
258
|
+
|
259
|
+
size_t get_space_in_use(void);
|
260
|
+
size_t get_max_space_used(void);
|
261
|
+
|
262
|
+
|
263
|
+
char * get_default_locale(void);
|
264
|
+
char * join_path(const char * prefix, const char * suffix);
|
265
|
+
|
266
|
+
FILE * dictopen(const char *filename, const char *how);
|
267
|
+
void * object_open(const char *filename,
|
268
|
+
void * (*opencb)(const char *, void *),
|
269
|
+
void * user_data);
|
270
|
+
|
271
|
+
/**
 * Returns the smallest power of two that is at least i and at least 1.
 *
 * If i is larger than the largest power of two that fits in an int,
 * that largest power of two is returned instead; shifting any further
 * would overflow a signed int.
 */
static inline int next_power_of_two_up(int i)
{
	/* Largest power of two representable in an int, computed in
	 * unsigned arithmetic so that no signed overflow is involved. */
	const int max_pow2 = (int) ((~0u >> 2) + 1u);
	int j = 1;

	while (j < i && j < max_pow2) j <<= 1;
	return j;
}
|
280
|
+
|
281
|
+
#endif
|
@@ -0,0 +1,124 @@
|
|
1
|
+
/***************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/***************************************************************************/
|
13
|
+
|
14
|
+
#include <wchar.h>
|
15
|
+
#include <wctype.h>
|
16
|
+
#include "api.h"
|
17
|
+
#include "error.h"
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Reads in one word from the file, allocates space for it,
|
21
|
+
* and returns it.
|
22
|
+
*/
|
23
|
+
static const char * get_a_word(Dictionary dict, FILE * fp)
|
24
|
+
{
|
25
|
+
char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
|
26
|
+
const char * s;
|
27
|
+
wint_t c;
|
28
|
+
mbstate_t mbss;
|
29
|
+
int j;
|
30
|
+
|
31
|
+
do {
|
32
|
+
c = fgetwc(fp);
|
33
|
+
} while ((c != WEOF) && iswspace(c));
|
34
|
+
if (c == WEOF) return NULL;
|
35
|
+
|
36
|
+
memset(&mbss, 0, sizeof(mbss));
|
37
|
+
for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
|
38
|
+
{
|
39
|
+
j += wctomb_check(&word[j], c, &mbss);
|
40
|
+
c = fgetwc(fp);
|
41
|
+
}
|
42
|
+
|
43
|
+
if (j >= MAX_WORD) {
|
44
|
+
word[MAX_WORD] = 0x0;
|
45
|
+
prt_error("Fatal Error: The dictionary contains a word that "
|
46
|
+
"is too long. The word was: %s", word);
|
47
|
+
exit(1);
|
48
|
+
}
|
49
|
+
word[j] = '\0';
|
50
|
+
s = string_set_add(word, dict->string_set);
|
51
|
+
return s;
|
52
|
+
}
|
53
|
+
|
54
|
+
/**
|
55
|
+
*
|
56
|
+
* (1) opens the word file and adds it to the word file list
|
57
|
+
* (2) reads in the words
|
58
|
+
* (3) puts each word in a Dict_node
|
59
|
+
* (4) links these together by their left pointers at the
|
60
|
+
* front of the list pointed to by dn
|
61
|
+
* (5) returns a pointer to the first of this list
|
62
|
+
*/
|
63
|
+
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
|
64
|
+
{
|
65
|
+
Dict_node * dn_new;
|
66
|
+
Word_file * wf;
|
67
|
+
FILE * fp;
|
68
|
+
const char * s;
|
69
|
+
char file_name_copy[MAX_PATH_NAME+1];
|
70
|
+
|
71
|
+
safe_strcpy(file_name_copy, filename+1, sizeof(file_name_copy)); /* get rid of leading '/' */
|
72
|
+
|
73
|
+
if ((fp = dictopen(file_name_copy, "r")) == NULL) {
|
74
|
+
prt_error("Error opening word file %s\n", file_name_copy);
|
75
|
+
return NULL;
|
76
|
+
}
|
77
|
+
|
78
|
+
/*printf(" Reading \"%s\"\n", file_name_copy);*/
|
79
|
+
/*printf("*"); fflush(stdout);*/
|
80
|
+
|
81
|
+
wf = (Word_file *) xalloc(sizeof (Word_file));
|
82
|
+
safe_strcpy(wf->file, file_name_copy, sizeof(wf->file));
|
83
|
+
wf->changed = FALSE;
|
84
|
+
wf->next = dict->word_file_header;
|
85
|
+
dict->word_file_header = wf;
|
86
|
+
|
87
|
+
while ((s = get_a_word(dict, fp)) != NULL) {
|
88
|
+
dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
|
89
|
+
dn_new->left = dn;
|
90
|
+
dn = dn_new;
|
91
|
+
dn->string = s;
|
92
|
+
dn->file = wf;
|
93
|
+
}
|
94
|
+
fclose(fp);
|
95
|
+
return dn;
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
 * Walk the word files of dict and, for each one flagged as changed,
 * re-open it for writing, close it and clear the flag.
 *
 * NOTE(review): the call that would write the contents (output_dictionary)
 * is commented out, so a changed file is opened with mode "w" and closed
 * again without anything being written to it.
 *
 * If a file cannot be opened a message is printed and the function returns
 * immediately; any remaining files are left untouched.
 */
void save_files(Dictionary dict)
{
	Word_file *cur = dict->word_file_header;

	while (cur != NULL) {
		if (cur->changed) {
			FILE *out = fopen(cur->file, "w");
			if (out == NULL) {
				printf("\nCannot open %s. Gee, this shouldn't happen.\n", cur->file);
				printf("file not saved\n");
				return;
			}
			printf(" saving file \"%s\"\n", cur->file);
			/*output_dictionary(dict_root, out, cur);*/
			fclose(out);
			cur->changed = FALSE;
		}
		cur = cur->next;
	}
}
|
116
|
+
|
117
|
+
/**
 * Report whether any word file attached to dict is flagged as changed.
 *
 * @return TRUE if at least one Word_file on dict->word_file_header has
 *         `changed` set, FALSE otherwise.
 */
int files_need_saving(Dictionary dict)
{
	Word_file *cur = dict->word_file_header;

	while (cur != NULL) {
		if (cur->changed) return TRUE;
		cur = cur->next;
	}
	return FALSE;
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
/* Returns TRUE if any word file of dict is flagged as changed. */
int files_need_saving(Dictionary dict);
|
14
|
+
/* Re-opens each changed word file of dict for writing and clears its flag. */
void save_files(Dictionary dict);
|
15
|
+
/* Reads the words of a word file onto the front of the list dn;
 * returns the new list head, or NULL if the file cannot be opened. */
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename);
|
@@ -0,0 +1,526 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
/*
|
14
|
+
* Miscellaneous utilities for dealing with word types.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <math.h>
|
18
|
+
#include <stdio.h>
|
19
|
+
|
20
|
+
#include "api.h"
|
21
|
+
#include "disjunct-utils.h"
|
22
|
+
#include "word-utils.h"
|
23
|
+
|
24
|
+
/* ======================================================== */
|
25
|
+
/* Exp utilities ... */
|
26
|
+
|
27
|
+
void free_E_list(E_list *);
|
28
|
+
void free_Exp(Exp * e)
|
29
|
+
{
|
30
|
+
if (e->type != CONNECTOR_type) {
|
31
|
+
free_E_list(e->u.l);
|
32
|
+
}
|
33
|
+
xfree((char *)e, sizeof(Exp));
|
34
|
+
}
|
35
|
+
|
36
|
+
/**
 * Free every cell of the expression list l, together with the expression
 * each cell points to.  A NULL list is a no-op.
 */
void free_E_list(E_list * l)
{
	E_list * rest;

	while (l != NULL) {
		/* Remember the successor before the cell is released. */
		rest = l->next;
		free_Exp(l->e);
		xfree((char *)l, sizeof(E_list));
		l = rest;
	}
}
|
43
|
+
|
44
|
+
/* Returns the number of connectors in the expression e */
|
45
|
+
int size_of_expression(Exp * e)
|
46
|
+
{
|
47
|
+
int size;
|
48
|
+
E_list * l;
|
49
|
+
if (e->type == CONNECTOR_type) return 1;
|
50
|
+
size = 0;
|
51
|
+
for (l=e->u.l; l!=NULL; l=l->next) {
|
52
|
+
size += size_of_expression(l->e);
|
53
|
+
}
|
54
|
+
return size;
|
55
|
+
}
|
56
|
+
|
57
|
+
/**
|
58
|
+
* Build a copy of the given expression (don't copy strings, of course)
|
59
|
+
*/
|
60
|
+
static E_list * copy_E_list(E_list * l);
|
61
|
+
Exp * copy_Exp(Exp * e)
|
62
|
+
{
|
63
|
+
Exp * n;
|
64
|
+
if (e == NULL) return NULL;
|
65
|
+
n = (Exp *) xalloc(sizeof(Exp));
|
66
|
+
*n = *e;
|
67
|
+
if (e->type != CONNECTOR_type) {
|
68
|
+
n->u.l = copy_E_list(e->u.l);
|
69
|
+
}
|
70
|
+
return n;
|
71
|
+
}
|
72
|
+
|
73
|
+
/**
 * Build a copy of the expression list l: one new cell per original cell,
 * in the same order, each pointing to a copy_Exp() copy of the original
 * expression.  Returns NULL for an empty list.
 */
static E_list * copy_E_list(E_list * l)
{
	E_list * head = NULL;
	E_list ** link = &head;   /* where the next new cell gets attached */

	for (; l != NULL; l = l->next) {
		E_list * cell = (E_list *) xalloc(sizeof(E_list));
		cell->e = copy_Exp(l->e);
		cell->next = NULL;
		*link = cell;
		link = &cell->next;
	}
	return head;
}
|
82
|
+
|
83
|
+
/**
|
84
|
+
* Compare two expressions, return 1 for equal, 0 for unequal
|
85
|
+
*/
|
86
|
+
static int exp_compare(Exp * e1, Exp * e2)
|
87
|
+
{
|
88
|
+
E_list *el1, *el2;
|
89
|
+
|
90
|
+
if ((e1 == NULL) && (e2 == NULL))
|
91
|
+
return 1; /* they are equal */
|
92
|
+
if ((e1 == NULL) || (e2 == NULL))
|
93
|
+
return 0; /* they are not equal */
|
94
|
+
if (e1->type != e2->type)
|
95
|
+
return 0;
|
96
|
+
if (fabs (e1->cost - e2->cost) > 0.001)
|
97
|
+
return 0;
|
98
|
+
if (e1->type == CONNECTOR_type)
|
99
|
+
{
|
100
|
+
if (e1->dir != e2->dir)
|
101
|
+
return 0;
|
102
|
+
/* printf("%s %s\n",e1->u.string,e2->u.string); */
|
103
|
+
if (strcmp(e1->u.string,e2->u.string)!=0)
|
104
|
+
return 0;
|
105
|
+
}
|
106
|
+
else
|
107
|
+
{
|
108
|
+
el1 = e1->u.l;
|
109
|
+
el2 = e2->u.l;
|
110
|
+
/* while at least 1 is non-null */
|
111
|
+
for (;(el1!=NULL)||(el2!=NULL);) {
|
112
|
+
/*fail if 1 is null */
|
113
|
+
if ((el1==NULL)||(el2==NULL))
|
114
|
+
return 0;
|
115
|
+
/* fail if they are not compared */
|
116
|
+
if (exp_compare(el1->e, el2->e) == 0)
|
117
|
+
return 0;
|
118
|
+
if (el1!=NULL)
|
119
|
+
el1 = el1->next;
|
120
|
+
if (el2!=NULL)
|
121
|
+
el2 = el2->next;
|
122
|
+
}
|
123
|
+
}
|
124
|
+
return 1; /* if never returned 0, return 1 */
|
125
|
+
}
|
126
|
+
|
127
|
+
/**
|
128
|
+
* Sub-expression matcher -- return 1 if sub is non-NULL and
|
129
|
+
* contained in super, 0 otherwise.
|
130
|
+
*/
|
131
|
+
static int exp_contains(Exp * super, Exp * sub)
|
132
|
+
{
|
133
|
+
E_list * el;
|
134
|
+
|
135
|
+
#ifdef DEBUG
|
136
|
+
printf("SUP: ");
|
137
|
+
if (super) print_expression(super);
|
138
|
+
printf("\n");
|
139
|
+
#endif
|
140
|
+
|
141
|
+
if (sub==NULL || super==NULL)
|
142
|
+
return 0;
|
143
|
+
if (exp_compare(sub,super)==1)
|
144
|
+
return 1;
|
145
|
+
if (super->type==CONNECTOR_type)
|
146
|
+
return 0; /* super is a leaf */
|
147
|
+
|
148
|
+
/* proceed through supers children and return 1 if sub
|
149
|
+
is contained in any of them */
|
150
|
+
for(el = super->u.l; el!=NULL; el=el->next) {
|
151
|
+
if (exp_contains(el->e, sub)==1)
|
152
|
+
return 1;
|
153
|
+
}
|
154
|
+
return 0;
|
155
|
+
}
|
156
|
+
|
157
|
+
/* ======================================================== */
|
158
|
+
/* X_node utilities ... */
|
159
|
+
/**
|
160
|
+
* frees the list of X_nodes pointed to by x, and all of the expressions
|
161
|
+
*/
|
162
|
+
void free_X_nodes(X_node * x)
|
163
|
+
{
|
164
|
+
X_node * y;
|
165
|
+
for (; x!= NULL; x = y) {
|
166
|
+
y = x->next;
|
167
|
+
free_Exp(x->exp);
|
168
|
+
xfree((char *)x, sizeof(X_node));
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
/**
|
173
|
+
* Destructively catenates the two disjunct lists d1 followed by d2.
|
174
|
+
* Doesn't change the contents of the disjuncts.
|
175
|
+
* Traverses the first list, but not the second.
|
176
|
+
*/
|
177
|
+
X_node * catenate_X_nodes(X_node *d1, X_node *d2)
|
178
|
+
{
|
179
|
+
X_node * dis = d1;
|
180
|
+
|
181
|
+
if (d1 == NULL) return d2;
|
182
|
+
if (d2 == NULL) return d1;
|
183
|
+
while (dis->next != NULL) dis = dis->next;
|
184
|
+
dis->next = d2;
|
185
|
+
return d1;
|
186
|
+
}
|
187
|
+
|
188
|
+
/* ======================================================== */
|
189
|
+
/* Connector utilities ... */
|
190
|
+
|
191
|
+
/**
|
192
|
+
* free_connectors() -- free the list of connectors pointed to by e
|
193
|
+
* (does not free any strings)
|
194
|
+
*/
|
195
|
+
void free_connectors(Connector *e)
|
196
|
+
{
|
197
|
+
Connector * n;
|
198
|
+
for (; e != NULL; e = n)
|
199
|
+
{
|
200
|
+
n = e->next;
|
201
|
+
xfree((char *)e, sizeof(Connector));
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
/**
 * Release every connector of the list starting at e with exfree(),
 * including each connector's string (freed with a size of its length
 * plus the terminating NUL).  Counterpart of excopy_connectors().
 */
void exfree_connectors(Connector *e)
{
	while (e != NULL) {
		Connector * rest = e->next;
		exfree((void *) e->string, sizeof(char)*(strlen(e->string)+1));
		exfree(e, sizeof(Connector));
		e = rest;
	}
}
|
214
|
+
|
215
|
+
Connector * connector_new(void)
|
216
|
+
{
|
217
|
+
Connector *c = (Connector *) xalloc(sizeof(Connector));
|
218
|
+
c->length_limit = UNLIMITED_LEN;
|
219
|
+
c->string = "";
|
220
|
+
c->label = NORMAL_LABEL;
|
221
|
+
c->hash = -1;
|
222
|
+
c->priority = THIN_priority;
|
223
|
+
c->multi = FALSE;
|
224
|
+
c->next = NULL;
|
225
|
+
c->tableNext = NULL;
|
226
|
+
return c;
|
227
|
+
}
|
228
|
+
|
229
|
+
/**
 * Reset the hash (to -1) and the length limit (to UNLIMITED_LEN) of an
 * existing connector; all other fields are left as they are.
 *
 * @return c, to allow chaining.
 */
Connector * init_connector(Connector *c)
{
	c->length_limit = UNLIMITED_LEN;
	c->hash = -1;
	return c;
}
|
235
|
+
|
236
|
+
/**
|
237
|
+
* This builds a new copy of the connector list pointed to by c.
|
238
|
+
* Strings, as usual, are not copied.
|
239
|
+
*/
|
240
|
+
Connector * copy_connectors(Connector * c)
|
241
|
+
{
|
242
|
+
Connector *c1;
|
243
|
+
if (c == NULL) return NULL;
|
244
|
+
c1 = connector_new();
|
245
|
+
*c1 = *c;
|
246
|
+
c1->next = copy_connectors(c->next);
|
247
|
+
return c1;
|
248
|
+
}
|
249
|
+
|
250
|
+
/**
 * Build a copy of the connector list starting at c in which both the
 * nodes and their strings are freshly allocated with exalloc().
 *
 * The result is meant to be released with exfree_connectors(), which
 * frees each node and each string through exfree().  The node is
 * therefore allocated with exalloc() rather than through connector_new()
 * (which uses xalloc()), so that allocation and release go through the
 * same allocator pair.  No field initialisation is needed because the
 * node is overwritten by structure assignment right away.
 *
 * @return the head of the new list, or NULL if c is NULL.
 */
Connector * excopy_connectors(Connector * c)
{
	char * s;
	Connector *c1;

	if (c == NULL) return NULL;

	c1 = (Connector *) exalloc(sizeof(Connector));
	*c1 = *c;

	/* Give the copy its own string instead of sharing the original's. */
	s = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));
	strcpy(s, c->string);
	c1->string = s;

	c1->next = excopy_connectors(c->next);

	return c1;
}
|
266
|
+
|
267
|
+
/* ======================================================== */
|
268
|
+
/* Link utilities ... */
|
269
|
+
|
270
|
+
/**
 * Build a copy of link l using exalloc() for the link and for its name.
 *
 * The fields l and r are copied by value; the name and both connector
 * lists (lc, rc) are duplicated, the latter through excopy_connectors().
 * Only these fields are assigned in the new link.
 *
 * @return the new link, or NULL if l is NULL.
 */
Link * excopy_link(Link * l)
{
	Link * dup;
	char * label;

	if (l == NULL) return NULL;

	dup = (Link *) exalloc(sizeof(Link));

	label = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
	strcpy(label, l->name);
	dup->name = label;

	dup->l = l->l;
	dup->r = l->r;
	dup->lc = excopy_connectors(l->lc);
	dup->rc = excopy_connectors(l->rc);

	return dup;
}
|
288
|
+
|
289
|
+
/**
 * Release a link produced by excopy_link(): both connector lists, the
 * name string and the link itself, all through exfree().
 *
 * A NULL link is accepted and ignored, since excopy_link() returns NULL
 * when given a NULL link.
 */
void exfree_link(Link * l)
{
	if (l == NULL) return;

	exfree_connectors(l->rc);
	exfree_connectors(l->lc);
	exfree((void *)l->name, sizeof(char)*(strlen(l->name)+1));
	exfree(l, sizeof(Link));
}
|
296
|
+
|
297
|
+
/* ======================================================== */
|
298
|
+
/* Connector-set utilities ... */
|
299
|
+
/**
|
300
|
+
* This hash function only looks at the leading upper case letters of
|
301
|
+
* the string, and the direction, '+' or '-'.
|
302
|
+
*/
|
303
|
+
static int connector_set_hash(Connector_set *conset, const char * s, int d)
|
304
|
+
{
|
305
|
+
unsigned int i;
|
306
|
+
/* djb2 hash */
|
307
|
+
i = 5381;
|
308
|
+
i = ((i << 5) + i) + d;
|
309
|
+
while (isupper((int) *s)) /* connector tables cannot contain UTF8, yet */
|
310
|
+
{
|
311
|
+
i = ((i << 5) + i) + *s;
|
312
|
+
s++;
|
313
|
+
}
|
314
|
+
return (i & (conset->table_size-1));
|
315
|
+
}
|
316
|
+
|
317
|
+
/**
 * Insert every connector found in expression e into the hash table of
 * conset.
 *
 * For a connector expression a new Connector is created that shares the
 * expression's string and stores the expression's direction in its `word`
 * field; it is pushed onto the front of its hash bucket.  Any other
 * expression is handled by recursing into each of its children.
 */
static void build_connector_set_from_expression(Connector_set * conset, Exp * e)
{
	E_list * item;
	Connector * entry;
	int slot;

	if (e->type != CONNECTOR_type)
	{
		for (item = e->u.l; item != NULL; item = item->next) {
			build_connector_set_from_expression(conset, item->e);
		}
		return;
	}

	entry = connector_new();
	entry->string = e->u.string;
	entry->word = e->dir;   /* just use the word field to give the dir */
	slot = connector_set_hash(conset, entry->string, entry->word);
	entry->next = conset->hash_table[slot];
	conset->hash_table[slot] = entry;
}
|
336
|
+
|
337
|
+
/**
 * Create a connector set holding all connectors of expression e.
 *
 * The hash table size is next_power_of_two_up() of the number of
 * connectors in e; all buckets start out empty and are then filled by
 * build_connector_set_from_expression().  Release the result with
 * connector_set_delete().
 */
Connector_set * connector_set_create(Exp *e)
{
	Connector_set *set;
	int slot;

	set = (Connector_set *) xalloc(sizeof(Connector_set));
	set->table_size = next_power_of_two_up(size_of_expression(e));
	set->hash_table =
		(Connector **) xalloc(set->table_size * sizeof(Connector *));

	slot = 0;
	while (slot < set->table_size) {
		set->hash_table[slot] = NULL;
		slot++;
	}

	build_connector_set_from_expression(set, e);
	return set;
}
|
350
|
+
|
351
|
+
/**
 * Release a connector set: the connectors of every bucket, the bucket
 * array, and the set itself.  A NULL set is a no-op.
 */
void connector_set_delete(Connector_set * conset)
{
	int slot;

	if (conset == NULL) return;

	slot = 0;
	while (slot < conset->table_size) {
		free_connectors(conset->hash_table[slot]);
		slot++;
	}
	xfree(conset->hash_table, conset->table_size * sizeof(Connector *));
	xfree(conset, sizeof(Connector_set));
}
|
359
|
+
|
360
|
+
/**
|
361
|
+
* Returns TRUE the given connector is in this conset. FALSE otherwise.
|
362
|
+
* d='+' means this connector is on the right side of the disjunct.
|
363
|
+
* d='-' means this connector is on the left side of the disjunct.
|
364
|
+
*/
|
365
|
+
int match_in_connector_set(Sentence sent, Connector_set *conset, Connector * c, int d)
|
366
|
+
{
|
367
|
+
int h;
|
368
|
+
Connector * c1;
|
369
|
+
if (conset == NULL) return FALSE;
|
370
|
+
h = connector_set_hash(conset, c->string, d);
|
371
|
+
for (c1 = conset->hash_table[h]; c1 != NULL; c1 = c1->next)
|
372
|
+
{
|
373
|
+
if (x_match(sent, c1, c) && (d == c1->word)) return TRUE;
|
374
|
+
}
|
375
|
+
return FALSE;
|
376
|
+
}
|
377
|
+
|
378
|
+
/* ======================================================== */
|
379
|
+
/* More connector utilities ... */
|
380
|
+
|
381
|
+
/**
|
382
|
+
* This is like the basic "match" function in count.c - the basic
|
383
|
+
* connector-matching function used in parsing - except it ignores
|
384
|
+
* "priority" (used to handle fat links)
|
385
|
+
*/
|
386
|
+
static int easy_match(const char * s, const char * t)
|
387
|
+
{
|
388
|
+
while(isupper((int)*s) || isupper((int)*t)) {
|
389
|
+
if (*s != *t) return FALSE;
|
390
|
+
s++;
|
391
|
+
t++;
|
392
|
+
}
|
393
|
+
|
394
|
+
while ((*s!='\0') && (*t!='\0')) {
|
395
|
+
if ((*s == '*') || (*t == '*') ||
|
396
|
+
((*s == *t) && (*s != '^'))) {
|
397
|
+
s++;
|
398
|
+
t++;
|
399
|
+
} else return FALSE;
|
400
|
+
}
|
401
|
+
return TRUE;
|
402
|
+
}
|
403
|
+
|
404
|
+
/**
|
405
|
+
* word_has_connector() -- return TRUE if dictionary expression has connector
|
406
|
+
* This function takes a dict_node (corresponding to an entry in a
|
407
|
+
* given dictionary), a string (representing a connector), and a
|
408
|
+
* direction (0 = right-pointing, 1 = left-pointing); it returns 1
|
409
|
+
* if the dictionary expression for the word includes the connector,
|
410
|
+
* 0 otherwise. This can be used to see if a word is in a certain
|
411
|
+
* category (checking for a category connector in a table), or to see
|
412
|
+
* if a word has a connector in a normal dictionary. The connector
|
413
|
+
* check uses a "smart-match", the same kind used by the parser.
|
414
|
+
*/
|
415
|
+
int word_has_connector(Dict_node * dn, const char * cs, int direction)
|
416
|
+
{
|
417
|
+
Connector * c2=NULL;
|
418
|
+
Disjunct * d, *d0;
|
419
|
+
if(dn == NULL) return -1;
|
420
|
+
d0 = d = build_disjuncts_for_dict_node(dn);
|
421
|
+
if(d == NULL) return 0;
|
422
|
+
for(; d!=NULL; d=d->next) {
|
423
|
+
if(direction==0) c2 = d->right;
|
424
|
+
if(direction==1) c2 = d->left;
|
425
|
+
for(; c2!=NULL; c2=c2->next) {
|
426
|
+
if(easy_match(c2->string, cs)==1) {
|
427
|
+
free_disjuncts(d0);
|
428
|
+
return 1;
|
429
|
+
}
|
430
|
+
}
|
431
|
+
}
|
432
|
+
free_disjuncts(d0);
|
433
|
+
return 0;
|
434
|
+
}
|
435
|
+
|
436
|
+
/* ======================================================== */
|
437
|
+
/* Dictionary utilities ... */
|
438
|
+
|
439
|
+
static int dn_word_contains(Dictionary dict,
|
440
|
+
Dict_node * w_dn, const char * macro)
|
441
|
+
{
|
442
|
+
Exp * m_exp;
|
443
|
+
Dict_node *m_dn;
|
444
|
+
|
445
|
+
if (w_dn == NULL) return 0;
|
446
|
+
|
447
|
+
m_dn = dictionary_lookup_list(dict, macro);
|
448
|
+
if (m_dn == NULL) return 0;
|
449
|
+
|
450
|
+
m_exp = m_dn->exp;
|
451
|
+
free_lookup_list(m_dn);
|
452
|
+
|
453
|
+
#ifdef DEBUG
|
454
|
+
printf("\nWORD: ");
|
455
|
+
print_expression(w_dn->exp);
|
456
|
+
printf("\nMACR: ");
|
457
|
+
print_expression(m_exp);
|
458
|
+
printf("\n");
|
459
|
+
#endif
|
460
|
+
|
461
|
+
for (;w_dn != NULL; w_dn = w_dn->right)
|
462
|
+
{
|
463
|
+
if (1 == exp_contains(w_dn->exp, m_exp))
|
464
|
+
return 1;
|
465
|
+
}
|
466
|
+
return 0;
|
467
|
+
}
|
468
|
+
|
469
|
+
/**
|
470
|
+
* word_contains: return true if the word may involve application of
|
471
|
+
* a rule.
|
472
|
+
*
|
473
|
+
* @return: true if word's expression contains macro's expression,
|
474
|
+
* false otherwise.
|
475
|
+
*/
|
476
|
+
int word_contains(Dictionary dict, const char * word, const char * macro)
|
477
|
+
{
|
478
|
+
Dict_node *w_dn;
|
479
|
+
int ret;
|
480
|
+
w_dn = abridged_lookup_list(dict, word);
|
481
|
+
ret = dn_word_contains(dict, w_dn, macro);
|
482
|
+
free_lookup_list(w_dn);
|
483
|
+
return ret;
|
484
|
+
}
|
485
|
+
|
486
|
+
/**
 * Flatten the tree rooted at root into a list linked through `right`.
 *
 * Every tree node is copied into a newly allocated Dict_node.  The
 * resulting list holds the copy of root first, then the copies from the
 * right subtree, then the copies from the left subtree, and finally the
 * incoming list dn.  Apart from `right`, each copy keeps the field values
 * of the node it was copied from.
 *
 * @return the head of the resulting list (dn itself if root is NULL).
 */
Dict_node * list_whole_dictionary(Dict_node *root, Dict_node *dn)
{
	Dict_node * copy;
	Dict_node * tail;

	if (root == NULL) return dn;

	copy = (Dict_node *) xalloc(sizeof(Dict_node));
	*copy = *root;

	/* Left subtree goes in front of dn, right subtree in front of that. */
	tail = list_whole_dictionary(root->left, dn);
	copy->right = list_whole_dictionary(root->right, tail);
	return copy;
}
|
496
|
+
|
497
|
+
/*
 * Names of dictionary entries used as markers.  The past-tense and entity
 * markers are passed as the `macro` argument of word_contains() by the
 * dictionary_is_*() functions below.
 */
#define PAST_TENSE_FORM_MARKER "<marker-past>"
|
498
|
+
#define ENTITY_MARKER "<marker-entity>"
|
499
|
+
#define COMMON_ENTITY_MARKER "<marker-common-entity>"
|
500
|
+
|
501
|
+
/* This is exported to public API (for Java)
|
502
|
+
* @deprecated -- past-tense verbs are tagged with .v-d or .w-d or .q-d
|
503
|
+
* subscripts. use those instead to figure out if a verb is past tense.
|
504
|
+
*/
|
505
|
+
int dictionary_is_past_tense_form(Dictionary dict, const char * str)
|
506
|
+
{
|
507
|
+
if (word_contains(dict, str, PAST_TENSE_FORM_MARKER) == 1)
|
508
|
+
return 1;
|
509
|
+
return 0;
|
510
|
+
}
|
511
|
+
|
512
|
+
/**
|
513
|
+
* dictionary_is_entity - Return true if word is entity.
|
514
|
+
* Entities are proper names (geographical names,
|
515
|
+
* names of people), street addresses, phone numbers,
|
516
|
+
* etc.
|
517
|
+
*/
|
518
|
+
/* This is exported to public API (for Java) */
|
519
|
+
int dictionary_is_entity(Dictionary dict, const char * str)
|
520
|
+
{
|
521
|
+
if (word_contains(dict, str, ENTITY_MARKER) == 1)
|
522
|
+
return 1;
|
523
|
+
return 0;
|
524
|
+
}
|
525
|
+
|
526
|
+
/* ========================= END OF FILE ============================== */
|