grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,281 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
#ifndef _LINK_GRAMMAR_UTILITIES_H_
|
14
|
+
#define _LINK_GRAMMAR_UTILITIES_H_
|
15
|
+
|
16
|
+
#ifdef __CYGWIN__
|
17
|
+
#define _WIN32 1
|
18
|
+
#endif /* __CYGWIN__ */
|
19
|
+
|
20
|
+
#ifndef _WIN32
|
21
|
+
#include <langinfo.h>
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#include <ctype.h>
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include <string.h>
|
28
|
+
|
29
|
+
#ifndef __CYGWIN__
|
30
|
+
/* I was told that cygwin does not have these files. */
|
31
|
+
#include <wchar.h>
|
32
|
+
#include <wctype.h>
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#if defined(__CYGWIN__) && defined(__MINGW32__)
|
36
|
+
/* Some users have CygWin and MinGW installed!
|
37
|
+
* In this case, use the MinGW versions of UTF-8 support. */
|
38
|
+
#include <wchar.h>
|
39
|
+
#include <wctype.h>
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#include "error.h"
|
43
|
+
|
44
|
+
|
45
|
+
#ifdef _WIN32
|
46
|
+
#include <windows.h>
|
47
|
+
|
48
|
+
#ifdef _MSC_VER
|
49
|
+
/* The Microsoft Visual C compiler doesn't support the "inline" keyword. */
|
50
|
+
#define inline
|
51
|
+
|
52
|
+
/* MS Visual C does not have any function normally found in strings.h */
|
53
|
+
/* In particular, be careful to avoid including strings.h */
|
54
|
+
|
55
|
+
/* MS Visual C uses non-standard string function names */
|
56
|
+
#define snprintf _snprintf
|
57
|
+
#define vsnprintf _vsnprintf
|
58
|
+
#define strcasecmp _stricmp
|
59
|
+
#define strdup _strdup
|
60
|
+
#define strncasecmp(a,b,s) strnicmp((a),(b),(s))
|
61
|
+
|
62
|
+
/* MS Visual C does not support some C99 standard floating-point functions */
|
63
|
+
#define fmaxf(a,b) ((a) > (b) ? (a) : (b))
|
64
|
+
|
65
|
+
#endif /* _MSC_VER */
|
66
|
+
|
67
|
+
/* Apparently, MinGW is also missing a variety of standard functions.
|
68
|
+
* Not surprising, since MinGW is intended for compiling Windows
|
69
|
+
* programs on Windows.
|
70
|
+
* MINGW is also known as MSYS */
|
71
|
+
#if defined(_MSC_VER) || defined(__MINGW32__)
|
72
|
+
|
73
|
+
/* No langinfo in Windows or MinGW */
|
74
|
+
#define nl_langinfo(X) ""
|
75
|
+
|
76
|
+
/* strtok_r is missing in Windows */
|
77
|
+
char * strtok_r (char *s, const char *delim, char **saveptr);
|
78
|
+
|
79
|
+
/* Windows doesn't have a thread-safe rand (???) */
|
80
|
+
/* Surely not, there must be something */
|
81
|
+
/* XXX FIXME -- this breaks thread safety on windows */
|
82
|
+
#define rand_r(seedp) rand()
|
83
|
+
#endif /* _MSC_VER || __MINGW32__ */
|
84
|
+
|
85
|
+
/*
|
86
|
+
* CYGWIN on Windows doesn't have UTF8 support, or wide chars ...
|
87
|
+
* However, MS Visual C apparently does, as does MinGW. Since
|
88
|
+
* some users have both cygwin and MinGW installed, crap out the
|
89
|
+
* UTF8 code only when MinGW is missing.
|
90
|
+
*/
|
91
|
+
#if defined (__CYGWIN__) && !defined(__MINGW32__)
|
92
|
+
#define mbstate_t char
|
93
|
+
#define mbrtowc(w,s,n,x) ({*((char *)(w)) = *(s); 1;})
|
94
|
+
#define wcrtomb(s,w,x) ({*((char *)(s)) = ((char)(w)); 1;})
|
95
|
+
#define iswupper isupper
|
96
|
+
#define iswalpha isalpha
|
97
|
+
#define iswdigit isdigit
|
98
|
+
#define iswspace isspace
|
99
|
+
#define wchar_t char
|
100
|
+
#define wint_t int
|
101
|
+
#define fgetwc fgetc
|
102
|
+
#define WEOF EOF
|
103
|
+
#define towlower tolower
|
104
|
+
#define towupper toupper
|
105
|
+
#endif /* __CYGWIN__ and not __MINGW32__ */
|
106
|
+
|
107
|
+
#endif /* _WIN32 */
|
108
|
+
|
109
|
+
#if defined(__sun__)
|
110
|
+
int strncasecmp(const char *s1, const char *s2, size_t n);
|
111
|
+
/* This does not appear to be in string.h header file in sunos
|
112
|
+
(Or in linux when I compile with -ansi) */
|
113
|
+
#endif
|
114
|
+
|
115
|
+
#ifndef FALSE
|
116
|
+
#define FALSE 0
|
117
|
+
#endif
|
118
|
+
|
119
|
+
#ifndef TRUE
|
120
|
+
#define TRUE 1
|
121
|
+
#endif
|
122
|
+
|
123
|
+
#define assert(ex,string) { \
|
124
|
+
if (!(ex)) { \
|
125
|
+
prt_error("Assertion failed: %s\n", string); \
|
126
|
+
exit(1); \
|
127
|
+
} \
|
128
|
+
}
|
129
|
+
|
130
|
+
#if !defined(MIN)
|
131
|
+
#define MIN(X,Y) ( ((X) < (Y)) ? (X) : (Y))
|
132
|
+
#endif
|
133
|
+
#if !defined(MAX)
|
134
|
+
#define MAX(X,Y) ( ((X) > (Y)) ? (X) : (Y))
|
135
|
+
#endif
|
136
|
+
|
137
|
+
|
138
|
+
/**
 * Convert the wide character wc to its multi-byte form, storing the
 * bytes at s and updating the conversion state ps.
 *
 * Returns the value reported by wcrtomb() (the number of bytes stored).
 * If the conversion reports a negative result, an error naming the
 * current codeset is printed and the process exits with status 1.
 */
static inline int wctomb_check(char *s, wchar_t wc, mbstate_t *ps)
{
	int written = wcrtomb(s, wc, ps);

	if (written >= 0) return written;

	prt_error("Fatal Error: unknown character set %s\n", nl_langinfo(CODESET));
	exit(1);
}
|
147
|
+
|
148
|
+
/**
 * Decode the first multi-byte character of s and test whether it is an
 * upper-case letter according to iswupper().
 *
 * @param s  NUL-terminated multi-byte string.
 * @return   The number of bytes occupied by the character if it is
 *           upper-case; 0 if it is not upper-case, or if s does not
 *           begin with a valid, complete multi-byte sequence.
 */
static inline int is_utf8_upper(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	int nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);
	/* On failure mbrtowc() returns (size_t)-1 or (size_t)-2 and stores
	 * nothing in c, so c must not be examined. */
	if (nbytes < 0) return 0;
	if (iswupper(c)) return nbytes;
	return 0;
}
|
159
|
+
|
160
|
+
/**
 * Decode the first multi-byte character of s and test whether it is
 * alphabetic according to iswalpha().
 *
 * @param s  NUL-terminated multi-byte string.
 * @return   The number of bytes occupied by the character if it is
 *           alphabetic; 0 if it is not, or if s does not begin with a
 *           valid, complete multi-byte sequence.
 */
static inline int is_utf8_alpha(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	int nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);
	/* On failure mbrtowc() returns (size_t)-1 or (size_t)-2 and stores
	 * nothing in c, so c must not be examined. */
	if (nbytes < 0) return 0;
	if (iswalpha(c)) return nbytes;
	return 0;
}
|
171
|
+
|
172
|
+
/**
 * Decode the first multi-byte character of s and test whether it is a
 * decimal digit according to iswdigit().
 *
 * @param s  NUL-terminated multi-byte string.
 * @return   The number of bytes occupied by the character if it is a
 *           digit; 0 if it is not, or if s does not begin with a valid,
 *           complete multi-byte sequence.
 */
static inline int is_utf8_digit(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	int nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);
	/* On failure mbrtowc() returns (size_t)-1 or (size_t)-2 and stores
	 * nothing in c, so c must not be examined. */
	if (nbytes < 0) return 0;
	if (iswdigit(c)) return nbytes;
	return 0;
}
|
183
|
+
|
184
|
+
/**
 * Decode the first multi-byte character of s and test whether it is
 * white space according to iswspace().
 *
 * @param s  NUL-terminated multi-byte string.
 * @return   The number of bytes occupied by the character if it is
 *           white space; 0 if it is not, or if s does not begin with a
 *           valid, complete multi-byte sequence.
 */
static inline int is_utf8_space(const char *s)
{
	mbstate_t mbs;
	wchar_t c;
	int nbytes;

	memset(&mbs, 0, sizeof(mbs));
	nbytes = mbrtowc(&c, s, MB_CUR_MAX, &mbs);
	/* On failure mbrtowc() returns (size_t)-1 or (size_t)-2 and stores
	 * nothing in c, so c must not be examined. */
	if (nbytes < 0) return 0;
	if (iswspace(c)) return nbytes;
	return 0;
}
|
195
|
+
|
196
|
+
/**
 * Advance s past its leading run of upper-case characters, as judged
 * by is_utf8_upper(), and return a pointer to the first character for
 * which is_utf8_upper() reports 0.
 */
static inline const char * skip_utf8_upper(const char * s)
{
	int step;

	for (step = is_utf8_upper(s); step != 0; step = is_utf8_upper(s))
		s += step;

	return s;
}
|
206
|
+
|
207
|
+
/**
|
208
|
+
* Return true if the intial upper-case letters of the
|
209
|
+
* two input strings match. Comparison stops when
|
210
|
+
* both srings descend to lowercase.
|
211
|
+
*/
|
212
|
+
static inline int utf8_upper_match(const char * s, const char * t)
|
213
|
+
{
|
214
|
+
mbstate_t mbs, mbt;
|
215
|
+
wchar_t ws, wt;
|
216
|
+
int ns, nt;
|
217
|
+
|
218
|
+
memset(&mbs, 0, sizeof(mbs));
|
219
|
+
memset(&mbt, 0, sizeof(mbt));
|
220
|
+
|
221
|
+
ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
|
222
|
+
nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
|
223
|
+
while (iswupper(ws) || iswupper(wt))
|
224
|
+
{
|
225
|
+
if (ws != wt) return FALSE;
|
226
|
+
s += ns;
|
227
|
+
t += nt;
|
228
|
+
ns = mbrtowc(&ws, s, MB_CUR_MAX, &mbs);
|
229
|
+
nt = mbrtowc(&wt, t, MB_CUR_MAX, &mbt);
|
230
|
+
}
|
231
|
+
return TRUE;
|
232
|
+
}
|
233
|
+
|
234
|
+
void downcase_utf8_str(char *to, const char * from, size_t usize);
|
235
|
+
void upcase_utf8_str(char *to, const char * from, size_t usize);
|
236
|
+
|
237
|
+
size_t lg_strlcpy(char * dest, const char *src, size_t size);
|
238
|
+
void safe_strcpy(char *u, const char * v, size_t usize);
|
239
|
+
void safe_strcat(char *u, const char *v, size_t usize);
|
240
|
+
char *safe_strdup(const char *u);
|
241
|
+
|
242
|
+
void left_print_string(FILE* fp, const char *, const char *);
|
243
|
+
|
244
|
+
/* routines for allocating basic objects */
|
245
|
+
void init_memusage(void);
|
246
|
+
void * xalloc(size_t);
|
247
|
+
void * xrealloc(void *, size_t oldsize, size_t newsize);
|
248
|
+
void * exalloc(size_t);
|
249
|
+
|
250
|
+
#define TRACK_SPACE_USAGE
|
251
|
+
#ifdef TRACK_SPACE_USAGE
|
252
|
+
void xfree(void *, size_t);
|
253
|
+
void exfree(void *, size_t);
|
254
|
+
#else /* TRACK_SPACE_USAGE */
|
255
|
+
/* Untracked fallbacks: the size argument is accepted only to keep the
 * same signature as the space-tracking versions; it is not used. */
static inline void xfree(void *p, size_t sz) { free(p); }
static inline void exfree(void *p, size_t sz) { free(p); }
|
257
|
+
#endif /* TRACK_SPACE_USAGE */
|
258
|
+
|
259
|
+
size_t get_space_in_use(void);
|
260
|
+
size_t get_max_space_used(void);
|
261
|
+
|
262
|
+
|
263
|
+
char * get_default_locale(void);
|
264
|
+
char * join_path(const char * prefix, const char * suffix);
|
265
|
+
|
266
|
+
FILE * dictopen(const char *filename, const char *how);
|
267
|
+
void * object_open(const char *filename,
|
268
|
+
void * (*opencb)(const char *, void *),
|
269
|
+
void * user_data);
|
270
|
+
|
271
|
+
/**
 * Returns the smallest power of two that is at least i and at least 1.
 * Any i <= 1 (including negative values) yields 1.
 *
 * NOTE: the result is built by repeated doubling in an int, so i must
 * not exceed the largest power of two representable in int.
 */
static inline int next_power_of_two_up(int i)
{
	int pow2;

	for (pow2 = 1; pow2 < i; pow2 <<= 1)
		;

	return pow2;
}
|
280
|
+
|
281
|
+
#endif
|
@@ -0,0 +1,124 @@
|
|
1
|
+
/***************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/***************************************************************************/
|
13
|
+
|
14
|
+
#include <wchar.h>
|
15
|
+
#include <wctype.h>
|
16
|
+
#include "api.h"
|
17
|
+
#include "error.h"
|
18
|
+
|
19
|
+
/**
|
20
|
+
* Reads in one word from the file, allocates space for it,
|
21
|
+
* and returns it.
|
22
|
+
*/
|
23
|
+
static const char * get_a_word(Dictionary dict, FILE * fp)
|
24
|
+
{
|
25
|
+
char word[MAX_WORD+4]; /* allow for 4-byte wide chars */
|
26
|
+
const char * s;
|
27
|
+
wint_t c;
|
28
|
+
mbstate_t mbss;
|
29
|
+
int j;
|
30
|
+
|
31
|
+
do {
|
32
|
+
c = fgetwc(fp);
|
33
|
+
} while ((c != WEOF) && iswspace(c));
|
34
|
+
if (c == WEOF) return NULL;
|
35
|
+
|
36
|
+
memset(&mbss, 0, sizeof(mbss));
|
37
|
+
for (j=0; (j <= MAX_WORD-1) && (!iswspace(c)) && (c != WEOF);)
|
38
|
+
{
|
39
|
+
j += wctomb_check(&word[j], c, &mbss);
|
40
|
+
c = fgetwc(fp);
|
41
|
+
}
|
42
|
+
|
43
|
+
if (j >= MAX_WORD) {
|
44
|
+
word[MAX_WORD] = 0x0;
|
45
|
+
prt_error("Fatal Error: The dictionary contains a word that "
|
46
|
+
"is too long. The word was: %s", word);
|
47
|
+
exit(1);
|
48
|
+
}
|
49
|
+
word[j] = '\0';
|
50
|
+
s = string_set_add(word, dict->string_set);
|
51
|
+
return s;
|
52
|
+
}
|
53
|
+
|
54
|
+
/**
|
55
|
+
*
|
56
|
+
* (1) opens the word file and adds it to the word file list
|
57
|
+
* (2) reads in the words
|
58
|
+
* (3) puts each word in a Dict_node
|
59
|
+
* (4) links these together by their left pointers at the
|
60
|
+
* front of the list pointed to by dn
|
61
|
+
* (5) returns a pointer to the first of this list
|
62
|
+
*/
|
63
|
+
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename)
|
64
|
+
{
|
65
|
+
Dict_node * dn_new;
|
66
|
+
Word_file * wf;
|
67
|
+
FILE * fp;
|
68
|
+
const char * s;
|
69
|
+
char file_name_copy[MAX_PATH_NAME+1];
|
70
|
+
|
71
|
+
safe_strcpy(file_name_copy, filename+1, sizeof(file_name_copy)); /* get rid of leading '/' */
|
72
|
+
|
73
|
+
if ((fp = dictopen(file_name_copy, "r")) == NULL) {
|
74
|
+
prt_error("Error opening word file %s\n", file_name_copy);
|
75
|
+
return NULL;
|
76
|
+
}
|
77
|
+
|
78
|
+
/*printf(" Reading \"%s\"\n", file_name_copy);*/
|
79
|
+
/*printf("*"); fflush(stdout);*/
|
80
|
+
|
81
|
+
wf = (Word_file *) xalloc(sizeof (Word_file));
|
82
|
+
safe_strcpy(wf->file, file_name_copy, sizeof(wf->file));
|
83
|
+
wf->changed = FALSE;
|
84
|
+
wf->next = dict->word_file_header;
|
85
|
+
dict->word_file_header = wf;
|
86
|
+
|
87
|
+
while ((s = get_a_word(dict, fp)) != NULL) {
|
88
|
+
dn_new = (Dict_node *) xalloc(sizeof(Dict_node));
|
89
|
+
dn_new->left = dn;
|
90
|
+
dn = dn_new;
|
91
|
+
dn->string = s;
|
92
|
+
dn->file = wf;
|
93
|
+
}
|
94
|
+
fclose(fp);
|
95
|
+
return dn;
|
96
|
+
}
|
97
|
+
|
98
|
+
/**
 * save_files() -- walk the dictionary's word-file list and process every
 * file whose changed flag is set.
 *
 * Each such file is opened with mode "w", closed again, and its changed
 * flag is cleared.
 * NOTE(review): the call that would write the contents
 * (output_dictionary) is commented out, so as written nothing is written
 * between the fopen() and the fclose() -- confirm this is intended.
 *
 * If a file cannot be opened, a message is printed and the function
 * returns immediately; files later in the list are not processed.
 */
void save_files(Dictionary dict)
{
	Word_file *entry = dict->word_file_header;

	while (entry != NULL) {
		if (entry->changed) {
			FILE *out = fopen(entry->file, "w");
			if (out == NULL) {
				printf("\nCannot open %s. Gee, this shouldn't happen.\n", entry->file);
				printf("file not saved\n");
				return;
			}
			printf(" saving file \"%s\"\n", entry->file);
			/*output_dictionary(dict_root, fp, wf);*/
			fclose(out);
			entry->changed = FALSE;
		}
		entry = entry->next;
	}
}
|
116
|
+
|
117
|
+
/**
 * files_need_saving() -- report whether any word file attached to the
 * dictionary has its changed flag set.
 *
 * Returns TRUE if at least one does, FALSE otherwise.
 */
int files_need_saving(Dictionary dict)
{
	Word_file *entry = dict->word_file_header;

	while (entry != NULL) {
		if (entry->changed) return TRUE;
		entry = entry->next;
	}
	return FALSE;
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
int files_need_saving(Dictionary dict);
|
14
|
+
void save_files(Dictionary dict);
|
15
|
+
Dict_node * read_word_file(Dictionary dict, Dict_node * dn, char * filename);
|
@@ -0,0 +1,526 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
/*
|
14
|
+
* Miscellaneous utilities for dealing with word types.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <math.h>
|
18
|
+
#include <stdio.h>
|
19
|
+
|
20
|
+
#include "api.h"
|
21
|
+
#include "disjunct-utils.h"
|
22
|
+
#include "word-utils.h"
|
23
|
+
|
24
|
+
/* ======================================================== */
|
25
|
+
/* Exp utilities ... */
|
26
|
+
|
27
|
+
void free_E_list(E_list *);
|
28
|
+
void free_Exp(Exp * e)
|
29
|
+
{
|
30
|
+
if (e->type != CONNECTOR_type) {
|
31
|
+
free_E_list(e->u.l);
|
32
|
+
}
|
33
|
+
xfree((char *)e, sizeof(Exp));
|
34
|
+
}
|
35
|
+
|
36
|
+
/**
 * free_E_list() -- release every element of an E_list together with the
 * expression each element points to.
 *
 * The list is walked iteratively so that stack use does not grow with
 * the length of the list (nesting of expressions still recurses through
 * free_Exp()).  A NULL list is a no-op.
 */
void free_E_list(E_list * l)
{
	E_list * rest;

	for (; l != NULL; l = rest) {
		rest = l->next;   /* fetch before the node is released */
		free_Exp(l->e);
		xfree((char *)l, sizeof(E_list));
	}
}
|
43
|
+
|
44
|
+
/* Returns the number of connectors in the expression e */
|
45
|
+
int size_of_expression(Exp * e)
|
46
|
+
{
|
47
|
+
int size;
|
48
|
+
E_list * l;
|
49
|
+
if (e->type == CONNECTOR_type) return 1;
|
50
|
+
size = 0;
|
51
|
+
for (l=e->u.l; l!=NULL; l=l->next) {
|
52
|
+
size += size_of_expression(l->e);
|
53
|
+
}
|
54
|
+
return size;
|
55
|
+
}
|
56
|
+
|
57
|
+
/**
|
58
|
+
* Build a copy of the given expression (don't copy strings, of course)
|
59
|
+
*/
|
60
|
+
static E_list * copy_E_list(E_list * l);
|
61
|
+
Exp * copy_Exp(Exp * e)
|
62
|
+
{
|
63
|
+
Exp * n;
|
64
|
+
if (e == NULL) return NULL;
|
65
|
+
n = (Exp *) xalloc(sizeof(Exp));
|
66
|
+
*n = *e;
|
67
|
+
if (e->type != CONNECTOR_type) {
|
68
|
+
n->u.l = copy_E_list(e->u.l);
|
69
|
+
}
|
70
|
+
return n;
|
71
|
+
}
|
72
|
+
|
73
|
+
/**
 * copy_E_list() -- duplicate an E_list, element by element, copying each
 * element's expression with copy_Exp().  Returns NULL for a NULL list.
 */
static E_list * copy_E_list(E_list * l)
{
	E_list * dup;

	if (l == NULL) return NULL;

	dup = (E_list *) xalloc(sizeof(E_list));
	/* The remainder of the list is copied before this element's expression. */
	dup->next = copy_E_list(l->next);
	dup->e = copy_Exp(l->e);
	return dup;
}
|
82
|
+
|
83
|
+
/**
|
84
|
+
* Compare two expressions, return 1 for equal, 0 for unequal
|
85
|
+
*/
|
86
|
+
static int exp_compare(Exp * e1, Exp * e2)
|
87
|
+
{
|
88
|
+
E_list *el1, *el2;
|
89
|
+
|
90
|
+
if ((e1 == NULL) && (e2 == NULL))
|
91
|
+
return 1; /* they are equal */
|
92
|
+
if ((e1 == NULL) || (e2 == NULL))
|
93
|
+
return 0; /* they are not equal */
|
94
|
+
if (e1->type != e2->type)
|
95
|
+
return 0;
|
96
|
+
if (fabs (e1->cost - e2->cost) > 0.001)
|
97
|
+
return 0;
|
98
|
+
if (e1->type == CONNECTOR_type)
|
99
|
+
{
|
100
|
+
if (e1->dir != e2->dir)
|
101
|
+
return 0;
|
102
|
+
/* printf("%s %s\n",e1->u.string,e2->u.string); */
|
103
|
+
if (strcmp(e1->u.string,e2->u.string)!=0)
|
104
|
+
return 0;
|
105
|
+
}
|
106
|
+
else
|
107
|
+
{
|
108
|
+
el1 = e1->u.l;
|
109
|
+
el2 = e2->u.l;
|
110
|
+
/* while at least 1 is non-null */
|
111
|
+
for (;(el1!=NULL)||(el2!=NULL);) {
|
112
|
+
/*fail if 1 is null */
|
113
|
+
if ((el1==NULL)||(el2==NULL))
|
114
|
+
return 0;
|
115
|
+
/* fail if they are not compared */
|
116
|
+
if (exp_compare(el1->e, el2->e) == 0)
|
117
|
+
return 0;
|
118
|
+
if (el1!=NULL)
|
119
|
+
el1 = el1->next;
|
120
|
+
if (el2!=NULL)
|
121
|
+
el2 = el2->next;
|
122
|
+
}
|
123
|
+
}
|
124
|
+
return 1; /* if never returned 0, return 1 */
|
125
|
+
}
|
126
|
+
|
127
|
+
/**
|
128
|
+
* Sub-expression matcher -- return 1 if sub is non-NULL and
|
129
|
+
* contained in super, 0 otherwise.
|
130
|
+
*/
|
131
|
+
static int exp_contains(Exp * super, Exp * sub)
|
132
|
+
{
|
133
|
+
E_list * el;
|
134
|
+
|
135
|
+
#ifdef DEBUG
|
136
|
+
printf("SUP: ");
|
137
|
+
if (super) print_expression(super);
|
138
|
+
printf("\n");
|
139
|
+
#endif
|
140
|
+
|
141
|
+
if (sub==NULL || super==NULL)
|
142
|
+
return 0;
|
143
|
+
if (exp_compare(sub,super)==1)
|
144
|
+
return 1;
|
145
|
+
if (super->type==CONNECTOR_type)
|
146
|
+
return 0; /* super is a leaf */
|
147
|
+
|
148
|
+
/* proceed through supers children and return 1 if sub
|
149
|
+
is contained in any of them */
|
150
|
+
for(el = super->u.l; el!=NULL; el=el->next) {
|
151
|
+
if (exp_contains(el->e, sub)==1)
|
152
|
+
return 1;
|
153
|
+
}
|
154
|
+
return 0;
|
155
|
+
}
|
156
|
+
|
157
|
+
/* ======================================================== */
|
158
|
+
/* X_node utilities ... */
|
159
|
+
/**
|
160
|
+
* frees the list of X_nodes pointed to by x, and all of the expressions
|
161
|
+
*/
|
162
|
+
void free_X_nodes(X_node * x)
|
163
|
+
{
|
164
|
+
X_node * y;
|
165
|
+
for (; x!= NULL; x = y) {
|
166
|
+
y = x->next;
|
167
|
+
free_Exp(x->exp);
|
168
|
+
xfree((char *)x, sizeof(X_node));
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
/**
|
173
|
+
* Destructively catenates the two disjunct lists d1 followed by d2.
|
174
|
+
* Doesn't change the contents of the disjuncts.
|
175
|
+
* Traverses the first list, but not the second.
|
176
|
+
*/
|
177
|
+
X_node * catenate_X_nodes(X_node *d1, X_node *d2)
|
178
|
+
{
|
179
|
+
X_node * dis = d1;
|
180
|
+
|
181
|
+
if (d1 == NULL) return d2;
|
182
|
+
if (d2 == NULL) return d1;
|
183
|
+
while (dis->next != NULL) dis = dis->next;
|
184
|
+
dis->next = d2;
|
185
|
+
return d1;
|
186
|
+
}
|
187
|
+
|
188
|
+
/* ======================================================== */
|
189
|
+
/* Connector utilities ... */
|
190
|
+
|
191
|
+
/**
|
192
|
+
* free_connectors() -- free the list of connectors pointed to by e
|
193
|
+
* (does not free any strings)
|
194
|
+
*/
|
195
|
+
void free_connectors(Connector *e)
|
196
|
+
{
|
197
|
+
Connector * n;
|
198
|
+
for (; e != NULL; e = n)
|
199
|
+
{
|
200
|
+
n = e->next;
|
201
|
+
xfree((char *)e, sizeof(Connector));
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
/**
 * exfree_connectors() -- release, with exfree(), every connector in the
 * list starting at e, including each connector's string (whose size is
 * taken to be strlen(string)+1 bytes).
 */
void exfree_connectors(Connector *e)
{
	while (e != NULL) {
		Connector * following = e->next;
		exfree((void *) e->string, sizeof(char)*(strlen(e->string)+1));
		exfree(e, sizeof(Connector));
		e = following;
	}
}
|
214
|
+
|
215
|
+
Connector * connector_new(void)
|
216
|
+
{
|
217
|
+
Connector *c = (Connector *) xalloc(sizeof(Connector));
|
218
|
+
c->length_limit = UNLIMITED_LEN;
|
219
|
+
c->string = "";
|
220
|
+
c->label = NORMAL_LABEL;
|
221
|
+
c->hash = -1;
|
222
|
+
c->priority = THIN_priority;
|
223
|
+
c->multi = FALSE;
|
224
|
+
c->next = NULL;
|
225
|
+
c->tableNext = NULL;
|
226
|
+
return c;
|
227
|
+
}
|
228
|
+
|
229
|
+
/**
 * init_connector() -- reset the hash (to -1) and the length limit (to
 * UNLIMITED_LEN) of an existing connector.  No other field is touched.
 * Returns c so the call can be chained.
 */
Connector * init_connector(Connector *c)
{
	c->length_limit = UNLIMITED_LEN;
	c->hash = -1;
	return c;
}
|
235
|
+
|
236
|
+
/**
|
237
|
+
* This builds a new copy of the connector list pointed to by c.
|
238
|
+
* Strings, as usual, are not copied.
|
239
|
+
*/
|
240
|
+
Connector * copy_connectors(Connector * c)
|
241
|
+
{
|
242
|
+
Connector *c1;
|
243
|
+
if (c == NULL) return NULL;
|
244
|
+
c1 = connector_new();
|
245
|
+
*c1 = *c;
|
246
|
+
c1->next = copy_connectors(c->next);
|
247
|
+
return c1;
|
248
|
+
}
|
249
|
+
|
250
|
+
/**
 * excopy_connectors() -- build a new copy of the connector list starting
 * at c, duplicating the connector strings as well.
 *
 * Both the nodes and the strings are obtained from exalloc(), so that
 * the result can be released with exfree_connectors(), which hands both
 * to exfree().  (Allocating the node via connector_new() would take it
 * from xalloc() instead, and its defaults would be overwritten by the
 * structure copy anyway.)
 *
 * Returns NULL for an empty list.
 */
Connector * excopy_connectors(Connector * c)
{
	Connector *head = NULL;
	Connector **link = &head;   /* where the next copy gets attached */

	for (; c != NULL; c = c->next) {
		Connector *dup = (Connector *) exalloc(sizeof(Connector));
		char *s = (char *) exalloc(sizeof(char)*(strlen(c->string)+1));

		*dup = *c;
		strcpy(s, c->string);
		dup->string = s;
		dup->next = NULL;
		*link = dup;
		link = &dup->next;
	}
	return head;
}
|
266
|
+
|
267
|
+
/* ======================================================== */
|
268
|
+
/* Link utilities ... */
|
269
|
+
|
270
|
+
/**
 * excopy_link() -- make an exalloc()'ed copy of a link: its name string,
 * its l and r values, and (via excopy_connectors) its left and right
 * connector lists.  Returns NULL when l is NULL.
 */
Link * excopy_link(Link * l)
{
	Link * dup;
	char * name_copy;

	if (l == NULL) return NULL;

	dup = (Link *) exalloc(sizeof(Link));
	name_copy = (char *) exalloc(sizeof(char)*(strlen(l->name)+1));
	strcpy(name_copy, l->name);

	dup->name = name_copy;
	dup->l = l->l;
	dup->r = l->r;
	dup->lc = excopy_connectors(l->lc);
	dup->rc = excopy_connectors(l->rc);
	return dup;
}
|
288
|
+
|
289
|
+
/**
 * exfree_link() -- release a link made by excopy_link(): both connector
 * lists, the name string, and the Link itself, all through exfree().
 *
 * A NULL link is ignored, so that the NULL which excopy_link() returns
 * for a NULL input can be passed back in safely.
 */
void exfree_link(Link * l)
{
	if (l == NULL) return;

	exfree_connectors(l->rc);
	exfree_connectors(l->lc);
	exfree((void *)l->name, sizeof(char)*(strlen(l->name)+1));
	exfree(l, sizeof(Link));
}
|
296
|
+
|
297
|
+
/* ======================================================== */
|
298
|
+
/* Connector-set utilities ... */
|
299
|
+
/**
|
300
|
+
* This hash function only looks at the leading upper case letters of
|
301
|
+
* the string, and the direction, '+' or '-'.
|
302
|
+
*/
|
303
|
+
static int connector_set_hash(Connector_set *conset, const char * s, int d)
|
304
|
+
{
|
305
|
+
unsigned int i;
|
306
|
+
/* djb2 hash */
|
307
|
+
i = 5381;
|
308
|
+
i = ((i << 5) + i) + d;
|
309
|
+
while (isupper((int) *s)) /* connector tables cannot contain UTF8, yet */
|
310
|
+
{
|
311
|
+
i = ((i << 5) + i) + *s;
|
312
|
+
s++;
|
313
|
+
}
|
314
|
+
return (i & (conset->table_size-1));
|
315
|
+
}
|
316
|
+
|
317
|
+
/**
 * build_connector_set_from_expression() -- add every connector found in
 * the expression e to the hash table of conset.
 *
 * For each CONNECTOR_type node a new Connector is created that shares
 * the node's string and is pushed onto the front of its hash bucket.
 * Other nodes are handled by recursing into their child list.
 */
static void build_connector_set_from_expression(Connector_set * conset, Exp * e)
{
	E_list * item;
	Connector * entry;
	int slot;

	if (e->type != CONNECTOR_type) {
		for (item = e->u.l; item != NULL; item = item->next) {
			build_connector_set_from_expression(conset, item->e);
		}
		return;
	}

	entry = connector_new();
	entry->string = e->u.string;
	entry->word = e->dir;   /* just use the word field to give the dir */
	slot = connector_set_hash(conset, entry->string, entry->word);
	entry->next = conset->hash_table[slot];
	conset->hash_table[slot] = entry;
}
|
336
|
+
|
337
|
+
/**
 * connector_set_create() -- build a Connector_set holding the connectors
 * of the expression e.
 *
 * The table size is next_power_of_two_up() of the number of connectors
 * in e; every bucket starts out NULL and is then filled by
 * build_connector_set_from_expression().  Release the result with
 * connector_set_delete().
 */
Connector_set * connector_set_create(Exp *e)
{
	Connector_set *set;
	int slot;

	set = (Connector_set *) xalloc(sizeof(Connector_set));
	set->table_size = next_power_of_two_up(size_of_expression(e));
	set->hash_table =
		(Connector **) xalloc(set->table_size * sizeof(Connector *));

	slot = 0;
	while (slot < set->table_size) {
		set->hash_table[slot] = NULL;
		slot++;
	}

	build_connector_set_from_expression(set, e);
	return set;
}
|
350
|
+
|
351
|
+
/**
 * connector_set_delete() -- release a Connector_set: the connectors in
 * every bucket, the bucket array, and the set itself.  A NULL set is
 * ignored.
 */
void connector_set_delete(Connector_set * conset)
{
	int slot;

	if (conset == NULL) return;

	slot = 0;
	while (slot < conset->table_size) {
		free_connectors(conset->hash_table[slot]);
		slot++;
	}
	xfree(conset->hash_table, conset->table_size * sizeof(Connector *));
	xfree(conset, sizeof(Connector_set));
}
|
359
|
+
|
360
|
+
/**
|
361
|
+
* Returns TRUE the given connector is in this conset. FALSE otherwise.
|
362
|
+
* d='+' means this connector is on the right side of the disjunct.
|
363
|
+
* d='-' means this connector is on the left side of the disjunct.
|
364
|
+
*/
|
365
|
+
int match_in_connector_set(Sentence sent, Connector_set *conset, Connector * c, int d)
|
366
|
+
{
|
367
|
+
int h;
|
368
|
+
Connector * c1;
|
369
|
+
if (conset == NULL) return FALSE;
|
370
|
+
h = connector_set_hash(conset, c->string, d);
|
371
|
+
for (c1 = conset->hash_table[h]; c1 != NULL; c1 = c1->next)
|
372
|
+
{
|
373
|
+
if (x_match(sent, c1, c) && (d == c1->word)) return TRUE;
|
374
|
+
}
|
375
|
+
return FALSE;
|
376
|
+
}
|
377
|
+
|
378
|
+
/* ======================================================== */
|
379
|
+
/* More connector utilities ... */
|
380
|
+
|
381
|
+
/**
|
382
|
+
* This is like the basic "match" function in count.c - the basic
|
383
|
+
* connector-matching function used in parsing - except it ignores
|
384
|
+
* "priority" (used to handle fat links)
|
385
|
+
*/
|
386
|
+
static int easy_match(const char * s, const char * t)
|
387
|
+
{
|
388
|
+
while(isupper((int)*s) || isupper((int)*t)) {
|
389
|
+
if (*s != *t) return FALSE;
|
390
|
+
s++;
|
391
|
+
t++;
|
392
|
+
}
|
393
|
+
|
394
|
+
while ((*s!='\0') && (*t!='\0')) {
|
395
|
+
if ((*s == '*') || (*t == '*') ||
|
396
|
+
((*s == *t) && (*s != '^'))) {
|
397
|
+
s++;
|
398
|
+
t++;
|
399
|
+
} else return FALSE;
|
400
|
+
}
|
401
|
+
return TRUE;
|
402
|
+
}
|
403
|
+
|
404
|
+
/**
|
405
|
+
* word_has_connector() -- return TRUE if dictionary expression has connector
|
406
|
+
* This function takes a dict_node (corresponding to an entry in a
|
407
|
+
* given dictionary), a string (representing a connector), and a
|
408
|
+
* direction (0 = right-pointing, 1 = left-pointing); it returns 1
|
409
|
+
* if the dictionary expression for the word includes the connector,
|
410
|
+
* 0 otherwise. This can be used to see if a word is in a certain
|
411
|
+
* category (checking for a category connector in a table), or to see
|
412
|
+
* if a word has a connector in a normal dictionary. The connector
|
413
|
+
* check uses a "smart-match", the same kind used by the parser.
|
414
|
+
*/
|
415
|
+
int word_has_connector(Dict_node * dn, const char * cs, int direction)
|
416
|
+
{
|
417
|
+
Connector * c2=NULL;
|
418
|
+
Disjunct * d, *d0;
|
419
|
+
if(dn == NULL) return -1;
|
420
|
+
d0 = d = build_disjuncts_for_dict_node(dn);
|
421
|
+
if(d == NULL) return 0;
|
422
|
+
for(; d!=NULL; d=d->next) {
|
423
|
+
if(direction==0) c2 = d->right;
|
424
|
+
if(direction==1) c2 = d->left;
|
425
|
+
for(; c2!=NULL; c2=c2->next) {
|
426
|
+
if(easy_match(c2->string, cs)==1) {
|
427
|
+
free_disjuncts(d0);
|
428
|
+
return 1;
|
429
|
+
}
|
430
|
+
}
|
431
|
+
}
|
432
|
+
free_disjuncts(d0);
|
433
|
+
return 0;
|
434
|
+
}
|
435
|
+
|
436
|
+
/* ======================================================== */
|
437
|
+
/* Dictionary utilities ... */
|
438
|
+
|
439
|
+
static int dn_word_contains(Dictionary dict,
|
440
|
+
Dict_node * w_dn, const char * macro)
|
441
|
+
{
|
442
|
+
Exp * m_exp;
|
443
|
+
Dict_node *m_dn;
|
444
|
+
|
445
|
+
if (w_dn == NULL) return 0;
|
446
|
+
|
447
|
+
m_dn = dictionary_lookup_list(dict, macro);
|
448
|
+
if (m_dn == NULL) return 0;
|
449
|
+
|
450
|
+
m_exp = m_dn->exp;
|
451
|
+
free_lookup_list(m_dn);
|
452
|
+
|
453
|
+
#ifdef DEBUG
|
454
|
+
printf("\nWORD: ");
|
455
|
+
print_expression(w_dn->exp);
|
456
|
+
printf("\nMACR: ");
|
457
|
+
print_expression(m_exp);
|
458
|
+
printf("\n");
|
459
|
+
#endif
|
460
|
+
|
461
|
+
for (;w_dn != NULL; w_dn = w_dn->right)
|
462
|
+
{
|
463
|
+
if (1 == exp_contains(w_dn->exp, m_exp))
|
464
|
+
return 1;
|
465
|
+
}
|
466
|
+
return 0;
|
467
|
+
}
|
468
|
+
|
469
|
+
/**
|
470
|
+
* word_contains: return true if the word may involve application of
|
471
|
+
* a rule.
|
472
|
+
*
|
473
|
+
* @return: true if word's expression contains macro's expression,
|
474
|
+
* false otherwise.
|
475
|
+
*/
|
476
|
+
int word_contains(Dictionary dict, const char * word, const char * macro)
|
477
|
+
{
|
478
|
+
Dict_node *w_dn;
|
479
|
+
int ret;
|
480
|
+
w_dn = abridged_lookup_list(dict, word);
|
481
|
+
ret = dn_word_contains(dict, w_dn, macro);
|
482
|
+
free_lookup_list(w_dn);
|
483
|
+
return ret;
|
484
|
+
}
|
485
|
+
|
486
|
+
/**
 * list_whole_dictionary() -- flatten the tree rooted at root into a list
 * of newly allocated node copies, linked through their right pointers
 * and placed in front of the list dn.
 *
 * The resulting order is: the copy of root, then the nodes of the right
 * subtree, then the nodes of the left subtree, then dn.  Each copy is a
 * structure copy of its original, so every field other than right keeps
 * the original's value.  Returns dn when root is NULL.
 */
Dict_node * list_whole_dictionary(Dict_node *root, Dict_node *dn)
{
	Dict_node *copy, *tail;

	if (root == NULL) return dn;

	copy = (Dict_node *) xalloc(sizeof(Dict_node));
	*copy = *root;
	tail = list_whole_dictionary(root->left, dn);
	copy->right = list_whole_dictionary(root->right, tail);
	return copy;
}
|
496
|
+
|
497
|
+
/* Dictionary entry names passed as the macro argument of word_contains(). */
#define PAST_TENSE_FORM_MARKER "<marker-past>"
#define ENTITY_MARKER "<marker-entity>"
/* Currently not referenced in this file. */
#define COMMON_ENTITY_MARKER "<marker-common-entity>"
|
500
|
+
|
501
|
+
/* This is exported to public API (for Java)
|
502
|
+
* @deprecated -- past-tense verbs are tagged with .v-d or .w-d or .q-d
|
503
|
+
* subscripts. use those instead to figure out if a verb is past tense.
|
504
|
+
*/
|
505
|
+
int dictionary_is_past_tense_form(Dictionary dict, const char * str)
|
506
|
+
{
|
507
|
+
if (word_contains(dict, str, PAST_TENSE_FORM_MARKER) == 1)
|
508
|
+
return 1;
|
509
|
+
return 0;
|
510
|
+
}
|
511
|
+
|
512
|
+
/**
|
513
|
+
* dictionary_is_entity - Return true if word is entity.
|
514
|
+
* Entities are proper names (geographical names,
|
515
|
+
* names of people), street addresses, phone numbers,
|
516
|
+
* etc.
|
517
|
+
*/
|
518
|
+
/* This is exported to public API (for Java) */
|
519
|
+
int dictionary_is_entity(Dictionary dict, const char * str)
|
520
|
+
{
|
521
|
+
if (word_contains(dict, str, ENTITY_MARKER) == 1)
|
522
|
+
return 1;
|
523
|
+
return 0;
|
524
|
+
}
|
525
|
+
|
526
|
+
/* ========================= END OF FILE ============================== */
|