grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
void prune(Sentence sent);
|
14
|
+
int power_prune(Sentence sent, int mode, Parse_Options opts);
|
15
|
+
void pp_and_power_prune(Sentence sent, int mode, Parse_Options opts);
|
16
|
+
int prune_match(int dist, Connector * left, Connector * right);
|
17
|
+
void expression_prune(Sentence sent);
|
@@ -0,0 +1,1785 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include <limits.h>
|
15
|
+
#include <string.h>
|
16
|
+
#include <wchar.h>
|
17
|
+
#include <wctype.h>
|
18
|
+
#include "api.h"
|
19
|
+
#include "disjunct-utils.h"
|
20
|
+
#include "error.h"
|
21
|
+
|
22
|
+
const char * linkgrammar_get_version(void)
|
23
|
+
{
|
24
|
+
const char *s = "link-grammar-" LINK_VERSION_STRING;
|
25
|
+
return s;
|
26
|
+
}
|
27
|
+
|
28
|
+
/**
 * Return the version of the given dictionary as a dotted string.
 *
 * The version is looked up once and cached in a function-level static,
 * so every later call returns the first successfully computed string,
 * whichever dictionary is passed in. The returned string is owned by
 * this function; the caller must not free it.
 *
 * Returns "[unknown]" if the dictionary carries no version entry, or if
 * memory for the version string could not be allocated.
 */
const char * linkgrammar_get_dict_version(Dictionary dict)
{
	static char * ver = NULL;
	char * p;
	Dict_node *dn;

	if (ver) return ver;

	/* The newer dictionaries should contain a macro of the form:
	 * <dictionary-version-number>: V4v6v6+;
	 * which would indicate dictionary version 4.6.6
	 * Older dictionaries contain no version info.
	 */
	dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
	if (NULL == dn) return "[unknown]";

	/* Copy the connector name, skipping its first character (the
	 * leading 'V'). The lookup list is not needed once we have our
	 * own copy. */
	ver = strdup(&dn->exp->u.string[1]);
	free_lookup_list(dn);

	/* Out of memory: report "unknown" rather than dereferencing NULL.
	 * ver stays NULL, so a later call will try again. */
	if (NULL == ver) return "[unknown]";

	/* Turn every 'v' separator into a '.', e.g. "4v6v6" -> "4.6.6" */
	for (p = strchr(ver, 'v'); p != NULL; p = strchr(p+1, 'v'))
	{
		*p = '.';
	}

	return ver;
}
|
57
|
+
|
58
|
+
|
59
|
+
/*
|
60
|
+
The dictionary format:
|
61
|
+
|
62
|
+
In what follows:
|
63
|
+
Every "%" symbol and everything after it is ignored on every line.
|
64
|
+
Every newline or tab is replaced by a space.
|
65
|
+
|
66
|
+
The dictionary file is a sequence of ENTRIES. Each ENTRY is one or
|
67
|
+
more WORDS (a sequence of upper or lower case letters) separated by
|
68
|
+
spaces, followed by a ":", followed by an EXPRESSION followed by a
|
69
|
+
";". An EXPRESSION is an expression where the operators are "&"
|
70
|
+
or "and" or "|" or "or", and there are three types of parentheses:
|
71
|
+
"()", "{}", and "[]". The terminal symbols of this grammar are the
|
72
|
+
connectors, which are strings of letters or numbers or *s.
|
73
|
+
Expressions may be written in prefix or infix form. In prefix-form,
|
74
|
+
the expressions are lisp-like, with the operators &, | preceding
|
75
|
+
the operands. In infix-form, the operators are in the middle. The
|
76
|
+
current dictionaries are in infix form. If the C preprocessor
|
77
|
+
constant INFIX_NOTATION is defined, then the dictionary is assumed
|
78
|
+
to be in infix form.
|
79
|
+
|
80
|
+
The connector begins with an optional @, which is followed by an upper
|
81
|
+
case sequence of letters. Each subsequent *, lower case letter or
|
82
|
+
number is a subscript. At the end is a + or - sign. The "@" allows
|
83
|
+
this connector to attach to one or more other connectors.
|
84
|
+
|
85
|
+
Here is a sample dictionary entry (in infix form):
|
86
|
+
|
87
|
+
gone: T- & {@EV+};
|
88
|
+
|
89
|
+
(See our paper for more about how to interpret the meaning of the
|
90
|
+
dictionary expressions.)
|
91
|
+
|
92
|
+
A previously defined word (such as "gone" above) may be used instead
|
93
|
+
of a connector to specify the expression it was defined to be. Of
|
94
|
+
course, in this case, it must uniquely specify a word in the
|
95
|
+
dictionary, and have been previously defined.
|
96
|
+
|
97
|
+
If a word is of the form "/foo", then the file current-dir/foo
|
98
|
+
is a so-called word file, and is read in as a list of words.
|
99
|
+
A word file is just a list of words separated by blanks or newlines.
|
100
|
+
|
101
|
+
A word that contains the character "_" defines an idiomatic use of
|
102
|
+
the words separated by the "_". For example "kind of" is an idiomatic
|
103
|
+
expression, so a word "kind_of" is defined in the dictionary.
|
104
|
+
Idiomatic expressions of any number of words can be defined in this way.
|
105
|
+
When the word "kind" is encountered, all the idiomatic uses of the word
|
106
|
+
are considered.
|
107
|
+
|
108
|
+
An expression enclosed in "[..]" is given a cost of 1. This means
|
109
|
+
that if any of the connectors inside the square braces are used,
|
110
|
+
a cost of 1 is incurred. (This cost is the first element of the cost
|
111
|
+
vector printed when a sentence is parsed.) Of course if something is
|
112
|
+
inside of 10 levels of "[..]" then using it incurs a cost of 10.
|
113
|
+
These costs are called "disjunct costs". The linkages are printed out
|
114
|
+
in order of non-increasing disjunct cost.
|
115
|
+
|
116
|
+
The expression "(A+ or ())" means that you can choose either "A+" or
|
117
|
+
the empty expression "()", that is, that the connector "A+" is
|
118
|
+
optional. This is more compactly expressed as "{A+}". In other words,
|
119
|
+
curly braces indicate an optional expression.
|
120
|
+
|
121
|
+
The expression "(A+ or [])" is the same as that above, but there is a
|
122
|
+
cost of 1 incurred for choosing not to use "A+". The expression
|
123
|
+
"(EXP1 & [EXP2])" is exactly the same as "[EXP1 & EXP2]". The difference
|
124
|
+
between "({[A+]} & B+)" and "([{A+}] & B+)" is that the latter always
|
125
|
+
incurs a cost of 1, while the former only gets a cost of 1 if "A+" is
|
126
|
+
used.
|
127
|
+
|
128
|
+
The dictionary writer is not allowed to use connectors that begin in
|
129
|
+
"ID". This is reserved for the connectors automatically
|
130
|
+
generated for idioms.
|
131
|
+
|
132
|
+
Dictionary words may be followed by a dot (period, "."), and a "subscript"
|
133
|
+
identifying the word type. The subscript may be one or more letters or
|
134
|
+
numbers, but must begin with a letter. Currently, the dictionary contains
|
135
|
+
(mostly?) subscripts consisting of a single letter, and these serve mostly
|
136
|
+
to identify the part-of-speech. In general, subscripts can also be used
|
137
|
+
to distinguish different word senses.
|
138
|
+
*/
|
139
|
+
|
140
|
+
static int link_advance(Dictionary dict);
|
141
|
+
|
142
|
+
/**
 * Report an error encountered while parsing the dictionary.
 *
 * The message names the dictionary, prints s (followed by s2 when s2 is
 * not NULL), the current line number, and up to five tokens starting at
 * the current one. Collecting those tokens advances the tokenizer, so
 * the read position has moved on by the time this function returns.
 *
 * link_advance() itself reports errors through this function; the
 * dict->recursive_error flag makes such nested calls return immediately
 * instead of recursing.
 */
static void dict_error2(Dictionary dict, const char * s, const char *s2)
{
	int i;
	size_t used = 0;
	char tokens[1024];

	if (dict->recursive_error) return;
	dict->recursive_error = TRUE;

	tokens[0] = '\0';
	for (i=0; i<5 && dict->token[0] != '\0' ; i++)
	{
		/* Append the quoted token, never writing past the end of
		 * tokens[]; an over-long token is truncated, not overflowed. */
		size_t room = sizeof(tokens) - used;
		int n = snprintf(tokens + used, room, "\"%s\" ", dict->token);
		if (n > 0)
		{
			used += ((size_t)n < room) ? (size_t)n : room - 1;
		}
		else
		{
			/* Formatting failed; keep what has been gathered so far. */
			tokens[used] = '\0';
		}
		link_advance(dict);
	}
	if (s2)
	{
		err_ctxt ec;
		ec.sent = NULL;
		err_msg(&ec, Error, "Error parsing dictionary %s.\n"
		          "%s %s\n\t line %d, tokens = %s\n",
		        dict->name,
		        s, s2, dict->line_number, tokens);
	}
	else
	{
		err_ctxt ec;
		ec.sent = NULL;
		err_msg(&ec, Error, "Error parsing dictionary %s.\n"
		          "%s\n\t line %d, tokens = %s\n",
		        dict->name,
		        s, dict->line_number, tokens);
	}
	dict->recursive_error = FALSE;
}
|
177
|
+
|
178
|
+
/**
 * Report a dictionary parse error that has no second message string.
 * Equivalent to calling dict_error2() with a NULL s2.
 */
static void dict_error(Dictionary dict, const char * s)
{
	const char * no_second_part = NULL;

	dict_error2(dict, s, no_second_part);
}
|
182
|
+
|
183
|
+
/**
 * Issue a warning about the dictionary, giving the message s together
 * with the current line number and the current token. Unlike the error
 * reporters, this does not advance the tokenizer.
 */
static void warning(Dictionary dict, const char * s)
{
	err_ctxt ctxt;

	ctxt.sent = NULL;   /* no sentence is associated with this message */
	err_msg(&ctxt, Warn,
	        "Warning: %s\n\tline %d, current token = \"%s\"\n",
	        s, dict->line_number, dict->token);
}
|
191
|
+
|
192
|
+
/**
|
193
|
+
* This gets the next character from the input, eliminating comments.
|
194
|
+
* If we're in quote mode, it does not consider the % character for
|
195
|
+
* comments.
|
196
|
+
*/
|
197
|
+
static wint_t get_character(Dictionary dict, int quote_mode)
|
198
|
+
{
|
199
|
+
wint_t c;
|
200
|
+
|
201
|
+
c = fgetwc(dict->fp);
|
202
|
+
if ((c == '%') && (!quote_mode)) {
|
203
|
+
while((c != WEOF) && (c != '\n')) c = fgetwc(dict->fp);
|
204
|
+
}
|
205
|
+
if (c == '\n') dict->line_number++;
|
206
|
+
return c;
|
207
|
+
}
|
208
|
+
|
209
|
+
|
210
|
+
/*
|
211
|
+
* These 10 characters are the ones defining the syntax of the
|
212
|
+
* dictionary.
|
213
|
+
*/
|
214
|
+
#define SPECIAL "(){};[]&|:"

/**
 * Return non-zero if the wide character wc is one of the characters in
 * SPECIAL, i.e. one of the characters that define the syntax of the
 * dictionary; return 0 otherwise.
 *
 * wc is converted to its multi-byte form with wcrtomb(), using and
 * updating the conversion state *ps. Only characters that convert to
 * exactly one byte can be special; a failed conversion yields 0.
 */
static int is_special(wint_t wc, mbstate_t *ps)
{
	char buff[MB_LEN_MAX];
	size_t nr = wcrtomb(buff, wc, ps);

	/* Multi-byte results and conversion errors ((size_t)-1) are
	 * never special. */
	if (1 != nr) return 0;

	/* strchr() also matches the terminating NUL of the string it
	 * searches, so a NUL character must be rejected explicitly. */
	if ('\0' == buff[0]) return 0;

	return (NULL != strchr(SPECIAL, buff[0]));
}
|
227
|
+
|
228
|
+
/**
|
229
|
+
* This reads the next token from the input into token.
|
230
|
+
* Return 1 if a character was read, else return 0 (and print a warning).
|
231
|
+
*/
|
232
|
+
static int link_advance(Dictionary dict)
|
233
|
+
{
|
234
|
+
wint_t c;
|
235
|
+
int nr, i;
|
236
|
+
int quote_mode;
|
237
|
+
|
238
|
+
dict->is_special = FALSE;
|
239
|
+
|
240
|
+
if (dict->already_got_it != '\0')
|
241
|
+
{
|
242
|
+
dict->is_special = is_special(dict->already_got_it, &dict->mbss);
|
243
|
+
if (dict->already_got_it == WEOF) {
|
244
|
+
dict->token[0] = '\0';
|
245
|
+
} else {
|
246
|
+
dict->token[0] = dict->already_got_it; /* specials are one byte */
|
247
|
+
dict->token[1] = '\0';
|
248
|
+
}
|
249
|
+
dict->already_got_it = '\0';
|
250
|
+
return 1;
|
251
|
+
}
|
252
|
+
|
253
|
+
do { c = get_character(dict, FALSE); } while (iswspace(c));
|
254
|
+
|
255
|
+
quote_mode = FALSE;
|
256
|
+
|
257
|
+
i = 0;
|
258
|
+
for (;;)
|
259
|
+
{
|
260
|
+
if (i > MAX_TOKEN_LENGTH-3) { /* 3 for multi-byte tokens */
|
261
|
+
dict_error(dict, "Token too long");
|
262
|
+
return 0;
|
263
|
+
}
|
264
|
+
if (quote_mode) {
|
265
|
+
if (c == '\"') {
|
266
|
+
quote_mode = FALSE;
|
267
|
+
dict->token[i] = '\0';
|
268
|
+
return 1;
|
269
|
+
}
|
270
|
+
if (iswspace(c)) {
|
271
|
+
dict_error(dict, "White space inside of token");
|
272
|
+
return 0;
|
273
|
+
}
|
274
|
+
|
275
|
+
/* Although we read wide chars, we store UTF8 internally, always. */
|
276
|
+
nr = wcrtomb(&dict->token[i], c, &dict->mbss);
|
277
|
+
if (nr < 0) {
|
278
|
+
#ifndef _WIN32
|
279
|
+
dict_error2(dict, "Unable to read UTF8 string in current locale",
|
280
|
+
nl_langinfo(CODESET));
|
281
|
+
fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
|
282
|
+
#else
|
283
|
+
dict_error(dict, "Unable to read UTF8 string in current locale");
|
284
|
+
#endif
|
285
|
+
return 0;
|
286
|
+
}
|
287
|
+
i += nr;
|
288
|
+
} else {
|
289
|
+
if (is_special(c, &dict->mbss))
|
290
|
+
{
|
291
|
+
if (i == 0)
|
292
|
+
{
|
293
|
+
dict->token[0] = c; /* special toks are one char always */
|
294
|
+
dict->token[1] = '\0';
|
295
|
+
dict->is_special = TRUE;
|
296
|
+
return 1;
|
297
|
+
}
|
298
|
+
dict->token[i] = '\0';
|
299
|
+
dict->already_got_it = c;
|
300
|
+
return 1;
|
301
|
+
}
|
302
|
+
if (c == WEOF) {
|
303
|
+
if (i == 0) {
|
304
|
+
dict->token[0] = '\0';
|
305
|
+
return 1;
|
306
|
+
}
|
307
|
+
dict->token[i] = '\0';
|
308
|
+
dict->already_got_it = c;
|
309
|
+
return 1;
|
310
|
+
}
|
311
|
+
if (iswspace(c)) {
|
312
|
+
dict->token[i] = '\0';
|
313
|
+
return 1;
|
314
|
+
}
|
315
|
+
if (c == '\"') {
|
316
|
+
quote_mode = TRUE;
|
317
|
+
} else {
|
318
|
+
/* store UTF8 internally, always. */
|
319
|
+
nr = wctomb_check(&dict->token[i], c, &dict->mbss);
|
320
|
+
if (nr < 0) {
|
321
|
+
#ifndef _WIN32
|
322
|
+
dict_error2(dict, "Unable to read UTF8 string in current locale",
|
323
|
+
nl_langinfo(CODESET));
|
324
|
+
fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
|
325
|
+
#else
|
326
|
+
dict_error(dict, "Unable to read UTF8 string in current locale");
|
327
|
+
#endif
|
328
|
+
return 0;
|
329
|
+
}
|
330
|
+
i += nr;
|
331
|
+
}
|
332
|
+
}
|
333
|
+
c = get_character(dict, quote_mode);
|
334
|
+
}
|
335
|
+
return 1;
|
336
|
+
}
|
337
|
+
|
338
|
+
/**
|
339
|
+
* Returns TRUE if this token is a special token and it is equal to c
|
340
|
+
*/
|
341
|
+
static int is_equal(Dictionary dict, wint_t c)
|
342
|
+
{
|
343
|
+
return (dict->is_special &&
|
344
|
+
wctob(c) == dict->token[0] &&
|
345
|
+
dict->token[1] == '\0');
|
346
|
+
}
|
347
|
+
|
348
|
+
/**
|
349
|
+
* Make sure the string s is a valid connector.
|
350
|
+
* Return 1 if the connector is valid, else return 0,
|
351
|
+
* and print an appropriate warning message.
|
352
|
+
*/
|
353
|
+
static int check_connector(Dictionary dict, const char * s)
|
354
|
+
{
|
355
|
+
int i;
|
356
|
+
i = strlen(s);
|
357
|
+
if (i < 1) {
|
358
|
+
dict_error(dict, "Expecting a connector.");
|
359
|
+
return 0;
|
360
|
+
}
|
361
|
+
i = s[i-1]; /* the last character of the token */
|
362
|
+
if ((i!='+') && (i!='-')) {
|
363
|
+
dict_error(dict, "A connector must end in a \"+\" or \"-\".");
|
364
|
+
return 0;
|
365
|
+
}
|
366
|
+
if (*s == '@') s++;
|
367
|
+
if (!isupper((int)*s)) {
|
368
|
+
dict_error(dict, "The first letter of a connector must be in [A--Z].");
|
369
|
+
return 0;
|
370
|
+
}
|
371
|
+
if ((*s == 'I') && (*(s+1) == 'D')) {
|
372
|
+
dict_error(dict, "Connectors beginning with \"ID\" are forbidden");
|
373
|
+
return 0;
|
374
|
+
}
|
375
|
+
while (*(s+1)) {
|
376
|
+
if ((!isalnum((int)*s)) && (*s != '*') && (*s != '^')) {
|
377
|
+
dict_error(dict, "All letters of a connector must be ASCII alpha-numeric.");
|
378
|
+
return 0;
|
379
|
+
}
|
380
|
+
s++;
|
381
|
+
}
|
382
|
+
return 1;
|
383
|
+
}
|
384
|
+
|
385
|
+
/* ======================================================================== */
|
386
|
+
/**
|
387
|
+
* Dictionary entry comparison and ordering functions.
|
388
|
+
*
|
389
|
+
* The data structure storing the dictionary is simply a binary tree.
|
390
|
+
* The entries in the binary tree are sorted by alphabetical order.
|
391
|
+
* There is one catch, however: words may have suffixes (a dot, followed
|
392
|
+
* by the suffix), and these suffixes are to be handled appropriately
|
393
|
+
* during sorting and comparison.
|
394
|
+
*
|
395
|
+
* The use of suffixes means that the ordering of the words is not
|
396
|
+
* exactly the order given by strcmp. The order must be such that, for
|
397
|
+
* example, "make" < "make.n" < "make-up" -- suffixed words come after
|
398
|
+
* the bare words, but before any other words with non-ascii-alpha
|
399
|
+
* characters (such as the hyphen in "make-up", or possibly UTF8
|
400
|
+
* characters). Thus, straight "strcmp" can't be used to determine
|
401
|
+
* dictionary order.
|
402
|
+
*
|
403
|
+
* Thus, a set of specialized string comparison and ordering functions
|
404
|
+
* are provided. These "do the right thing" when matching string with
|
405
|
+
* and without suffixes.
|
406
|
+
*/
|
407
|
+
/**
|
408
|
+
* dict_order - order two dictionary words in proper sort order.
|
409
|
+
* Return zero if the strings match, else return standard
|
410
|
+
* (locale-dependent) UTF8 sort order.
|
411
|
+
*/
|
412
|
+
/* verbose version */
|
413
|
+
/*
|
414
|
+
int dict_order(char *s, char *t)
|
415
|
+
{
|
416
|
+
int ss, tt;
|
417
|
+
while (*s != '\0' && *s == *t) {
|
418
|
+
s++;
|
419
|
+
t++;
|
420
|
+
}
|
421
|
+
if (*s == '.') {
|
422
|
+
ss = 1;
|
423
|
+
} else {
|
424
|
+
ss = (*s)<<1;
|
425
|
+
}
|
426
|
+
if (*t == '.') {
|
427
|
+
tt = 1;
|
428
|
+
} else {
|
429
|
+
tt = (*t)<<1;
|
430
|
+
}
|
431
|
+
return (ss - tt);
|
432
|
+
}
|
433
|
+
*/
|
434
|
+
|
435
|
+
/* terse version */
/*
 * Compare two dictionary words.  Returns 0 when the strings are
 * identical, a negative value when s sorts before t, and a positive
 * value when s sorts after t.
 *
 * At the first differing position a '.' is weighted as 1 and every
 * other byte as twice its value, so the end of a string (weight 0)
 * sorts first, a subscript dot next, and then all bytes with a
 * positive value.  Where plain char is signed, bytes above 0x7F are
 * negative and get a negative weight.
 */
static inline int dict_order(const char *s, const char *t)
{
	int s_weight, t_weight;

	while (*s != '\0' && *s == *t) {s++; t++;}

	/* Multiply rather than shift: left-shifting a negative value is
	 * undefined behaviour, and *s may be negative for UTF-8 bytes. */
	s_weight = (*s == '.') ? 1 : (*s) * 2;
	t_weight = (*t == '.') ? 1 : (*t) * 2;
	return s_weight - t_weight;
}
|
441
|
+
|
442
|
+
/**
 * dict_order_wild() -- order two dictionary strings, honouring wildcards.
 *
 * Returns 0 if s and t match, >0 if s>t, and <0 if s<t.
 *
 * Both strings are walked in step for as long as their characters are
 * equal and s has not ended.  If either string is then sitting on a
 * "*", the strings are treated as matching.  Otherwise a "." on either
 * side is treated as the end of the string ("\0"), and the difference
 * of the two characters is returned.
 */
static inline int dict_order_wild(const char * s, const char * t)
{
	int s_ch, t_ch;

	for (; (*s != '\0') && (*s == *t); s++, t++) { /* skip common prefix */ }

	if ((*s == '*') || (*t == '*')) return 0;

	s_ch = (*s == '.') ? '\0' : *s;
	t_ch = (*t == '.') ? '\0' : *t;
	return s_ch - t_ch;
}
|
460
|
+
|
461
|
+
/**
 * dict_match -- return true if strings match, else false.
 * A "bare" string (one without a subscript) will match any corresponding
 * string with a subscript; so, for example, "make" and "make.n" are
 * a match.  If both strings have subscripts, then the subscripts must
 * match as well.
 *
 * A subscript is the part that follows the last "." in the word, and
 * that is non-empty and does not begin with a digit.
 */
static int dict_match(const char * s, const char * t)
{
	const char *ds, *dt;
	ds = strrchr(s, '.');
	dt = strrchr(t, '.');

	/* A dot at the end, or a dot followed by a digit, is NOT considered
	 * to start a subscript.  The cast keeps isdigit() well-defined when
	 * the byte after the dot is a (possibly negative) UTF-8 byte. */
	if ((dt != NULL) && ((*(dt+1) == '\0') ||
	    (isdigit((unsigned char)*(dt+1))))) dt = NULL;
	if ((ds != NULL) && ((*(ds+1) == '\0') ||
	    (isdigit((unsigned char)*(ds+1))))) ds = NULL;

	/* dt is NULL when t has no subscript; likewise ds for s. */
	if (dt == NULL && ds != NULL) {
		size_t stem = (size_t)(ds - s);
		/* The length test ensures that "i.e." does not match "i.e" */
		return (strlen(t) <= stem) && (strncmp(s, t, stem) == 0);
	} else if (dt != NULL && ds == NULL) {
		size_t stem = (size_t)(dt - t);
		return (strlen(s) <= stem) && (strncmp(s, t, stem) == 0);
	} else {
		return (strcmp(s, t) == 0);
	}
}
|
494
|
+
|
495
|
+
/* ======================================================================== */
|
496
|
+
|
497
|
+
static inline Dict_node * dict_node_new(void)
|
498
|
+
{
|
499
|
+
return (Dict_node*) xalloc(sizeof(Dict_node));
|
500
|
+
}
|
501
|
+
|
502
|
+
/* Hand the storage of a single Dict_node back through xfree(). */
static inline void free_dict_node(Dict_node *dn)
{
	char * raw = (char *)dn;
	xfree(raw, sizeof(Dict_node));
}
|
506
|
+
|
507
|
+
/**
|
508
|
+
* prune_lookup_list -- discard all list entries that don't match string
|
509
|
+
* Walk the lookup list (of right links), discarding all nodes that do
|
510
|
+
* not match the dictionary string s. The matching is dictionary matching:
|
511
|
+
* suffixed entries will match "bare" entries.
|
512
|
+
*/
|
513
|
+
static Dict_node * prune_lookup_list(Dict_node *llist, const char * s)
|
514
|
+
{
|
515
|
+
Dict_node *dn, *dnx, *list_new;
|
516
|
+
|
517
|
+
list_new = NULL;
|
518
|
+
for (dn = llist; dn != NULL; dn = dnx)
|
519
|
+
{
|
520
|
+
dnx = dn->right;
|
521
|
+
/* now put dn onto the answer list, or free it */
|
522
|
+
if (dict_match(dn->string, s))
|
523
|
+
{
|
524
|
+
dn->right = list_new;
|
525
|
+
list_new = dn;
|
526
|
+
}
|
527
|
+
else
|
528
|
+
{
|
529
|
+
free_dict_node(dn);
|
530
|
+
}
|
531
|
+
}
|
532
|
+
|
533
|
+
/* now reverse the list back */
|
534
|
+
llist = NULL;
|
535
|
+
for (dn = list_new; dn != NULL; dn = dnx)
|
536
|
+
{
|
537
|
+
dnx = dn->right;
|
538
|
+
dn->right = llist;
|
539
|
+
llist = dn;
|
540
|
+
}
|
541
|
+
return llist;
|
542
|
+
}
|
543
|
+
|
544
|
+
/* Free every node of a lookup list (nodes are chained by their right pointers). */
void free_lookup_list(Dict_node *llist)
{
	Dict_node * following;

	for (; llist != NULL; llist = following)
	{
		following = llist->right;
		free_dict_node(llist);
	}
}
|
554
|
+
|
555
|
+
/* Free a whole subtree: left branch, then right branch, then the node itself. */
static void free_dict_node_recursive(Dict_node * dn)
{
	if (dn != NULL)
	{
		free_dict_node_recursive(dn->left);
		free_dict_node_recursive(dn->right);
		free_dict_node(dn);
	}
}
|
562
|
+
|
563
|
+
/* ======================================================================== */
|
564
|
+
/**
|
565
|
+
* rdictionary_lookup() -- recursive dictionary lookup
|
566
|
+
* Walk binary tree, given by 'dn', looking for the string 's'.
|
567
|
+
* For every node in the tree where 's' matches (including wildcards)
|
568
|
+
* make a copy of that node, and append it to llist.
|
569
|
+
*/
|
570
|
+
static Dict_node * rdictionary_lookup(Dict_node *llist,
|
571
|
+
Dict_node * dn, const char * s, int match_idiom)
|
572
|
+
{
|
573
|
+
/* see comment in dictionary_lookup below */
|
574
|
+
int m;
|
575
|
+
Dict_node * dn_new;
|
576
|
+
if (dn == NULL) return llist;
|
577
|
+
m = dict_order_wild(s, dn->string);
|
578
|
+
if (m >= 0)
|
579
|
+
{
|
580
|
+
llist = rdictionary_lookup(llist, dn->right, s, match_idiom);
|
581
|
+
}
|
582
|
+
if ((m == 0) && (match_idiom || !is_idiom_word(dn->string)))
|
583
|
+
{
|
584
|
+
dn_new = dict_node_new();
|
585
|
+
*dn_new = *dn;
|
586
|
+
dn_new->right = llist;
|
587
|
+
llist = dn_new;
|
588
|
+
}
|
589
|
+
if (m <= 0)
|
590
|
+
{
|
591
|
+
llist = rdictionary_lookup(llist, dn->left, s, match_idiom);
|
592
|
+
}
|
593
|
+
return llist;
|
594
|
+
}
|
595
|
+
|
596
|
+
/**
|
597
|
+
* dictionary_lookup_list() - return lookup list of words in the dictionary
|
598
|
+
*
|
599
|
+
* Returns a pointer to a lookup list of the words in the dictionary.
|
600
|
+
* Matches include word that appear in idioms. Use
|
601
|
+
* abridged_lookup_list() to obtain matches, excluding idioms.
|
602
|
+
*
|
603
|
+
* This list is made up of Dict_nodes, linked by their right pointers.
|
604
|
+
* The node, file and string fields are copied from the dictionary.
|
605
|
+
*
|
606
|
+
* The returned list must be freed with free_lookup_list().
|
607
|
+
*/
|
608
|
+
Dict_node * dictionary_lookup_list(Dictionary dict, const char *s)
|
609
|
+
{
|
610
|
+
Dict_node * llist = rdictionary_lookup(NULL, dict->root, s, TRUE);
|
611
|
+
llist = prune_lookup_list(llist, s);
|
612
|
+
return llist;
|
613
|
+
}
|
614
|
+
|
615
|
+
/**
|
616
|
+
* abridged_lookup_list() - return lookup list of words in the dictionary
|
617
|
+
*
|
618
|
+
* Returns a pointer to a lookup list of the words in the dictionary.
|
619
|
+
* Excludes any idioms that contain the word; use
|
620
|
+
* dictionary_lookup_list() to obtain the complete list.
|
621
|
+
*
|
622
|
+
* This list is made up of Dict_nodes, linked by their right pointers.
|
623
|
+
* The node, file and string fields are copied from the dictionary.
|
624
|
+
*
|
625
|
+
* The returned list must be freed with free_lookup_list().
|
626
|
+
*/
|
627
|
+
Dict_node * abridged_lookup_list(Dictionary dict, const char *s)
|
628
|
+
{
|
629
|
+
Dict_node *llist;
|
630
|
+
llist = rdictionary_lookup(NULL, dict->root, s, FALSE);
|
631
|
+
llist = prune_lookup_list(llist, s);
|
632
|
+
return llist;
|
633
|
+
}
|
634
|
+
|
635
|
+
/* Returns 1 if dictionary_lookup_list() finds any entry for s, else 0. */
int boolean_dictionary_lookup(Dictionary dict, const char *s)
{
	Dict_node *found = dictionary_lookup_list(dict, s);
	int present = (found == NULL) ? 0 : 1;

	/* Only the yes/no answer is wanted, so the copied list is released. */
	free_lookup_list(found);
	return present;
}
|
642
|
+
|
643
|
+
/* ======================================================================== */
|
644
|
+
/**
|
645
|
+
* Allocate a new Exp node and link it into the exp_list for freeing later.
|
646
|
+
*/
|
647
|
+
Exp * Exp_create(Dictionary dict)
|
648
|
+
{
|
649
|
+
Exp * e;
|
650
|
+
e = (Exp *) xalloc(sizeof(Exp));
|
651
|
+
e->next = dict->exp_list;
|
652
|
+
dict->exp_list = e;
|
653
|
+
return e;
|
654
|
+
}
|
655
|
+
|
656
|
+
/* Return the storage of one Exp node through xfree().  Nothing is unlinked. */
static inline void exp_free(Exp * e)
{
	char * raw = (char *)e;
	xfree(raw, sizeof(Exp));
}
|
660
|
+
|
661
|
+
/* ======================================================================== */
|
662
|
+
/**
|
663
|
+
* This creates a node with one child (namely e). Initializes
|
664
|
+
* the cost to zero.
|
665
|
+
*/
|
666
|
+
static Exp * make_unary_node(Dictionary dict, Exp * e)
|
667
|
+
{
|
668
|
+
Exp * n;
|
669
|
+
n = Exp_create(dict);
|
670
|
+
n->type = AND_type; /* these must be AND types */
|
671
|
+
n->cost = 0.0f;
|
672
|
+
n->u.l = (E_list *) xalloc(sizeof(E_list));
|
673
|
+
n->u.l->next = NULL;
|
674
|
+
n->u.l->e = e;
|
675
|
+
return n;
|
676
|
+
}
|
677
|
+
|
678
|
+
/**
|
679
|
+
* connector() -- make a node for a connector or dictionary word.
|
680
|
+
*
|
681
|
+
* Assumes the current token is a connector or dictionary word.
|
682
|
+
*/
|
683
|
+
static Exp * connector(Dictionary dict)
|
684
|
+
{
|
685
|
+
Exp * n;
|
686
|
+
Dict_node *dn, *dn_head;
|
687
|
+
int i;
|
688
|
+
|
689
|
+
i = strlen(dict->token) - 1; /* this must be + or - if a connector */
|
690
|
+
if ((dict->token[i] != '+') && (dict->token[i] != '-'))
|
691
|
+
{
|
692
|
+
/* If we are here, token is a word */
|
693
|
+
dn_head = abridged_lookup_list(dict, dict->token);
|
694
|
+
dn = dn_head;
|
695
|
+
while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
|
696
|
+
{
|
697
|
+
dn = dn->right;
|
698
|
+
}
|
699
|
+
if (dn == NULL)
|
700
|
+
{
|
701
|
+
free_lookup_list(dn_head);
|
702
|
+
dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
|
703
|
+
"Or perhaps you forgot the suffix on a word.\n"
|
704
|
+
"Or perhaps a word is used before it is defined.\n");
|
705
|
+
return NULL;
|
706
|
+
}
|
707
|
+
n = make_unary_node(dict, dn->exp);
|
708
|
+
free_lookup_list(dn_head);
|
709
|
+
}
|
710
|
+
else
|
711
|
+
{
|
712
|
+
/* If we are here, token is a connector */
|
713
|
+
if (!check_connector(dict, dict->token))
|
714
|
+
{
|
715
|
+
return NULL;
|
716
|
+
}
|
717
|
+
n = Exp_create(dict);
|
718
|
+
n->dir = dict->token[i];
|
719
|
+
dict->token[i] = '\0'; /* get rid of the + or - */
|
720
|
+
if (dict->token[0] == '@')
|
721
|
+
{
|
722
|
+
n->u.string = string_set_add(dict->token+1, dict->string_set);
|
723
|
+
n->multi = TRUE;
|
724
|
+
}
|
725
|
+
else
|
726
|
+
{
|
727
|
+
n->u.string = string_set_add(dict->token, dict->string_set);
|
728
|
+
n->multi = FALSE;
|
729
|
+
}
|
730
|
+
n->type = CONNECTOR_type;
|
731
|
+
n->cost = 0.0f;
|
732
|
+
}
|
733
|
+
|
734
|
+
if (!link_advance(dict))
|
735
|
+
{
|
736
|
+
exp_free(n);
|
737
|
+
return NULL;
|
738
|
+
}
|
739
|
+
return n;
|
740
|
+
}
|
741
|
+
|
742
|
+
/**
|
743
|
+
* This creates a node with zero children. Initializes
|
744
|
+
* the cost to zero.
|
745
|
+
*/
|
746
|
+
static Exp * make_zeroary_node(Dictionary dict)
|
747
|
+
{
|
748
|
+
Exp * n;
|
749
|
+
n = Exp_create(dict);
|
750
|
+
n->type = AND_type; /* these must be AND types */
|
751
|
+
n->cost = 0.0f;
|
752
|
+
n->u.l = NULL;
|
753
|
+
return n;
|
754
|
+
}
|
755
|
+
|
756
|
+
/**
|
757
|
+
* This creates an OR node with two children, one the given node,
|
758
|
+
* and the other as zeroary node. This has the effect of creating
|
759
|
+
* what used to be called an optional node.
|
760
|
+
*/
|
761
|
+
static Exp * make_optional_node(Dictionary dict, Exp * e)
|
762
|
+
{
|
763
|
+
Exp * n;
|
764
|
+
E_list *el, *elx;
|
765
|
+
n = Exp_create(dict);
|
766
|
+
n->type = OR_type;
|
767
|
+
n->cost = 0.0f;
|
768
|
+
n->u.l = el = (E_list *) xalloc(sizeof(E_list));
|
769
|
+
el->e = make_zeroary_node(dict);
|
770
|
+
el->next = elx = (E_list *) xalloc(sizeof(E_list));
|
771
|
+
elx->next = NULL;
|
772
|
+
elx->e = e;
|
773
|
+
return n;
|
774
|
+
}
|
775
|
+
|
776
|
+
/* ======================================================================== */
|
777
|
+
|
778
|
+
#if ! defined INFIX_NOTATION
|
779
|
+
|
780
|
+
Exp * expression(Dictionary dict);
|
781
|
+
/**
|
782
|
+
* We're looking at the first of the stuff after an "and" or "or".
|
783
|
+
* Build a Exp node for this expression. Set the cost and optional
|
784
|
+
* fields to the default values. Set the type field according to type
|
785
|
+
*/
|
786
|
+
Exp * operator_exp(Dictionary dict, int type)
|
787
|
+
{
|
788
|
+
Exp * n;
|
789
|
+
E_list first;
|
790
|
+
E_list * elist;
|
791
|
+
n = Exp_create(dict);
|
792
|
+
n->type = type;
|
793
|
+
n->cost = 0.0f;
|
794
|
+
elist = &first;
|
795
|
+
while((!is_equal(dict, ')')) && (!is_equal(dict, ']')) && (!is_equal(dict, '}'))) {
|
796
|
+
elist->next = (E_list *) xalloc(sizeof(E_list));
|
797
|
+
elist = elist->next;
|
798
|
+
elist->next = NULL;
|
799
|
+
elist->e = expression(dict);
|
800
|
+
if (elist->e == NULL) {
|
801
|
+
return NULL;
|
802
|
+
}
|
803
|
+
}
|
804
|
+
if (elist == &first) {
|
805
|
+
dict_error(dict, "An \"or\" or \"and\" of nothing");
|
806
|
+
return NULL;
|
807
|
+
}
|
808
|
+
n->u.l = first.next;
|
809
|
+
return n;
|
810
|
+
}
|
811
|
+
|
812
|
+
/**
|
813
|
+
* Looks for the stuff that is allowed to be inside of parentheses
|
814
|
+
* either & or | followed by a list, or a terminal symbol.
|
815
|
+
*/
|
816
|
+
Exp * in_parens(Dictionary dict)
|
817
|
+
{
|
818
|
+
Exp * e;
|
819
|
+
|
820
|
+
if (is_equal(dict, '&') || (strcmp(token, "and")==0)) {
|
821
|
+
if (!link_advance(dict)) {
|
822
|
+
return NULL;
|
823
|
+
}
|
824
|
+
return operator_exp(dict, AND_type);
|
825
|
+
} else if (is_equal(dict, '|') || (strcmp(dict->token, "or")==0)) {
|
826
|
+
if (!link_advance(dict)) {
|
827
|
+
return NULL;
|
828
|
+
}
|
829
|
+
return operator_exp(dict, OR_type);
|
830
|
+
} else {
|
831
|
+
return expression(dict);
|
832
|
+
}
|
833
|
+
}
|
834
|
+
|
835
|
+
/**
|
836
|
+
* Build (and return the root of) the tree for the expression beginning
|
837
|
+
* with the current token. At the end, the token is the first one not
|
838
|
+
* part of this expression.
|
839
|
+
*/
|
840
|
+
Exp * expression(Dictionary dict)
|
841
|
+
{
|
842
|
+
Exp * n;
|
843
|
+
if (is_equal(dict, '(')) {
|
844
|
+
if (!link_advance(dict)) {
|
845
|
+
return NULL;
|
846
|
+
}
|
847
|
+
n = in_parens(dict);
|
848
|
+
if (!is_equal(dict, ')')) {
|
849
|
+
dict_error(dict, "Expecting a \")\".");
|
850
|
+
return NULL;
|
851
|
+
}
|
852
|
+
if (!link_advance(dict)) {
|
853
|
+
return NULL;
|
854
|
+
}
|
855
|
+
} else if (is_equal(dict, '{')) {
|
856
|
+
if (!link_advance(dict)) {
|
857
|
+
return NULL;
|
858
|
+
}
|
859
|
+
n = in_parens(dict);
|
860
|
+
if (!is_equal(dict, '}')) {
|
861
|
+
dict_error(dict, "Expecting a \"}\".");
|
862
|
+
return NULL;
|
863
|
+
}
|
864
|
+
if (!link_advance(dict)) {
|
865
|
+
return NULL;
|
866
|
+
}
|
867
|
+
n = make_optional_node(dict, n);
|
868
|
+
} else if (is_equal(dict, '[')) {
|
869
|
+
if (!link_advance(dict)) {
|
870
|
+
return NULL;
|
871
|
+
}
|
872
|
+
n = in_parens(dict);
|
873
|
+
if (!is_equal(dict, ']')) {
|
874
|
+
dict_error(dict, "Expecting a \"]\".");
|
875
|
+
return NULL;
|
876
|
+
}
|
877
|
+
if (!link_advance(dict)) {
|
878
|
+
return NULL;
|
879
|
+
}
|
880
|
+
n->cost += 1.0f;
|
881
|
+
} else if (!dict->is_special) {
|
882
|
+
n = connector(dict);
|
883
|
+
if (n == NULL) {
|
884
|
+
return NULL;
|
885
|
+
}
|
886
|
+
} else if (is_equal(dict, ')') || is_equal(dict, ']')) {
|
887
|
+
/* allows "()" or "[]" */
|
888
|
+
n = make_zeroary_node(dict);
|
889
|
+
} else {
|
890
|
+
dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
|
891
|
+
return NULL;
|
892
|
+
}
|
893
|
+
return n;
|
894
|
+
}
|
895
|
+
|
896
|
+
/* ======================================================================== */
|
897
|
+
#else /* This is for infix notation */
|
898
|
+
|
899
|
+
static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok);
|
900
|
+
|
901
|
+
/**
|
902
|
+
* Build (and return the root of) the tree for the expression beginning
|
903
|
+
* with the current token. At the end, the token is the first one not
|
904
|
+
* part of this expression.
|
905
|
+
*/
|
906
|
+
static Exp * expression(Dictionary dict)
|
907
|
+
{
|
908
|
+
return restricted_expression(dict, TRUE, TRUE);
|
909
|
+
}
|
910
|
+
|
911
|
+
/**
 * Parse one infix expression starting at the current token.
 *
 * First a single operand is read: a bracketed expression -- "( )",
 * "{ }" (made optional) or "[ ]" (cost raised by 1.0) -- or a
 * connector/word, or an empty node when a ")" or "]" is met straight
 * away.  If an "&"/"and" or "|"/"or" follows, the rest is parsed
 * recursively and the two parts are joined under an AND or OR node.
 *
 * and_ok and or_ok say whether the respective operator is expected at
 * this level; an unexpected one only produces a warning.
 *
 * Returns NULL on error.
 */
static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok)
{
	Exp *lhs = NULL, *rhs, *joined;
	E_list *left_cell, *right_cell;
	int closer = 0;  /* the bracket that must close the operand, if any */
	int is_and;

	if (is_equal(dict, '('))      closer = ')';
	else if (is_equal(dict, '{')) closer = '}';
	else if (is_equal(dict, '[')) closer = ']';

	if (closer != 0)
	{
		if (!link_advance(dict)) return NULL;

		lhs = expression(dict);
		if (lhs == NULL) return NULL;

		if (!is_equal(dict, closer))
		{
			switch (closer)
			{
				case ')':
					dict_error(dict, "Expecting a \")\".");
					break;
				case '}':
					dict_error(dict, "Expecting a \"}\".");
					break;
				default:
					dict_error(dict, "Expecting a \"]\".");
					break;
			}
			return NULL;
		}
		if (!link_advance(dict)) return NULL;

		if (closer == '}')
		{
			lhs = make_optional_node(dict, lhs);
		}
		else if (closer == ']')
		{
			lhs->cost += 1.0f;
		}
	}
	else if (!dict->is_special)
	{
		lhs = connector(dict);
		if (lhs == NULL) return NULL;
	}
	else if (is_equal(dict, ')') || is_equal(dict, ']'))
	{
		/* allows "()" or "[]" */
		lhs = make_zeroary_node(dict);
	}
	else
	{
		dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
		return NULL;
	}

	/* Is the operand followed by a binary operator? */
	if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0))
	{
		is_and = 1;
	}
	else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0))
	{
		is_and = 0;
	}
	else
	{
		return lhs;
	}

	if (is_and ? !and_ok : !or_ok)
	{
		warning(dict, "\"and\" and \"or\" at the same level in an expression");
	}
	if (!link_advance(dict)) return NULL;

	/* The right-hand side may only continue with the same operator. */
	rhs = is_and ? restricted_expression(dict, TRUE, FALSE)
	             : restricted_expression(dict, FALSE, TRUE);
	if (rhs == NULL) return NULL;

	joined = Exp_create(dict);
	left_cell = (E_list *) xalloc(sizeof(E_list));
	joined->u.l = left_cell;
	right_cell = (E_list *) xalloc(sizeof(E_list));
	left_cell->next = right_cell;
	right_cell->next = NULL;

	left_cell->e = lhs;
	right_cell->e = rhs;
	joined->type = is_and ? AND_type : OR_type;
	joined->cost = 0.0f;
	return joined;
}
|
1040
|
+
|
1041
|
+
#endif
|
1042
|
+
|
1043
|
+
/* ======================================================================== */
|
1044
|
+
/* Tree balancing utilities, used to implement an AVL tree.
|
1045
|
+
* Unfortunately, AVL tree insertion is very slowww, unusably
|
1046
|
+
* slow for creating the dictionary. The code is thus ifdef'ed out
|
1047
|
+
* but is left here for debugging and other sundry purposes.
|
1048
|
+
* A better way to rebalance the tree is the DSW algo, implemented
|
1049
|
+
* further below.
|
1050
|
+
*/
|
1051
|
+
|
1052
|
+
/* Rotate right about root: root's left child becomes the new subtree
 * root, which is returned.  root->left must not be NULL. */
static Dict_node *rotate_right(Dict_node *root)
{
	Dict_node *new_top = root->left;
	Dict_node *moved = new_top->right;

	new_top->right = root;
	root->left = moved;
	return new_top;
}
|
1059
|
+
|
1060
|
+
#ifdef USE_AVL_TREE_FOR_INSERTION
|
1061
|
+
|
1062
|
+
/* Rotate left about root: root's right child becomes the new subtree
 * root, which is returned.  root->right must not be NULL. */
static Dict_node *rotate_left(Dict_node *root)
{
	Dict_node *new_top = root->right;
	Dict_node *moved = new_top->left;

	new_top->left = root;
	root->right = moved;
	return new_top;
}
|
1069
|
+
|
1070
|
+
/* Return tree height. XXX this is not tail-recursive! */
|
1071
|
+
static int tree_depth (Dict_node *n)
|
1072
|
+
{
|
1073
|
+
int l, r;
|
1074
|
+
if (NULL == n) return 0;
|
1075
|
+
if (NULL == n->left) return 1+tree_depth(n->right);
|
1076
|
+
if (NULL == n->right) return 1+tree_depth(n->left);
|
1077
|
+
l = tree_depth(n->left);
|
1078
|
+
r = tree_depth(n->right);
|
1079
|
+
if (l < r) return r+1;
|
1080
|
+
return l+1;
|
1081
|
+
}
|
1082
|
+
|
1083
|
+
/* Balance factor of n: height of the right subtree minus height of the left. */
static int tree_balance(Dict_node *n)
{
	int left_height = tree_depth(n->left);
	int right_height = tree_depth(n->right);
	int lean = right_height - left_height;
	return lean;
}
|
1089
|
+
|
1090
|
+
/**
|
1091
|
+
* Rebalance the dictionary tree.
|
1092
|
+
* This recomputes the tree depth wayy too often, but so what.. this
|
1093
|
+
* only wastes cpu time during the initial dictinary read.
|
1094
|
+
*/
|
1095
|
+
static Dict_node *rebalance(Dict_node *root)
|
1096
|
+
{
|
1097
|
+
int bal = tree_balance(root);
|
1098
|
+
if (2 == bal)
|
1099
|
+
{
|
1100
|
+
bal = tree_balance(root->right);
|
1101
|
+
if (-1 == bal)
|
1102
|
+
{
|
1103
|
+
root->right = rotate_right (root->right);
|
1104
|
+
}
|
1105
|
+
return rotate_left(root);
|
1106
|
+
}
|
1107
|
+
else if (-2 == bal)
|
1108
|
+
{
|
1109
|
+
bal = tree_balance(root->left);
|
1110
|
+
if (1 == bal)
|
1111
|
+
{
|
1112
|
+
root->left = rotate_left (root->left);
|
1113
|
+
}
|
1114
|
+
return rotate_right(root);
|
1115
|
+
}
|
1116
|
+
return root;
|
1117
|
+
}
|
1118
|
+
|
1119
|
+
#endif /* USE_AVL_TREE_FOR_INSERTION */
|
1120
|
+
|
1121
|
+
/* ======================================================================== */
|
1122
|
+
/* Implementation of the DSW algo for rebalancing a binary tree.
|
1123
|
+
* The point is -- after building the dictionary tree, we rebalance it
|
1124
|
+
* once at the end. This is a **LOT LOT** quicker than maintaining an
|
1125
|
+
* AVL tree along the way (less than quarter-of-a-second vs. about
|
1126
|
+
* a minute or more!) FWIW, the DSW tree is even more balanced than
|
1127
|
+
* the AVL tree is (it's less deep, more full).
|
1128
|
+
*
|
1129
|
+
* The DSW algo, with C++ code, is described in
|
1130
|
+
*
|
1131
|
+
* Timothy J. Rolfe, "One-Time Binary Search Tree Balancing:
|
1132
|
+
* The Day/Stout/Warren (DSW) Algorithm", inroads, Vol. 34, No. 4
|
1133
|
+
* (December 2002), pp. 85-88
|
1134
|
+
* http://penguin.ewu.edu/~trolfe/DSWpaper/
|
1135
|
+
*/
|
1136
|
+
|
1137
|
+
/* First phase of DSW: flatten the tree into a "vine", a chain in which
 * no node has a left child and the nodes are linked by their right
 * pointers.  Returns the head of the vine. */
static Dict_node * dsw_tree_to_vine (Dict_node *root)
{
	Dict_node anchor;  /* stack-resident pseudo-root; only left/right are used */
	Dict_node *tail = &anchor;
	Dict_node *cur = root;

	anchor.left = NULL;
	anchor.right = root;

	while (cur != NULL)
	{
		if (cur->left != NULL)
		{
			/* Eliminate the left subtree by rotating it up. */
			cur = rotate_right(cur);
			tail->right = cur;
		}
		else
		{
			/* No left child: this node is done, move down the right. */
			tail = cur;
			cur = cur->right;
		}
	}

	return anchor.right;
}
|
1166
|
+
|
1167
|
+
/* Perform 'count' compound left rotations down the right spine that
 * hangs off root.  Each step lifts the second node of a pair above the
 * first and then continues from the lifted node. */
static void dsw_compression (Dict_node *root, unsigned int count)
{
	Dict_node *scanner = root;
	unsigned int remaining;

	for (remaining = count; remaining > 0; remaining--)
	{
		Dict_node *child = scanner->right;
		Dict_node *grandchild = child->right;

		scanner->right = grandchild;
		child->right = grandchild->left;
		grandchild->left = child;
		scanner = grandchild;
	}
}
|
1180
|
+
|
1181
|
+
/* Return size of the full portion of the tree.
 * Steps through 1, 3, 7, ... (values of the form pow(2,k)-1) until
 * reaching the first one that is not below 'size', and returns half
 * of that value, rounded down.
 */
static inline unsigned int full_tree_size (unsigned int size)
{
	unsigned int full;

	for (full = 1; full < size; full = 2*full + 1) { /* grow */ }
	return full >> 1;
}
|
1190
|
+
|
1191
|
+
/* Second phase of DSW: fold the vine that starts at root back into a
 * tree using repeated compression passes, and return the new root.
 * 'size' is presumably the number of nodes in the vine -- confirm
 * against the caller. */
static Dict_node * dsw_vine_to_tree (Dict_node *root, int size)
{
	Dict_node anchor;  /* stack-resident pseudo-root */
	unsigned int full_count = full_tree_size(size +1);

	anchor.left = NULL;
	anchor.right = root;

	/* One pass for the nodes in excess of a full tree ... */
	dsw_compression(&anchor, size - full_count);

	/* ... then passes of halving length over the remaining spine. */
	size = full_count;
	while (size > 1)
	{
		dsw_compression(&anchor, size / 2);
		size /= 2;
	}
	return anchor.right;
}
|
1206
|
+
|
1207
|
+
/* ======================================================================== */
|
1208
|
+
/**
|
1209
|
+
* Insert the new node into the dictionary below node n.
|
1210
|
+
* Give error message if the new element's string is already there.
|
1211
|
+
* Assumes that the "n" field of new is already set, and the left
|
1212
|
+
* and right fields of it are NULL.
|
1213
|
+
*
|
1214
|
+
* The resulting tree is highly unbalanced. It needs to be rebalanced
|
1215
|
+
* before used.
|
1216
|
+
*/
|
1217
|
+
Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode)
|
1218
|
+
{
|
1219
|
+
int comp;
|
1220
|
+
|
1221
|
+
if (NULL == n) return newnode;
|
1222
|
+
|
1223
|
+
comp = dict_order(newnode->string, n->string);
|
1224
|
+
if (comp < 0)
|
1225
|
+
{
|
1226
|
+
if (NULL == n->left)
|
1227
|
+
{
|
1228
|
+
n->left = newnode;
|
1229
|
+
return n;
|
1230
|
+
}
|
1231
|
+
n->left = insert_dict(dict, n->left, newnode);
|
1232
|
+
return n;
|
1233
|
+
/* return rebalance(n); Uncomment to get an AVL tree */
|
1234
|
+
}
|
1235
|
+
else if (comp > 0)
|
1236
|
+
{
|
1237
|
+
if (NULL == n->right)
|
1238
|
+
{
|
1239
|
+
n->right = newnode;
|
1240
|
+
return n;
|
1241
|
+
}
|
1242
|
+
n->right = insert_dict(dict, n->right, newnode);
|
1243
|
+
return n;
|
1244
|
+
/* return rebalance(n); Uncomment to get an AVL tree */
|
1245
|
+
}
|
1246
|
+
else
|
1247
|
+
{
|
1248
|
+
char t[256];
|
1249
|
+
snprintf(t, 256, "The word \"%s\" has been multiply defined\n", newnode->string);
|
1250
|
+
dict_error(dict, t);
|
1251
|
+
return NULL;
|
1252
|
+
}
|
1253
|
+
}
|
1254
|
+
|
1255
|
+
/**
|
1256
|
+
* insert_list() -
|
1257
|
+
* p points to a list of dict_nodes connected by their left pointers.
|
1258
|
+
* l is the length of this list (the last ptr may not be NULL).
|
1259
|
+
* It inserts the list into the dictionary.
|
1260
|
+
* It does the middle one first, then the left half, then the right.
|
1261
|
+
*
|
1262
|
+
* Note: I think this insert middle, then left, then right, has
|
1263
|
+
* its origins as a lame attempt to hack around the fact that the
|
1264
|
+
* resulting binary tree is rather badly unbalanced. This has been
|
1265
|
+
* fixed by using the DSW rebalancing algo. Now, that would seem
|
1266
|
+
* to render this crazy bisected-insertion algo obsoloete, but ..
|
1267
|
+
* oddly enough, it seems to make the DSW balancing go really fast!
|
1268
|
+
* Faster than a simple insertion. Go figure. I think this has
|
1269
|
+
* something to do with the fact that the dictionaries are in
|
1270
|
+
* alphabetical order! This subdivision helps randomize a bit.
|
1271
|
+
*/
|
1272
|
+
static void insert_list(Dictionary dict, Dict_node * p, int l)
|
1273
|
+
{
|
1274
|
+
Dict_node * dn, *dn_head, *dn_second_half;
|
1275
|
+
int k, i; /* length of first half */
|
1276
|
+
|
1277
|
+
if (l == 0) return;
|
1278
|
+
|
1279
|
+
k = (l-1)/2;
|
1280
|
+
dn = p;
|
1281
|
+
for (i = 0; i < k; i++)
|
1282
|
+
{
|
1283
|
+
dn = dn->left;
|
1284
|
+
}
|
1285
|
+
|
1286
|
+
/* dn now points to the middle element */
|
1287
|
+
dn_second_half = dn->left;
|
1288
|
+
dn->left = dn->right = NULL;
|
1289
|
+
|
1290
|
+
if (contains_underbar(dn->string))
|
1291
|
+
{
|
1292
|
+
insert_idiom(dict, dn);
|
1293
|
+
}
|
1294
|
+
else if (is_idiom_word(dn->string))
|
1295
|
+
{
|
1296
|
+
err_ctxt ec;
|
1297
|
+
ec.sent = NULL;
|
1298
|
+
err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
|
1299
|
+
"\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
|
1300
|
+
"\tThis word will be ignored.\n",
|
1301
|
+
dn->string, dict->line_number);
|
1302
|
+
free_dict_node(dn);
|
1303
|
+
}
|
1304
|
+
else if ((dn_head = abridged_lookup_list(dict, dn->string)) != NULL)
|
1305
|
+
{
|
1306
|
+
Dict_node *dnx;
|
1307
|
+
err_ctxt ec;
|
1308
|
+
ec.sent = NULL;
|
1309
|
+
err_msg(&ec, Warn, "Warning: The word \"%s\" "
|
1310
|
+
"found near line %d of %s matches the following words:\n",
|
1311
|
+
dn->string, dict->line_number, dict->name);
|
1312
|
+
for (dnx = dn_head; dnx != NULL; dnx = dnx->right) {
|
1313
|
+
fprintf(stderr, "\t%s", dnx->string);
|
1314
|
+
}
|
1315
|
+
fprintf(stderr, "\n\tThis word will be ignored.\n");
|
1316
|
+
free_lookup_list(dn_head);
|
1317
|
+
free_dict_node(dn);
|
1318
|
+
}
|
1319
|
+
else
|
1320
|
+
{
|
1321
|
+
dict->root = insert_dict(dict, dict->root, dn);
|
1322
|
+
dict->num_entries++;
|
1323
|
+
}
|
1324
|
+
|
1325
|
+
insert_list(dict, p, k);
|
1326
|
+
insert_list(dict, dn_second_half, l-k-1);
|
1327
|
+
}
|
1328
|
+
|
1329
|
+
/**
|
1330
|
+
* read_entry() -- read one dictionary entry
|
1331
|
+
* Starting with the current token parse one dictionary entry.
|
1332
|
+
* Add these words to the dictionary.
|
1333
|
+
*/
|
1334
|
+
static int read_entry(Dictionary dict)
|
1335
|
+
{
|
1336
|
+
Exp *n;
|
1337
|
+
int i;
|
1338
|
+
|
1339
|
+
Dict_node *dn_new, *dnx, *dn = NULL;
|
1340
|
+
|
1341
|
+
/* Reset multi-byte shift state every line. */
|
1342
|
+
memset(&dict->mbss, 0, sizeof(dict->mbss));
|
1343
|
+
|
1344
|
+
while (!is_equal(dict, ':'))
|
1345
|
+
{
|
1346
|
+
if (dict->is_special)
|
1347
|
+
{
|
1348
|
+
dict_error(dict, "I expected a word but didn\'t get it.");
|
1349
|
+
return 0;
|
1350
|
+
}
|
1351
|
+
|
1352
|
+
/* if it's a word-file name */
|
1353
|
+
/* However, be careful to reject "/.v" which is the division symbol
|
1354
|
+
* used in equations (.v means verb-like) */
|
1355
|
+
if ((dict->token[0] == '/') && (dict->token[1] != '.'))
|
1356
|
+
{
|
1357
|
+
dn = read_word_file(dict, dn, dict->token);
|
1358
|
+
if (dn == NULL)
|
1359
|
+
{
|
1360
|
+
err_ctxt ec;
|
1361
|
+
ec.sent = NULL;
|
1362
|
+
err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
|
1363
|
+
return 0;
|
1364
|
+
}
|
1365
|
+
}
|
1366
|
+
else
|
1367
|
+
{
|
1368
|
+
dn_new = dict_node_new();
|
1369
|
+
dn_new->left = dn;
|
1370
|
+
dn = dn_new;
|
1371
|
+
dn->file = NULL;
|
1372
|
+
dn->string = string_set_add(dict->token, dict->string_set);
|
1373
|
+
}
|
1374
|
+
|
1375
|
+
/* Advance to next entry, unless error */
|
1376
|
+
if (0 == link_advance(dict)) goto syntax_error;
|
1377
|
+
}
|
1378
|
+
|
1379
|
+
/* pass the : */
|
1380
|
+
if (!link_advance(dict))
|
1381
|
+
{
|
1382
|
+
goto syntax_error;
|
1383
|
+
}
|
1384
|
+
|
1385
|
+
n = expression(dict);
|
1386
|
+
if (n == NULL)
|
1387
|
+
{
|
1388
|
+
goto syntax_error;
|
1389
|
+
}
|
1390
|
+
|
1391
|
+
if (!is_equal(dict, ';'))
|
1392
|
+
{
|
1393
|
+
dict_error(dict, "Expecting \";\" at the end of an entry.");
|
1394
|
+
goto syntax_error;
|
1395
|
+
}
|
1396
|
+
|
1397
|
+
/* pass the ; */
|
1398
|
+
if (!link_advance(dict))
|
1399
|
+
{
|
1400
|
+
goto syntax_error;
|
1401
|
+
}
|
1402
|
+
|
1403
|
+
/* At this point, dn points to a list of Dict_nodes connected by
|
1404
|
+
* their left pointers. These are to be inserted into the dictionary */
|
1405
|
+
i = 0;
|
1406
|
+
for (dnx = dn; dnx != NULL; dnx = dnx->left)
|
1407
|
+
{
|
1408
|
+
dnx->exp = n;
|
1409
|
+
i++;
|
1410
|
+
}
|
1411
|
+
insert_list(dict, dn, i);
|
1412
|
+
return 1;
|
1413
|
+
|
1414
|
+
syntax_error:
|
1415
|
+
free_lookup_list(dn);
|
1416
|
+
return 0;
|
1417
|
+
}
|
1418
|
+
|
1419
|
+
#if ! defined INFIX_NOTATION
|
1420
|
+
/**
|
1421
|
+
* print the expression, in prefix-style
|
1422
|
+
*/
|
1423
|
+
void print_expression(Exp * n)
|
1424
|
+
{
|
1425
|
+
E_list * el;
|
1426
|
+
int i, icost;
|
1427
|
+
|
1428
|
+
if (n == NULL)
|
1429
|
+
{
|
1430
|
+
printf("NULL expression");
|
1431
|
+
return;
|
1432
|
+
}
|
1433
|
+
|
1434
|
+
icost = (int) (n->cost);
|
1435
|
+
if (n->type == CONNECTOR_type)
|
1436
|
+
{
|
1437
|
+
for (i=0; i<icost; i++) printf("[");
|
1438
|
+
if (n->multi) printf("@");
|
1439
|
+
printf("%s%c",n->u.string, n->dir);
|
1440
|
+
for (i=0; i<icost; i++) printf("]");
|
1441
|
+
if (icost > 0) printf(" ");
|
1442
|
+
}
|
1443
|
+
else
|
1444
|
+
{
|
1445
|
+
for (i=0; i<icost; i++) printf("[");
|
1446
|
+
if (icost == 0) printf("(");
|
1447
|
+
if (n->type == AND_type) printf("& ");
|
1448
|
+
if (n->type == OR_type) printf("or ");
|
1449
|
+
for (el = n->u.l; el != NULL; el = el->next)
|
1450
|
+
{
|
1451
|
+
print_expression(el->e);
|
1452
|
+
}
|
1453
|
+
for (i=0; i<icost; i++) printf("]");
|
1454
|
+
if (icost > 0) printf(" ");
|
1455
|
+
if (icost == 0) printf(") ");
|
1456
|
+
}
|
1457
|
+
}
|
1458
|
+
|
1459
|
+
#else /* INFIX_NOTATION */
|
1460
|
+
|
1461
|
+
/**
|
1462
|
+
* print the expression, in infix-style
|
1463
|
+
*/
|
1464
|
+
static void print_expression_parens(Exp * n, int need_parens)
|
1465
|
+
{
|
1466
|
+
E_list * el;
|
1467
|
+
int i, icost;
|
1468
|
+
|
1469
|
+
if (n == NULL)
|
1470
|
+
{
|
1471
|
+
printf("NULL expression");
|
1472
|
+
return;
|
1473
|
+
}
|
1474
|
+
|
1475
|
+
icost = (int) (n->cost);
|
1476
|
+
/* print the connector only */
|
1477
|
+
if (n->type == CONNECTOR_type)
|
1478
|
+
{
|
1479
|
+
for (i=0; i<icost; i++) printf("[");
|
1480
|
+
if (n->multi) printf("@");
|
1481
|
+
printf("%s%c",n->u.string, n->dir);
|
1482
|
+
for (i=0; i<icost; i++) printf("]");
|
1483
|
+
return;
|
1484
|
+
}
|
1485
|
+
|
1486
|
+
/* Look for optional, and print only that */
|
1487
|
+
el = n->u.l;
|
1488
|
+
if (el == NULL)
|
1489
|
+
{
|
1490
|
+
for (i=0; i<icost; i++) printf("[");
|
1491
|
+
printf ("()");
|
1492
|
+
for (i=0; i<icost; i++) printf("]");
|
1493
|
+
return;
|
1494
|
+
}
|
1495
|
+
|
1496
|
+
for (i=0; i<icost; i++) printf("[");
|
1497
|
+
if ((n->type == OR_type) &&
|
1498
|
+
el && el->e && (NULL == el->e->u.l))
|
1499
|
+
{
|
1500
|
+
printf ("{");
|
1501
|
+
print_expression_parens(el->next->e, FALSE);
|
1502
|
+
printf ("}");
|
1503
|
+
return;
|
1504
|
+
}
|
1505
|
+
|
1506
|
+
if ((icost == 0) && need_parens) printf("(");
|
1507
|
+
|
1508
|
+
/* print left side of binary expr */
|
1509
|
+
print_expression_parens(el->e, TRUE);
|
1510
|
+
|
1511
|
+
/* get a funny "and optional" when its a named expression thing. */
|
1512
|
+
if ((n->type == AND_type) && (el->next == NULL))
|
1513
|
+
{
|
1514
|
+
return;
|
1515
|
+
}
|
1516
|
+
|
1517
|
+
if (n->type == AND_type) printf(" & ");
|
1518
|
+
if (n->type == OR_type) printf(" or ");
|
1519
|
+
|
1520
|
+
/* print right side of binary expr */
|
1521
|
+
el = el->next;
|
1522
|
+
if (el == NULL)
|
1523
|
+
{
|
1524
|
+
printf ("()");
|
1525
|
+
}
|
1526
|
+
else
|
1527
|
+
{
|
1528
|
+
if (el->e->type == n->type)
|
1529
|
+
{
|
1530
|
+
print_expression_parens(el->e, FALSE);
|
1531
|
+
}
|
1532
|
+
else
|
1533
|
+
{
|
1534
|
+
print_expression_parens(el->e, TRUE);
|
1535
|
+
}
|
1536
|
+
if (el->next != NULL)
|
1537
|
+
printf ("\nERROR! Unexpected list!\n");
|
1538
|
+
}
|
1539
|
+
|
1540
|
+
for (i=0; i<icost; i++) printf("]");
|
1541
|
+
if ((icost == 0) && need_parens) printf(")");
|
1542
|
+
}
|
1543
|
+
|
1544
|
+
/**
 * Print the expression n in infix style, followed by a newline.
 * The outermost expression is printed without enclosing parentheses.
 */
void print_expression(Exp * n)
{
	print_expression_parens(n, FALSE);
	putchar('\n');
}
|
1549
|
+
#endif /* INFIX_NOTATION */
|
1550
|
+
|
1551
|
+
/**
 * Print every node of the subtree rooted at n as "word: expression",
 * visiting the left subtree, then the node, then the right subtree.
 */
static void rprint_dictionary_data(Dict_node * n)
{
	/* Recurse to the left; walk down the right side iteratively. */
	while (NULL != n)
	{
		rprint_dictionary_data(n->left);
		printf("%s: ", n->string);
		print_expression(n->exp);
		printf("\n");
		n = n->right;
	}
}
|
1560
|
+
|
1561
|
+
/**
|
1562
|
+
* Dump the entire contents of the dictionary
|
1563
|
+
* XXX This is not currently called by anything, but is a "good thing
|
1564
|
+
* to keep around".
|
1565
|
+
*/
|
1566
|
+
void print_dictionary_data(Dictionary dict)
|
1567
|
+
{
|
1568
|
+
rprint_dictionary_data(dict->root);
|
1569
|
+
}
|
1570
|
+
|
1571
|
+
/**
 * Read dictionary entries until the current token is empty, then
 * rebalance the resulting tree with the two DSW passes.
 *
 * Returns 1 on success, 0 if the first token cannot be read or if
 * any entry fails to parse.
 */
int read_dictionary(Dictionary dict)
{
	/* Prime the tokenizer with the first token. */
	if (0 == link_advance(dict)) return 0;

	/* An empty token ends the input. */
	while ('\0' != dict->token[0])
	{
		if (0 == read_entry(dict)) return 0;
	}

	/* Flatten the tree to a vine, then fold it back into a tree. */
	dict->root = dsw_tree_to_vine(dict->root);
	dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);
	return 1;
}
|
1588
|
+
|
1589
|
+
/* ======================================================================= */
|
1590
|
+
/* the following functions are for handling deletion */
|
1591
|
+
/**
|
1592
|
+
* Returns true if it finds a non-idiom dict_node in a file that matches
|
1593
|
+
* the string s.
|
1594
|
+
*
|
1595
|
+
** note: this now DOES include non-file words in its search.
|
1596
|
+
*
|
1597
|
+
* Also sets parent and to_be_deleted appropriately.
|
1598
|
+
*/
|
1599
|
+
static int find_one_non_idiom_node(Dict_node * p, Dict_node * dn,
|
1600
|
+
const char * s,
|
1601
|
+
Dict_node **parent, Dict_node **to_be_deleted)
|
1602
|
+
{
|
1603
|
+
int m;
|
1604
|
+
if (dn == NULL) return FALSE;
|
1605
|
+
m = dict_order_wild(s, dn->string);
|
1606
|
+
if (m <= 0) {
|
1607
|
+
if (find_one_non_idiom_node(dn,dn->left, s, parent, to_be_deleted)) return TRUE;
|
1608
|
+
}
|
1609
|
+
/* if ((m == 0) && (!is_idiom_word(dn->string)) && (dn->file != NULL)) { */
|
1610
|
+
if ((m == 0) && (!is_idiom_word(dn->string))) {
|
1611
|
+
*to_be_deleted = dn;
|
1612
|
+
*parent = p;
|
1613
|
+
return TRUE;
|
1614
|
+
}
|
1615
|
+
if (m >= 0) {
|
1616
|
+
if (find_one_non_idiom_node(dn,dn->right, s, parent, to_be_deleted)) return TRUE;
|
1617
|
+
}
|
1618
|
+
return FALSE;
|
1619
|
+
}
|
1620
|
+
|
1621
|
+
static void set_parent_of_node(Dictionary dict,
|
1622
|
+
Dict_node *p,
|
1623
|
+
Dict_node * del,
|
1624
|
+
Dict_node * newnode)
|
1625
|
+
{
|
1626
|
+
if (p == NULL) {
|
1627
|
+
dict->root = newnode;
|
1628
|
+
} else {
|
1629
|
+
if (p->left == del) {
|
1630
|
+
p->left = newnode;
|
1631
|
+
} else if (p->right == del) {
|
1632
|
+
p->right = newnode;
|
1633
|
+
} else {
|
1634
|
+
assert(FALSE, "Dictionary broken?");
|
1635
|
+
}
|
1636
|
+
}
|
1637
|
+
}
|
1638
|
+
|
1639
|
+
/**
|
1640
|
+
* This deletes all the non-idiom words of the dictionary that match
|
1641
|
+
* the given string. Returns TRUE if some deleted, FALSE otherwise.
|
1642
|
+
*
|
1643
|
+
* XXX Note: this function is not currently used anywhere in the code,
|
1644
|
+
* but it could be useful for general dictionary editing.
|
1645
|
+
*/
|
1646
|
+
int delete_dictionary_words(Dictionary dict, const char * s)
|
1647
|
+
{
|
1648
|
+
Dict_node *pred, *pred_parent;
|
1649
|
+
Dict_node *parent, *to_be_deleted;
|
1650
|
+
|
1651
|
+
if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return FALSE;
|
1652
|
+
for(;;) {
|
1653
|
+
/* now parent and to_be_deleted are set */
|
1654
|
+
if (to_be_deleted->file != NULL) {
|
1655
|
+
to_be_deleted->file->changed = TRUE;
|
1656
|
+
}
|
1657
|
+
if (to_be_deleted->left == NULL) {
|
1658
|
+
set_parent_of_node(dict, parent, to_be_deleted, to_be_deleted->right);
|
1659
|
+
free_dict_node(to_be_deleted);
|
1660
|
+
} else {
|
1661
|
+
pred_parent = to_be_deleted;
|
1662
|
+
pred = to_be_deleted->left;
|
1663
|
+
while(pred->right != NULL) {
|
1664
|
+
pred_parent = pred;
|
1665
|
+
pred = pred->right;
|
1666
|
+
}
|
1667
|
+
to_be_deleted->string = pred->string;
|
1668
|
+
to_be_deleted->file = pred->file;
|
1669
|
+
to_be_deleted->exp = pred->exp;
|
1670
|
+
set_parent_of_node(dict, pred_parent, pred, pred->left);
|
1671
|
+
free_dict_node(pred);
|
1672
|
+
}
|
1673
|
+
if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return TRUE;
|
1674
|
+
}
|
1675
|
+
}
|
1676
|
+
|
1677
|
+
/**
 * Release every Word_file record in the list starting at wf,
 * following the next pointers.
 */
static void free_Word_file(Word_file * wf)
{
	while (NULL != wf)
	{
		/* Fetch the successor before this record is released. */
		Word_file *following = wf->next;
		xfree((char *) wf, sizeof(Word_file));
		wf = following;
	}
}
|
1686
|
+
|
1687
|
+
/**
|
1688
|
+
* The following two functions free the Exp s and the
|
1689
|
+
* E_lists of the dictionary. Not to be confused with
|
1690
|
+
* free_E_list in utilities.c
|
1691
|
+
*/
|
1692
|
+
/**
 * Release the E_list cells of the list starting at l.  Only the cells
 * are released here, not the expressions they refer to.
 */
static void free_Elist(E_list * l)
{
	while (NULL != l)
	{
		/* Fetch the successor before this cell is released. */
		E_list *following = l->next;
		xfree(l, sizeof(E_list));
		l = following;
	}
}
|
1701
|
+
|
1702
|
+
/**
 * Release every Exp in the list starting at e, following the next
 * pointers.  For anything other than a connector, the operand list
 * cells are released first.
 */
static void free_Exp_list(Exp * e)
{
	while (NULL != e)
	{
		/* Fetch the successor before this expression is released. */
		Exp *following = e->next;

		if (CONNECTOR_type != e->type)
		{
			free_Elist(e->u.l);
		}
		exp_free(e);
		e = following;
	}
}
|
1715
|
+
|
1716
|
+
/**
 * Release the dictionary tree, the list of word-file records and the
 * list of expressions held by dict.
 *
 * The corresponding fields are cleared afterwards so that dict no
 * longer holds pointers to released storage and a repeated call does
 * not free the same structures a second time.  The dict object itself
 * is not released here.
 */
void free_dictionary(Dictionary dict)
{
	free_dict_node_recursive(dict->root);
	dict->root = NULL;
	dict->num_entries = 0;

	free_Word_file(dict->word_file_header);
	dict->word_file_header = NULL;

	free_Exp_list(dict->exp_list);
	dict->exp_list = NULL;
}
|
1722
|
+
|
1723
|
+
/**
|
1724
|
+
* dict_display_word_info() - display the information about the given word.
|
1725
|
+
*/
|
1726
|
+
void dict_display_word_info(Dictionary dict, const char * s)
|
1727
|
+
{
|
1728
|
+
Dict_node *dn, *dn_head;
|
1729
|
+
Disjunct * d1, * d2;
|
1730
|
+
int len;
|
1731
|
+
dn_head = dictionary_lookup_list(dict, s);
|
1732
|
+
if (dn_head == NULL)
|
1733
|
+
{
|
1734
|
+
printf(" \"%s\" matches nothing in the dictionary.\n", s);
|
1735
|
+
return;
|
1736
|
+
}
|
1737
|
+
printf("Matches:\n");
|
1738
|
+
for (dn = dn_head; dn != NULL; dn = dn->right)
|
1739
|
+
{
|
1740
|
+
len = 0;
|
1741
|
+
d1 = build_disjuncts_for_dict_node(dn);
|
1742
|
+
for(d2 = d1 ; d2 != NULL; d2 = d2->next)
|
1743
|
+
{
|
1744
|
+
len++;
|
1745
|
+
}
|
1746
|
+
free_disjuncts(d1);
|
1747
|
+
printf(" ");
|
1748
|
+
left_print_string(stdout, dn->string,
|
1749
|
+
" ");
|
1750
|
+
printf(" %5d disjuncts ", len);
|
1751
|
+
if (dn->file != NULL)
|
1752
|
+
{
|
1753
|
+
printf("<%s>", dn->file->file);
|
1754
|
+
}
|
1755
|
+
printf("\n");
|
1756
|
+
}
|
1757
|
+
free_lookup_list(dn_head);
|
1758
|
+
return;
|
1759
|
+
}
|
1760
|
+
|
1761
|
+
/**
|
1762
|
+
* dict_display_word_expr() - display the connector info for a given word.
|
1763
|
+
*/
|
1764
|
+
void dict_display_word_expr(Dictionary dict, const char * s)
|
1765
|
+
{
|
1766
|
+
Dict_node *dn, *dn_head;
|
1767
|
+
|
1768
|
+
dn_head = dictionary_lookup_list(dict, s);
|
1769
|
+
if (dn_head == NULL)
|
1770
|
+
{
|
1771
|
+
printf(" \"%s\" matches nothing in the dictionary.\n", s);
|
1772
|
+
return;
|
1773
|
+
}
|
1774
|
+
printf("\nExpressions:\n");
|
1775
|
+
for (dn = dn_head; dn != NULL; dn = dn->right)
|
1776
|
+
{
|
1777
|
+
printf(" ");
|
1778
|
+
left_print_string(stdout, dn->string,
|
1779
|
+
" ");
|
1780
|
+
print_expression(dn->exp);
|
1781
|
+
printf("\n\n");
|
1782
|
+
}
|
1783
|
+
free_lookup_list(dn_head);
|
1784
|
+
return;
|
1785
|
+
}
|