grammar_cop 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
Parse_info parse_info_new(int nwords);
|
14
|
+
void free_parse_info(Parse_info);
|
15
|
+
int build_parse_set(Sentence sent, int cost, Parse_Options opts);
|
16
|
+
void extract_links(int index, int cost, Parse_info pi);
|
@@ -0,0 +1,309 @@
|
|
1
|
+
/**************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/**************************************************************************/
|
13
|
+
|
14
|
+
#include "api.h"
|
15
|
+
#include "fast-match.h"
|
16
|
+
|
17
|
+
/**
|
18
|
+
* returns the number of disjuncts in the list that have non-null
|
19
|
+
* left connector lists.
|
20
|
+
*/
|
21
|
+
static int left_disjunct_list_length(Disjunct * d)
|
22
|
+
{
|
23
|
+
int i;
|
24
|
+
for (i=0; d!=NULL; d=d->next) {
|
25
|
+
if (d->left != NULL) i++;
|
26
|
+
}
|
27
|
+
return i;
|
28
|
+
}
|
29
|
+
|
30
|
+
/**
 * Count the disjuncts in the list headed by d whose right connector
 * list is non-NULL.  Disjuncts with a NULL right pointer are skipped.
 */
static int right_disjunct_list_length(Disjunct * d)
{
	int count = 0;
	while (d != NULL) {
		if (d->right != NULL) count++;
		d = d->next;
	}
	return count;
}
|
38
|
+
|
39
|
+
struct match_context_s
|
40
|
+
{
|
41
|
+
int match_cost;
|
42
|
+
int l_table_size[MAX_SENTENCE]; /* the sizes of the hash tables */
|
43
|
+
int r_table_size[MAX_SENTENCE];
|
44
|
+
|
45
|
+
/* the beginnings of the hash tables */
|
46
|
+
Match_node ** l_table[MAX_SENTENCE];
|
47
|
+
Match_node ** r_table[MAX_SENTENCE];
|
48
|
+
|
49
|
+
/* I'll pedantically maintain my own list of these cells */
|
50
|
+
Match_node * mn_free_list;
|
51
|
+
};
|
52
|
+
|
53
|
+
|
54
|
+
/**
|
55
|
+
* Return a match node to be used by the caller
|
56
|
+
*/
|
57
|
+
static Match_node * get_match_node(match_context_t *ctxt)
|
58
|
+
{
|
59
|
+
Match_node * m;
|
60
|
+
if (ctxt->mn_free_list != NULL)
|
61
|
+
{
|
62
|
+
m = ctxt->mn_free_list;
|
63
|
+
ctxt->mn_free_list = m->next;
|
64
|
+
}
|
65
|
+
else
|
66
|
+
{
|
67
|
+
m = (Match_node *) xalloc(sizeof(Match_node));
|
68
|
+
}
|
69
|
+
return m;
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
|
73
|
+
* Put these nodes back onto my free list
|
74
|
+
*/
|
75
|
+
void put_match_list(Sentence sent, Match_node *m)
|
76
|
+
{
|
77
|
+
Match_node * xm;
|
78
|
+
match_context_t *ctxt = sent->match_ctxt;
|
79
|
+
|
80
|
+
for (; m != NULL; m = xm)
|
81
|
+
{
|
82
|
+
xm = m->next;
|
83
|
+
m->next = ctxt->mn_free_list;
|
84
|
+
ctxt->mn_free_list = m;
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
/**
 * Release every Match_node of the list t with xfree().
 */
static void free_match_list(Match_node * t)
{
	Match_node *following;

	while (t != NULL) {
		following = t->next;     /* read the link before the node is released */
		xfree((char *)t, sizeof(Match_node));
		t = following;
	}
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* Free all of the hash tables and Match_nodes
|
99
|
+
*/
|
100
|
+
void free_fast_matcher(Sentence sent)
|
101
|
+
{
|
102
|
+
int w;
|
103
|
+
int i;
|
104
|
+
match_context_t *ctxt = sent->match_ctxt;
|
105
|
+
|
106
|
+
if (verbosity > 1) printf("%d Match cost\n", ctxt->match_cost);
|
107
|
+
for (w = 0; w < sent->length; w++)
|
108
|
+
{
|
109
|
+
for (i = 0; i < ctxt->l_table_size[w]; i++)
|
110
|
+
{
|
111
|
+
free_match_list(ctxt->l_table[w][i]);
|
112
|
+
}
|
113
|
+
xfree((char *)ctxt->l_table[w], ctxt->l_table_size[w] * sizeof (Match_node *));
|
114
|
+
for (i = 0; i < ctxt->r_table_size[w]; i++)
|
115
|
+
{
|
116
|
+
free_match_list(ctxt->r_table[w][i]);
|
117
|
+
}
|
118
|
+
xfree((char *)ctxt->r_table[w], ctxt->r_table_size[w] * sizeof (Match_node *));
|
119
|
+
}
|
120
|
+
free_match_list(ctxt->mn_free_list);
|
121
|
+
ctxt->mn_free_list = NULL;
|
122
|
+
|
123
|
+
free(ctxt);
|
124
|
+
sent->match_ctxt = NULL;
|
125
|
+
}
|
126
|
+
|
127
|
+
/**
|
128
|
+
* Adds the match node m to the sorted list of match nodes l.
|
129
|
+
* The parameter dir determines the order of the sorting to be used.
|
130
|
+
* Makes the list sorted from smallest to largest.
|
131
|
+
*/
|
132
|
+
static Match_node * add_to_right_table_list(Match_node * m, Match_node * l)
|
133
|
+
{
|
134
|
+
if (l == NULL) return m;
|
135
|
+
if ((m->d->right->word) <= (l->d->right->word)) {
|
136
|
+
m->next = l;
|
137
|
+
return m;
|
138
|
+
} else {
|
139
|
+
l->next = add_to_right_table_list(m, l->next);
|
140
|
+
return l;
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
/**
|
145
|
+
* Adds the match node m to the sorted list of match nodes l.
|
146
|
+
* The parameter dir determines the order of the sorting to be used.
|
147
|
+
* Makes the list sorted from largest to smallest
|
148
|
+
*/
|
149
|
+
static Match_node * add_to_left_table_list(Match_node * m, Match_node * l)
|
150
|
+
{
|
151
|
+
if (l==NULL) return m;
|
152
|
+
if ((m->d->left->word) >= (l->d->left->word)) {
|
153
|
+
m->next = l;
|
154
|
+
return m;
|
155
|
+
} else {
|
156
|
+
l->next = add_to_left_table_list(m, l->next);
|
157
|
+
return l;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
/**
|
162
|
+
* The disjunct d (whose left or right pointer points to c) is put
|
163
|
+
* into the appropriate hash table
|
164
|
+
* dir = 1, we're putting this into a right table.
|
165
|
+
* dir = -1, we're putting this into a left table.
|
166
|
+
*/
|
167
|
+
static void put_into_match_table(int size, Match_node ** t,
|
168
|
+
Disjunct * d, Connector * c, int dir )
|
169
|
+
{
|
170
|
+
int h;
|
171
|
+
Match_node * m;
|
172
|
+
h = connector_hash(c) & (size-1);
|
173
|
+
m = (Match_node *) xalloc (sizeof(Match_node));
|
174
|
+
m->next = NULL;
|
175
|
+
m->d = d;
|
176
|
+
if (dir == 1) {
|
177
|
+
t[h] = add_to_right_table_list(m, t[h]);
|
178
|
+
} else {
|
179
|
+
t[h] = add_to_left_table_list(m, t[h]);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
/**
 * Build the fast-matcher state for a sentence and store it in
 * sent->match_ctxt.
 *
 * For every word two hash tables are created: one holding the disjuncts
 * with a non-NULL left connector list, one holding those with a non-NULL
 * right connector list.  Each table gets next_power_of_two_up(count)
 * buckets, where count is the number of such disjuncts.
 *
 * NOTE(review): the malloc() result is used without a NULL check, and
 * the per-word arrays hold MAX_SENTENCE entries, so sent->length is
 * assumed not to exceed MAX_SENTENCE -- confirm against callers.
 */
void init_fast_matcher(Sentence sent)
{
	match_context_t *ctxt;
	Match_node ** table;
	Disjunct * dj;
	int wi, slot, nslots;

	ctxt = (match_context_t *) malloc(sizeof(match_context_t));
	sent->match_ctxt = ctxt;

	ctxt->match_cost = 0;
	ctxt->mn_free_list = NULL;

	for (wi = 0; wi < sent->length; wi++)
	{
		/* ---- left table of word wi ---- */
		nslots = next_power_of_two_up(left_disjunct_list_length(sent->word[wi].d));
		ctxt->l_table_size[wi] = nslots;
		table = (Match_node **) xalloc(nslots * sizeof(Match_node *));
		ctxt->l_table[wi] = table;
		for (slot = 0; slot < nslots; slot++) table[slot] = NULL;

		for (dj = sent->word[wi].d; dj != NULL; dj = dj->next)
		{
			if (dj->left == NULL) continue;
			put_into_match_table(nslots, table, dj, dj->left, -1);
		}

		/* ---- right table of word wi ---- */
		nslots = next_power_of_two_up(right_disjunct_list_length(sent->word[wi].d));
		ctxt->r_table_size[wi] = nslots;
		table = (Match_node **) xalloc(nslots * sizeof(Match_node *));
		ctxt->r_table[wi] = table;
		for (slot = 0; slot < nslots; slot++) table[slot] = NULL;

		for (dj = sent->word[wi].d; dj != NULL; dj = dj->next)
		{
			if (dj->right == NULL) continue;
			put_into_match_table(nslots, table, dj, dj->right, 1);
		}
	}
}
|
227
|
+
|
228
|
+
/**
|
229
|
+
* Forms and returns a list of disjuncts that might match lc or rc or both.
|
230
|
+
* lw and rw are the words from which lc and rc came respectively.
|
231
|
+
* The list is formed by the link pointers of Match_nodes.
|
232
|
+
* The list contains no duplicates. A quadratic algorithm is used to
|
233
|
+
* eliminate duplicates. In practice the match_cost is less than the
|
234
|
+
* parse_cost (and the loop is tiny), so there's no reason to bother
|
235
|
+
* to fix this.
|
236
|
+
*/
|
237
|
+
Match_node *
|
238
|
+
form_match_list(Sentence sent, int w,
|
239
|
+
Connector *lc, int lw, Connector *rc, int rw)
|
240
|
+
{
|
241
|
+
Match_node *ml, *mr, *mx, *my, * mz, *front, *free_later;
|
242
|
+
|
243
|
+
match_context_t *ctxt = sent->match_ctxt;
|
244
|
+
|
245
|
+
if (lc != NULL) {
|
246
|
+
ml = ctxt->l_table[w][connector_hash(lc) & (ctxt->l_table_size[w]-1)];
|
247
|
+
} else {
|
248
|
+
ml = NULL;
|
249
|
+
}
|
250
|
+
if (rc != NULL) {
|
251
|
+
mr = ctxt->r_table[w][connector_hash(rc) & (ctxt->r_table_size[w]-1)];
|
252
|
+
} else {
|
253
|
+
mr = NULL;
|
254
|
+
}
|
255
|
+
|
256
|
+
front = NULL;
|
257
|
+
for (mx = ml; mx != NULL; mx = mx->next)
|
258
|
+
{
|
259
|
+
if (mx->d->left->word < lw) break;
|
260
|
+
my = get_match_node(ctxt);
|
261
|
+
my->d = mx->d;
|
262
|
+
my->next = front;
|
263
|
+
front = my;
|
264
|
+
}
|
265
|
+
ml = front; /* ml is now the list of things that could match the left */
|
266
|
+
|
267
|
+
front = NULL;
|
268
|
+
for (mx = mr; mx != NULL; mx = mx->next)
|
269
|
+
{
|
270
|
+
if (mx->d->right->word > rw) break;
|
271
|
+
my = get_match_node(ctxt);
|
272
|
+
my->d = mx->d;
|
273
|
+
my->next = front;
|
274
|
+
front = my;
|
275
|
+
}
|
276
|
+
mr = front; /* mr is now the list of things that could match the right */
|
277
|
+
|
278
|
+
/* now we want to eliminate duplicates from the lists */
|
279
|
+
|
280
|
+
free_later = NULL;
|
281
|
+
front = NULL;
|
282
|
+
for (mx = mr; mx != NULL; mx = mz)
|
283
|
+
{
|
284
|
+
/* see if mx in first list, put it in if its not */
|
285
|
+
mz = mx->next;
|
286
|
+
ctxt->match_cost++;
|
287
|
+
for (my=ml; my!=NULL; my=my->next) {
|
288
|
+
ctxt->match_cost++;
|
289
|
+
if (mx->d == my->d) break;
|
290
|
+
}
|
291
|
+
if (my != NULL) { /* mx was in the l list */
|
292
|
+
mx->next = free_later;
|
293
|
+
free_later = mx;
|
294
|
+
}
|
295
|
+
if (my==NULL) { /* it was not there */
|
296
|
+
mx->next = front;
|
297
|
+
front = mx;
|
298
|
+
}
|
299
|
+
}
|
300
|
+
mr = front; /* mr is now the abbreviated right list */
|
301
|
+
put_match_list(sent, free_later);
|
302
|
+
|
303
|
+
/* now catenate the two lists */
|
304
|
+
if (mr == NULL) return ml;
|
305
|
+
for (mx = mr; mx->next != NULL; mx = mx->next)
|
306
|
+
;
|
307
|
+
mx->next = ml;
|
308
|
+
return mr;
|
309
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
void init_fast_matcher(Sentence sent);
|
15
|
+
void free_fast_matcher(Sentence sent);
|
16
|
+
void put_match_list(Sentence, Match_node *);
|
17
|
+
Match_node * form_match_list(Sentence, int, Connector *, int, Connector *, int);
|
@@ -0,0 +1,373 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include "api.h"
|
15
|
+
#include "error.h"
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Returns TRUE if the string contains an underbar character.
|
19
|
+
*/
|
20
|
+
int contains_underbar(const char * s)
|
21
|
+
{
|
22
|
+
while(*s != '\0') {
|
23
|
+
if (*s == '_') return TRUE;
|
24
|
+
s++;
|
25
|
+
}
|
26
|
+
return FALSE;
|
27
|
+
}
|
28
|
+
|
29
|
+
/**
 * Check whether s is a correctly formed idiom string.
 *
 * A string is accepted when all of the following hold:
 *   () it is not empty
 *   () it contains no "."
 *   () it neither starts nor ends with "_"
 *   () it contains no two adjacent "_" (every part between
 *      underbars is a non-empty string)
 *
 * A string without any "_" at all is accepted by this test.
 *
 * @param s  NUL-terminated string to check; must not be NULL.
 * @return   non-zero (true) if s is well formed, zero (false) otherwise.
 */
static int is_idiom_string(const char * s)
{
	size_t len = strlen(s);

	/* The length test must come first: with len == 0 the expression
	 * s[len-1] would index before the start of the string. */
	return (len > 0)
	    && (strchr(s, '.') == NULL)
	    && (s[0] != '_')
	    && (s[len-1] != '_')
	    && (strstr(s, "__") == NULL);
}
|
57
|
+
|
58
|
+
/**
|
59
|
+
* return TRUE if the string s is a sequence of digits.
|
60
|
+
*/
|
61
|
+
static int is_number_idiom(const char *s)
|
62
|
+
{
|
63
|
+
int nb;
|
64
|
+
while(*s != '\0') {
|
65
|
+
nb = is_utf8_digit(s);
|
66
|
+
if (!nb) return FALSE;
|
67
|
+
s += nb;
|
68
|
+
}
|
69
|
+
return TRUE;
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
 * Extract the idiom number from a word of the form "word.Ix".
 *
 * Only the first "." in the string is considered: it must be followed
 * by the letter 'I' and then by nothing but digits up to the end of
 * the string.
 *
 * @param s  NUL-terminated word.
 * @return   x converted with atoi(), or -1 if s is not of this form.
 */
static int numberfy(const char * s)
{
	const char * dot = strchr(s, '.');

	if (dot == NULL) return -1;
	if (dot[1] != 'I') return -1;

	/* Step past the ".I" marker to the numeric suffix. */
	dot += 2;
	if (!is_number_idiom(dot)) return -1;

	return atoi(dot);
}
|
85
|
+
|
86
|
+
/**
|
87
|
+
* Look for words that end in ".Ix" where x is a number.
|
88
|
+
* Return the largest x found.
|
89
|
+
*/
|
90
|
+
static int max_postfix_found(Dict_node * d)
|
91
|
+
{
|
92
|
+
int i, j;
|
93
|
+
i = 0;
|
94
|
+
while(d != NULL) {
|
95
|
+
j = numberfy(d->string);
|
96
|
+
if (j > i) i = j;
|
97
|
+
d = d->right;
|
98
|
+
}
|
99
|
+
return i;
|
100
|
+
}
|
101
|
+
|
102
|
+
/**
|
103
|
+
* build_idiom_word_name() -- return idiomized name of given string.
|
104
|
+
*
|
105
|
+
* Allocates string space and returns a pointer to it.
|
106
|
+
* In this string is placed the idiomized name of the given string s.
|
107
|
+
* This is the same as s, but with a postfix of ".Ix", where x is an
|
108
|
+
* appropriate number. x is the minimum number that distinguishes
|
109
|
+
* this word from others in the dictionary.
|
110
|
+
*/
|
111
|
+
static const char * build_idiom_word_name(Dictionary dict, const char * s)
|
112
|
+
{
|
113
|
+
char buff[2*MAX_WORD];
|
114
|
+
char *x;
|
115
|
+
int count;
|
116
|
+
|
117
|
+
Dict_node *dn = dictionary_lookup_list(dict, s);
|
118
|
+
count = max_postfix_found(dn)+1;
|
119
|
+
free_lookup_list(dn);
|
120
|
+
|
121
|
+
x = buff;
|
122
|
+
while((*s != '\0') && (*s != '.'))
|
123
|
+
{
|
124
|
+
*x = *s;
|
125
|
+
x++;
|
126
|
+
s++;
|
127
|
+
}
|
128
|
+
sprintf(x, ".I%d",count);
|
129
|
+
|
130
|
+
return string_set_add(buff, dict->string_set);
|
131
|
+
}
|
132
|
+
|
133
|
+
/**
|
134
|
+
* Tear the idiom string apart.
|
135
|
+
* Put the parts into a list of Dict_nodes (connected by their right pointers)
|
136
|
+
* Sets the string fields of these Dict_nodes pointing to the
|
137
|
+
* fragments of the string s. Later these will be replaced by
|
138
|
+
* correct names (with .Ix suffixes).
|
139
|
+
* The list is reversed from the way they occur in the string.
|
140
|
+
* A pointer to this list is returned.
|
141
|
+
*/
|
142
|
+
static Dict_node * make_idiom_Dict_nodes(Dictionary dict, const char * string)
|
143
|
+
{
|
144
|
+
Dict_node * dn, * dn_new;
|
145
|
+
char * t, *s, *p;
|
146
|
+
int more, sz;
|
147
|
+
dn = NULL;
|
148
|
+
|
149
|
+
sz = strlen(string)+1;
|
150
|
+
p = s = (char *) xalloc(sz);
|
151
|
+
strcpy(s, string);
|
152
|
+
|
153
|
+
while (*s != '\0') {
|
154
|
+
t = s;
|
155
|
+
while((*s != '\0') && (*s != '_')) s++;
|
156
|
+
if (*s == '_') {
|
157
|
+
more = TRUE;
|
158
|
+
*s = '\0';
|
159
|
+
} else {
|
160
|
+
more = FALSE;
|
161
|
+
}
|
162
|
+
dn_new = (Dict_node *) xalloc(sizeof (Dict_node));
|
163
|
+
dn_new->right = dn;
|
164
|
+
dn = dn_new;
|
165
|
+
dn->string = string_set_add(t, dict->string_set);
|
166
|
+
dn->file = NULL;
|
167
|
+
if (more) s++;
|
168
|
+
}
|
169
|
+
|
170
|
+
xfree(p, sz);
|
171
|
+
return dn;
|
172
|
+
}
|
173
|
+
|
174
|
+
/* Counter from which idiom connector names are generated. It is treated
 * as a fixed-width number whose digits are the letters 'A'..'Z', with
 * the least significant digit last. */
static char current_name[] = "AAAAAAAA";
/* Number of letters in current_name, excluding the terminating NUL. */
#define CN_size (sizeof(current_name)-1)

/**
 * Advance current_name to the next value.
 *
 * The last letter is incremented; a letter that goes past 'Z' is reset
 * to 'A' and the carry moves one position to the left. If every letter
 * was 'Z' the counter wraps around to all 'A's: the carry out of the
 * first letter is dropped rather than written before the array.
 */
static void increment_current_name(void)
{
	int i;

	for (i = (int) CN_size - 1; i >= 0; i--)
	{
		current_name[i]++;
		if (current_name[i] != 'Z'+1) return;   /* no carry: done */
		current_name[i] = 'A';                  /* carry into position i-1 */
	}
}
|
193
|
+
|
194
|
+
/**
|
195
|
+
* Generate a new connector name obtained from the current_name.
|
196
|
+
* allocate string space for it.
|
197
|
+
* @return a pointer to connector name.
|
198
|
+
*/
|
199
|
+
static const char * generate_id_connector(Dictionary dict)
|
200
|
+
{
|
201
|
+
char buff[2*MAX_WORD];
|
202
|
+
unsigned int i;
|
203
|
+
char * t;
|
204
|
+
|
205
|
+
for (i=0; current_name[i] == 'A'; i++)
|
206
|
+
;
|
207
|
+
/* i is now the number of characters of current_name to skip */
|
208
|
+
t = buff;
|
209
|
+
|
210
|
+
/* All idiom connector names start with the two letters "ID" */
|
211
|
+
*t++ = 'I';
|
212
|
+
*t++ = 'D';
|
213
|
+
for (; i < CN_size; i++ )
|
214
|
+
{
|
215
|
+
*t++ = current_name[i] ;
|
216
|
+
}
|
217
|
+
*t++ = '\0';
|
218
|
+
return string_set_add(buff, dict->string_set);
|
219
|
+
}
|
220
|
+
|
221
|
+
/**
|
222
|
+
* Takes as input a pointer to a Dict_node.
|
223
|
+
* The string of this Dict_node is an idiom string.
|
224
|
+
* This string is torn apart, and its components are inserted into the
|
225
|
+
* dictionary as special idiom words (ending in .I*, where * is a number).
|
226
|
+
* The expression of this Dict_node (its node field) has already been
|
227
|
+
* read and constructed. This will be used to construct the special idiom
|
228
|
+
* expressions.
|
229
|
+
* The given dict node is freed. The string is also freed.
|
230
|
+
*/
|
231
|
+
void insert_idiom(Dictionary dict, Dict_node * dn)
|
232
|
+
{
|
233
|
+
Exp * nc, * no, * n1;
|
234
|
+
E_list *ell, *elr;
|
235
|
+
const char * s;
|
236
|
+
int s_length;
|
237
|
+
Dict_node * dn_list, * xdn, * start_dn_list;
|
238
|
+
|
239
|
+
no = dn->exp;
|
240
|
+
s = dn->string;
|
241
|
+
s_length = strlen(s);
|
242
|
+
|
243
|
+
if (!is_idiom_string(s))
|
244
|
+
{
|
245
|
+
prt_error("Warning: Word \"%s\" on line %d "
|
246
|
+
"is not a correctly formed idiom string.\n"
|
247
|
+
"\tThis word will be ignored\n",
|
248
|
+
s, dict->line_number);
|
249
|
+
|
250
|
+
xfree((char *)dn, sizeof (Dict_node));
|
251
|
+
return;
|
252
|
+
}
|
253
|
+
|
254
|
+
dn_list = start_dn_list = make_idiom_Dict_nodes(dict, s);
|
255
|
+
xfree((char *)dn, sizeof (Dict_node));
|
256
|
+
|
257
|
+
if (dn_list->right == NULL) {
|
258
|
+
prt_error("Fatal Error: Idiom string with only one connector -- should have been caught");
|
259
|
+
exit(1);
|
260
|
+
}
|
261
|
+
|
262
|
+
/* first make the nodes for the base word of the idiom (last word) */
|
263
|
+
/* note that the last word of the idiom is first in our list */
|
264
|
+
|
265
|
+
/* ----- this code just sets up the node fields of the dn_list ----*/
|
266
|
+
nc = Exp_create(dict);
|
267
|
+
nc->u.string = generate_id_connector(dict);
|
268
|
+
nc->dir = '-';
|
269
|
+
nc->multi = FALSE;
|
270
|
+
nc->type = CONNECTOR_type;
|
271
|
+
nc->cost = 0;
|
272
|
+
|
273
|
+
n1 = Exp_create(dict);
|
274
|
+
n1->u.l = ell = (E_list *) xalloc(sizeof(E_list));
|
275
|
+
ell->next = elr = (E_list *) xalloc(sizeof(E_list));
|
276
|
+
elr->next = NULL;
|
277
|
+
ell->e = nc;
|
278
|
+
elr->e = no;
|
279
|
+
n1->type = AND_type;
|
280
|
+
n1->cost = 0;
|
281
|
+
|
282
|
+
dn_list->exp = n1;
|
283
|
+
|
284
|
+
dn_list = dn_list->right;
|
285
|
+
|
286
|
+
while(dn_list->right != NULL) {
|
287
|
+
/* generate the expression for a middle idiom word */
|
288
|
+
|
289
|
+
n1 = Exp_create(dict);
|
290
|
+
n1->u.string = NULL;
|
291
|
+
n1->type = AND_type;
|
292
|
+
n1->cost = 0;
|
293
|
+
n1->u.l = ell = (E_list *) xalloc(sizeof(E_list));
|
294
|
+
ell->next = elr = (E_list *) xalloc(sizeof(E_list));
|
295
|
+
elr->next = NULL;
|
296
|
+
|
297
|
+
nc = Exp_create(dict);
|
298
|
+
nc->u.string = generate_id_connector(dict);
|
299
|
+
nc->dir = '+';
|
300
|
+
nc->multi = FALSE;
|
301
|
+
nc->type = CONNECTOR_type;
|
302
|
+
nc->cost = 0;
|
303
|
+
elr->e = nc;
|
304
|
+
|
305
|
+
increment_current_name();
|
306
|
+
|
307
|
+
nc = Exp_create(dict);
|
308
|
+
nc->u.string = generate_id_connector(dict);
|
309
|
+
nc->dir = '-';
|
310
|
+
nc->multi = FALSE;
|
311
|
+
nc->type = CONNECTOR_type;
|
312
|
+
nc->cost = 0;
|
313
|
+
|
314
|
+
ell->e = nc;
|
315
|
+
|
316
|
+
dn_list->exp = n1;
|
317
|
+
|
318
|
+
dn_list = dn_list->right;
|
319
|
+
}
|
320
|
+
/* now generate the last one */
|
321
|
+
|
322
|
+
nc = Exp_create(dict);
|
323
|
+
nc->u.string = generate_id_connector(dict);
|
324
|
+
nc->dir = '+';
|
325
|
+
nc->multi = FALSE;
|
326
|
+
nc->type = CONNECTOR_type;
|
327
|
+
nc->cost = 0;
|
328
|
+
|
329
|
+
dn_list->exp = nc;
|
330
|
+
|
331
|
+
increment_current_name();
|
332
|
+
|
333
|
+
/* ---- end of the code alluded to above ---- */
|
334
|
+
|
335
|
+
/* now its time to insert them into the dictionary */
|
336
|
+
|
337
|
+
dn_list = start_dn_list;
|
338
|
+
|
339
|
+
while (dn_list != NULL) {
|
340
|
+
xdn = dn_list->right;
|
341
|
+
dn_list->left = dn_list->right = NULL;
|
342
|
+
dn_list->string = build_idiom_word_name(dict, dn_list->string);
|
343
|
+
dict->root = insert_dict(dict, dict->root, dn_list);
|
344
|
+
dict->num_entries++;
|
345
|
+
dn_list = xdn;
|
346
|
+
}
|
347
|
+
/* xfree((char *)s, s_length+1); strings are handled by string_set */
|
348
|
+
}
|
349
|
+
|
350
|
+
/**
 * Tell whether s is an idiom word, i.e. a word for which numberfy()
 * finds a ".Ix" suffix.
 *
 * @param s  NUL-terminated word.
 * @return   1 if numberfy() recognizes the suffix, 0 if it reports -1.
 */
int is_idiom_word(const char * s)
{
	int suffix = numberfy(s);

	return (suffix != -1);
}
|
357
|
+
|
358
|
+
#ifdef THIS_IS_NOT_USED
/*
   Returns TRUE if every word in the list (linked through the right
   pointers) is an idiom word; an empty list also gives TRUE.
   This is useful, because under this condition you want to be able
   to insert the word anyway, as long as it doesn't match exactly.
*/
int only_idiom_words(Dict_node * dn)
{
	for (; dn != NULL; dn = dn->right)
	{
		if (!is_idiom_word(dn->string)) return FALSE;
	}
	return TRUE;
}
#endif
|