grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
data/ext/link_grammar/link-grammar/analyze-linkage.h
@@ -0,0 +1,24 @@
+/*************************************************************************/
+/* Copyright (c) 2004 */
+/* Daniel Sleator, David Temperley, and John Lafferty */
+/* All rights reserved */
+/* */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software, */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html */
+/* This license allows free redistribution and use in source and binary */
+/* forms, with or without modification, subject to certain conditions. */
+/* */
+/*************************************************************************/
+
+void init_analyze(Sentence);
+void free_analyze(Sentence);
+
+void extract_thin_linkage(Sentence, Parse_Options, Linkage);
+void extract_fat_linkage (Sentence, Parse_Options, Linkage);
+Linkage_info analyze_fat_linkage (Sentence, Parse_Options, int pass);
+Linkage_info analyze_thin_linkage(Sentence, Parse_Options, int pass);
+void free_DIS_tree(DIS_node *);
+
+void zero_sublinkage(Sublinkage *s);
+
data/ext/link_grammar/link-grammar/and.c
@@ -0,0 +1,1603 @@
+/*************************************************************************/
+/* Copyright (c) 2004 */
+/* Daniel Sleator, David Temperley, and John Lafferty */
+/* All rights reserved */
+/* */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software, */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html */
+/* This license allows free redistribution and use in source and binary */
+/* forms, with or without modification, subject to certain conditions. */
+/* */
+/*************************************************************************/
+
+#include "api.h"
+#include "disjunct-utils.h"
+
+/*
+ Notes about AND
+
+ A large fraction of the code of this parser seems to deal with handling
+ conjunctions. This comment (combined with reading the paper) should
+ give an idea of how it works.
+
+ First of all, we need a more detailed discussion of strings, what they
+ match, etc. (This entire discussion ignores the labels, which are
+ semantically the same as the leading upper case letters of the
+ connector.)
+
+ We'll deal with infinite strings from an alphabet of three types of
+ characters: "*". "^" and ordinary characters (denoted "a" and "b").
+ (The end of a string should be thought of as an infinite sequence of
+ "*"s).
+
+ Let match(s) be the set of strings that will match the string s. This
+ is defined as follows. A string t is in match(s) if (1) its leading
+ upper case letters exactly match those of s. (2) traversing through
+ both strings, from left to right in step, no missmatch is found
+ between corresponding letters. A missmatch is a pair of differing
+ ordinary characters, or a "^" and any ordinary letter or two "^"s.
+ In other words, a match is exactly a "*" and anything, or two
+ identical ordinary letters.
+
+ Alternative definition of the set match(s):
+ {t | t is obtained from s by replacing each "^" and any other characters
+ by "*"s, and replacing any original "*" in s by any other character
+ (or "^").}
+
+ Theorem: if t in match(s) then s in match(t).
+
+ It is also a theorem that given any two strings s and t, there exists a
+ unique new string u with the property that:
+
+ match(u) = match(s) intersect match(t)
+
+ This string is called the GCD of s and t. Here are some examples.
+
+ GCD(N*a,Nb) = Nba
+ GCD(Na, Nb) = N^
+ GCD(Nab,Nb) = N^b
+ GCD(N^,N*a) = N^a
+ GCD(N^, N) = N^
+ GCD(N^^,N^) = N^^
+
+ We need an algorithm for computing the GCD of two strings. Here is
+ one.
+
+ First get by the upper case letters (which must be equal, otherwise
+ there is no intersection), issuing them. Traverse the rest of the
+ characters of s and t in lockstep until there is nothing left but
+ "*"s. If the two characters are:
+
+ "a" and "a", issue "a"
+ "a" and "b", issue "^"
+ "a" and "*", issue "a"
+ "*" and "*", issue "*"
+ "*" and "^", issue "^"
+ "a" and "^", issue "^"
+ "^" and "^", issue "^"
+
+ A simple case analysis suffices to show that any string that matches
+ the right side, must match both of the left sides, and any string not
+ matching the right side must not match at least one of the left sides.
+
+ This proves that the GCD operator is associative and commutative.
+ (There must be a name for a mathematical structure with these properties.)
+
+ To elaborate further on this theory, define the notion of two strings
+ matching in the dual sense as follows: s and t dual-match if
+ match(s) is contained in match(t) or vice versa---
+
+ Full development of this theory could lead to a more efficient
+ algorithm for this problem. I'll defer this until such time as it
+ appears necessary.
+
+
+ We need a data structure that stores a set of fat links. Each fat
+ link has a number (called its label). The fat link operates in liu of
+ a collection of links. The particular stuff it is a substitute for is
+ defined by a disjunct. This disjunct is stored in the data structure.
+
+ The type of a disjunct is defined by the sequence of connector types
+ (defined by their upper case letters) that comprises it. Each entry
+ of the label_table[] points to a list of disjuncts that have the same
+ type (a hash table is uses so that, given a disjunct, we can efficiently
+ compute the element of the label table in which it belongs).
+
+ We begin by loading up the label table with all of the possible
+ fat links that occur through the words of the sentence. These are
+ obtained by taking every sub-range of the connectors of each disjunct
+ (containing the center). We also compute the closure (under the GCD
+ operator) of these disjuncts and store also store these in the
+ label_table. Each disjunct in this table has a string which represents
+ the subscripts of all of its connectors (and their multi-connector bits).
+
+ It is possible to generate a fat connector for any one of the
+ disjuncts in the label_table. This connector's label field is given
+ the label from the disjunct from which it arose. It's string field
+ is taken from the string of the disjunct (mentioned above). It will be
+ given a priority with a value of UP_priority or DOWN_priority (depending
+ on how it will be used). A connector of UP_priority can match one of
+ DOWN_priority, but neither of these can match any other priority.
+ (Of course, a fat connector can match only another fat connector with
+ the same label.)
+
+ The paper describes in some detail how disjuncts are given to words
+ and to "and" and ",", etc. Each word in the sentence gets many more
+ new disjuncts. For each contiguous set of connectors containing (or
+ adjacent to) the center of the disjunct, we generate a fat link, and
+ replace these connector in the word by a fat link. (Actually we do
+ this twice. Once pointing to the right, once to the left.) These fat
+ links have priority UP_priority.
+
+ What do we generate for ","? For each type of fat link (each label)
+ we make a disjunct that has two down connectors (to the right and left)
+ and one up connector (to the right). There will be a unique way of
+ hooking together a comma-separated and-list.
+
+ The disjuncts on "and" are more complicated. Here we have to do just what
+ we did for comma (but also include the up link to the left), then
+ we also have to allow the process to terminate. So, there is a disjunct
+ with two down fat links, and between them are the original thin links.
+ These are said to "blossom" out. However, this is not all that is
+ necessary. It's possible for an and-list to be part of another and list
+ with a different labeled fat connector. To make this possible, we
+ regroup the just blossomed disjuncts (in all possible ways about the center)
+ and install them as fat links. If this sounds like a lot of disjuncts --
+ it is! The program is currently fairly slow on long sentence with and.
+
+ It is slightly non-obvious that the fat-links in a linkage constructed
+ from disjuncts defined in this way form a binary tree. Naturally,
+ connectors with UP_priority point up the tree, and those with DOWN_priority
+ point down the tree.
+
+ Think of the string x on the connector as representing a set X of strings.
+ X = match(x). So, for example, if x="S^" then match(x) = {"S", "S*a",
+ "S*b", etc}. The matching rules for UP and DOWN priority connectors
+ are such that as you go up (the tree of ands) the X sets get no larger.
+ So, for example, a "Sb" pointing up can match an "S^" pointing down.
+ (Because more stuff can match "Sb" than can match "S^".)
+ This guarantees that whatever connector ultimately gets used after the
+ fat connector blossoms out (see below), it is a powerful enough connector
+ to be able to match to any of the connectors associated with it.
+
+ One problem with the scheme just descibed is that it sometimes generates
+ essentially the same linkage several times. This happens if there is
+ a gap in the connective power, and the mismatch can be moved around in
+ different ways. Here is an example of how this happens.
+
+ (Left is DOWN, right is UP)
+
+ Sa <---> S^ <---> S or Sa <---> Sa <---> S
+ fat thin fat thin
+
+ Here two of the disjunct types are given by "S^" and "Sa". Notice that
+ the criterion of shrinking the matching set is satisfied by the the fat
+ link (traversing from left to right). How do I eliminate one of these?
+
+ I use the technique of canonization. I generate all the linkages. There
+ is then a procedure that can check to see of a linkage is canonical.
+ If it is, it's used, otherwise it's ignored. It's claimed that exactly
+ one canonical one of each equivalence class will be generated.
+ We basically insist that the intermediate fat disjuncts (ones that
+ have a fat link pointing down) are all minimal -- that is, that they
+ cannot be replaced by by another (with a strictly) smaller match set.
+ If one is not minimal, then the linkage is rejected.
+
+ Here's a proof that this is correct. Consider the set of equivalent
+ linkages that are generated. These Pick a disjunct that is the root of
+ its tree. Consider the set of all disjuncts which occur in that positon
+ among the equivalent linkages. The GCD of all of these can fit in that
+ position (it matches down the tree, since its match set has gotten
+ smaller, and it also matches to the THIN links.) Since the GCD is put
+ on "and" this particular one will be generated. Therefore rejecting
+ a linkage in which a root fat disjunct can be replaced by a smaller one
+ is ok (since the smaller one will be generated separately). What about
+ a fat disjunct that is not the root. We consider the set of linkages in
+ which the root is minimal (the ones for which it's not have already been
+ eliminated). Now, consider one of the children of the root in precisely
+ the way we just considered the root. The same argument holds. The only
+ difference is that the root node gives another constraint on how small
+ you can make the disjunct -- so, within these constraints, if we can go
+ smaller, we reject.
+
+ The code to do all of this is fairly ugly, but I think it works.
+
+
+ Problems with this stuff:
+
+ 1) There is obviously a combinatorial explosion that takes place.
+ As the number of disjuncts (and the number of their subscripts
+ increase) the number of disjuncts that get put onto "and" will
+ increase tremendously. When we made the transcript for the tech
+ report (Around August 1991) most of the sentence were processed
+ in well under 10 seconds. Now (Jan 1992), some of these sentences
+ take ten times longer. As of this writing I don't really know the
+ reason, other than just the fact that the dictionary entries are
+ more complex than they used to be. The number of linkages has also
+ increased significantly.
+
+ 2) Each element of an and list must be attached through only one word.
+ This disallows "there is time enough and space enough for both of us",
+ and many other reasonable sounding things. The combinatorial
+ explosion that would occur if you allowed two different connection
+ points would be tremendous, and the number of solutions would also
+ probably go up by another order of magnitude. Perhaps if there
+ were strong constraints on the type of connectors in which this
+ would be allowed, then this would be a conceivable prospect.
+
+ 3) A multi-connector must be either all "outside" or all "inside" the and.
+ For example, "the big black dog and cat ran" has only two ways to
+ linkages (instead of three).
+
+ Possible bug: It seems that the following two linkages should be the
+ same under the canonical linkage test. Could this have to do with the
+ pluralization system?
+
+ > I am big and the bike and the car were broken
+ Accepted (4 linkages, 4 with no P.P. violations) at stage 1
+ Linkage 1, cost vector = (0, 0, 18)
+
+ +------Spx-----+
+ +-----CC-----+------Wd------+-d^^*i^-+ |
+ +-Wd-+Spi+-Pa+ | +--Ds-+d^^*+ +-Ds-+ +--Pv-+
+ | | | | | | | | | | | |
+ ///// I.p am big.a and the bike.n and the car.n were broken
+
+ ///// RW <---RW----> RW /////
+ ///// Wd <---Wd----> Wd I.p
+ I.p CC <---CC----> CC and
+ I.p Sp*i <---Spii--> Spi am
+ am Pa <---Pa----> Pa big.a
+ and Wd <---Wd----> Wd and
+ bike.n d^s** 6<---d^^*i-> d^^*i 6 and
+ the D <---Ds----> Ds bike.n
+ and Sp <---Spx---> Spx were
+ and d^^*i 6<---d^^*i-> d^s** 6 car.n
+ the D <---Ds----> Ds car.n
+ were Pv <---Pv----> Pv broken
+
+ (press return for another)
+ >
+ Linkage 2, cost vector = (0, 0, 18)
+
+ +------Spx-----+
+ +-----CC-----+------Wd------+-d^s**^-+ |
+ +-Wd-+Spi+-Pa+ | +--Ds-+d^s*+ +-Ds-+ +--Pv-+
+ | | | | | | | | | | | |
+ ///// I.p am big.a and the bike.n and the car.n were broken
+
+ ///// RW <---RW----> RW /////
+ ///// Wd <---Wd----> Wd I.p
+ I.p CC <---CC----> CC and
+ I.p Sp*i <---Spii--> Spi am
+ am Pa <---Pa----> Pa big.a
+ and Wd <---Wd----> Wd and
+ bike.n d^s** 6<---d^s**-> d^s** 6 and
+ the D <---Ds----> Ds bike.n
+ and Sp <---Spx---> Spx were
+ and d^s** 6<---d^s**-> d^s** 6 car.n
+ the D <---Ds----> Ds car.n
+ were Pv <---Pv----> Pv broken
+
+*/
+
+static void init_LT(Sentence sent)
+{
+    sent->and_data.LT_bound = 20;
+    sent->and_data.LT_size = 0;
+    sent->and_data.label_table =
+        (Disjunct **) xalloc(sent->and_data.LT_bound * sizeof(Disjunct *));
+}
+
+static void grow_LT(Sentence sent)
+{
+    size_t oldsize = sent->and_data.LT_bound * sizeof(Disjunct *);
+    sent->and_data.LT_bound = (3*sent->and_data.LT_bound)/2;
+    sent->and_data.label_table =
+        (Disjunct **) xrealloc(sent->and_data.label_table,
+            oldsize,
+            sent->and_data.LT_bound * sizeof(Disjunct *));
+}
+
+static void init_HT(Sentence sent)
+{
+    memset(sent->and_data.hash_table, 0, HT_SIZE*sizeof(Label_node *));
+}
+
+static void free_HT(Sentence sent)
+{
+    int i;
+    Label_node * la, * la1;
+    for (i=0; i<HT_SIZE; i++) {
+        for (la=sent->and_data.hash_table[i]; la != NULL; la = la1) {
+            la1 = la->next;
+            xfree((char *)la, sizeof(Label_node));
+        }
+        sent->and_data.hash_table[i] = NULL;
+    }
+}
+
+static void free_LT(Sentence sent)
+{
+    int i;
+    for (i=0; i<sent->and_data.LT_size; i++) {
+        free_disjuncts(sent->and_data.label_table[i]);
+    }
+    xfree((char *) sent->and_data.label_table,
+        sent->and_data.LT_bound * sizeof(Disjunct*));
+    sent->and_data.LT_bound = 0;
+    sent->and_data.LT_size = 0;
+    sent->and_data.label_table = NULL;
+}
+
+void free_AND_tables(Sentence sent)
+{
+    free_LT(sent);
+    free_HT(sent);
+}
+
+void initialize_conjunction_tables(Sentence sent)
+{
+    int i;
+    sent->and_data.LT_bound = 0;
+    sent->and_data.LT_size = 0;
+    sent->and_data.label_table = NULL;
+    for (i=0; i<HT_SIZE; i++) {
+        sent->and_data.hash_table[i] = NULL;
+    }
+}
+
+/**
+ * This is a hash function for disjuncts
+ */
+static inline int and_hash_disjunct(Disjunct *d)
+{
+    unsigned int i;
+    Connector *e;
+    i = 0;
+    for (e = d->left ; e != NULL; e = e->next) {
+        i += connector_hash(e);
+    }
+    i += (i<<5);
+    for (e = d->right ; e != NULL; e = e->next) {
+        i += connector_hash(e);
+    }
+    return (i & (HT_SIZE-1));
+}
+
+/**
+ * Returns TRUE if the disjunct is appropriate to be made into fat links.
+ * Check here that the connectors are from some small set.
+ * This will disallow, for example "the and their dog ran".
+ */
+static int is_appropriate(Sentence sent, Disjunct * d)
+{
+    Connector * c;
+
+    if (sent->dict->andable_connector_set == NULL) return TRUE;
+    /* if no set, then everything is considered andable */
+    for (c = d->right; c!=NULL; c=c->next) {
+        if (!match_in_connector_set(sent, sent->dict->andable_connector_set, c, '+')) return FALSE;
+    }
+    for (c = d->left; c!=NULL; c=c->next) {
+        if (!match_in_connector_set(sent, sent->dict->andable_connector_set, c, '-')) return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+ * Two connectors are said to be of the same type if they have
+ * the same label, and the initial upper case letters of their
+ * strings match.
+ */
+static int connector_types_equal(Connector * c1, Connector * c2)
+{
+    if (c1->label != c2->label) return FALSE;
+    return utf8_upper_match(c1->string, c2->string);
+}
+
+/**
+ * Two disjuncts are said to be the same type if they're the same
+ * ignoring the multi fields, the priority fields, and the subscripts
+ * of the connectors (and the string field of the disjunct of course).
+ * Disjuncts of the same type are located in the same label_table list.
+ *
+ * This returns TRUE if they are of the same type.
+ */
+static int disjunct_types_equal(Disjunct * d1, Disjunct * d2)
+{
+    Connector *e1, *e2;
+
+    e1 = d1->left;
+    e2 = d2->left;
+    while((e1!=NULL) && (e2!=NULL)) {
+        if (!connector_types_equal(e1,e2)) break;
+        e1 = e1->next;
+        e2 = e2->next;
+    }
+    if ((e1!=NULL) || (e2!=NULL)) return FALSE;
+    e1 = d1->right;
+    e2 = d2->right;
+    while((e1!=NULL) && (e2!=NULL)) {
+        if (!connector_types_equal(e1,e2)) break;
+        e1 = e1->next;
+        e2 = e2->next;
+    }
+    if ((e1!=NULL) || (e2!=NULL)) return FALSE;
+    return TRUE;
+}
+
+/**
+ * This returns a string that is the the GCD of the two given strings.
+ * If the GCD is equal to one of them, a pointer to it is returned.
+ * Otherwise a new string for the GCD is xalloced and put on the
+ * "free later" list.
+ */
+const char * intersect_strings(Sentence sent, const char * s, const char * t)
+{
+    int len, i, j, d;
+    const char *w, *s0;
+    char u0[MAX_TOKEN_LENGTH]; /* Links are *always* less than 10 chars long */
+    char *u;
+    if (strcmp(s,t)==0) return s; /* would work without this */
+    i = strlen(s);
+    j = strlen(t);
+    if (j > i) {
+        w = s; s = t; t = w;
+        len = j;
+    } else {
+        len = i;
+    }
+    /* s is now the longer (at least not the shorter) string */
+    /* and len is its length */
+    u = u0;
+    d = 0;
+    s0 = s;
+    while (*t != '\0') {
+        if ((*s == *t) || (*t == '*')) {
+            *u = *s;
+        } else {
+            d++;
+            if (*s == '*') *u = *t;
+            else *u = '^';
+        }
+        s++; t++; u++;
+    }
+    if (d==0) {
+        return s0;
+    } else {
+        strcpy(u, s); /* get the remainder of s */
+        return string_set_add(u0, sent->string_set);
+    }
+}
+
+/**
+ * Two connectors are said to be equal if they are of the same type
+ * (defined above), they have the same multi field, and they have
+ * exactly the same connectors (including lower case chars).
+ * (priorities ignored).
+ */
+static int connectors_equal_AND(Connector *c1, Connector *c2)
+{
+    return (c1->label == c2->label) &&
+        (c1->multi == c2->multi) &&
+        (strcmp(c1->string, c2->string) == 0);
+}
+
+/**
+ * Return true if the disjuncts are equal (ignoring priority fields)
+ * and the string of the disjunct.
+ */
+static int disjuncts_equal_AND(Sentence sent, Disjunct * d1, Disjunct * d2)
+{
+    Connector *e1, *e2;
+    sent->and_data.STAT_calls_to_equality_test++;
+    e1 = d1->left;
+    e2 = d2->left;
+    while((e1!=NULL) && (e2!=NULL)) {
+        if (!connectors_equal_AND(e1, e2)) break;
+        e1 = e1->next;
+        e2 = e2->next;
+    }
+    if ((e1!=NULL) || (e2!=NULL)) return FALSE;
+    e1 = d1->right;
+    e2 = d2->right;
+    while((e1!=NULL) && (e2!=NULL)) {
+        if (!connectors_equal_AND(e1, e2)) break;
+        e1 = e1->next;
+        e2 = e2->next;
+    }
+    if ((e1!=NULL) || (e2!=NULL)) return FALSE;
+    return TRUE;
+}
+
+/**
+ * Create a new disjunct that is the GCD of d1 and d2.
+ * It assumes that the disjuncts are of the same type, so the
+ * GCD will not be empty.
+ */
+static Disjunct * intersect_disjuncts(Sentence sent, Disjunct * d1, Disjunct * d2)
+{
+    Disjunct * d;
+    Connector *c1, *c2, *c;
+    d = copy_disjunct(d1);
+    c = d->left;
+    c1 = d1->left;
+    c2 = d2->left;
+    while (c1!=NULL) {
+        connector_set_string (c, intersect_strings(sent, c1->string, c2->string));
+        c->multi = (c1->multi) && (c2->multi);
+        c = c->next; c1 = c1->next; c2 = c2->next;
+    }
+    c = d->right;
+    c1 = d1->right;
+    c2 = d2->right;
+    while (c1!=NULL) {
+        connector_set_string (c, intersect_strings(sent, c1->string, c2->string));
+        c->multi = (c1->multi) && (c2->multi);
+        c = c->next; c1 = c1->next; c2 = c2->next;
+    }
+    return d;
+}
+
+/**
+ * (1) look for the given disjunct in the table structures
+ * if it's already in the table structures, do nothing
+ * (2) otherwise make a copy of it, and put it into the table structures
+ * (3) also put all of the GCDs of this disjunct with all of the
+ * other matching disjuncts into the table.
+ *
+ * The costs are set to zero.
+ * Note that this has no effect on disjunct d.
+ */
+static void put_disjunct_into_table(Sentence sent, Disjunct *d)
+{
+    Disjunct *d1=NULL, *d2, *di, *d_copy;
+    Label_node * lp;
+    int h, k;
+
+    h = and_hash_disjunct(d);
+
+    for (lp = sent->and_data.hash_table[h]; lp != NULL; lp = lp->next)
+    {
+        d1 = sent->and_data.label_table[lp->label];
+        if (disjunct_types_equal(d,d1)) break;
+    }
+    if (lp != NULL)
+    {
+        /* there is already a label for disjuncts of this type */
+        /* d1 points to the list of disjuncts of this type already there */
+        while(d1 != NULL)
+        {
+            if (disjuncts_equal_AND(sent, d1, d)) return;
+            d1 = d1->next;
+        }
+        /* now we must put the d disjunct in there, and all of the GCDs of
+           it with the ones already there.
+
+           This is done as follows. We scan through the list of disjuncts
+           computing the gcd of the new one with each of the others, putting
+           the resulting disjuncts onto another list rooted at d2.
+           Now insert d into the the list already there. Now for each
+           one on the d2 list, put it in if it isn't already there.
+
+           Here we're making use of the following theorem: Given a
+           collection of sets s1, s2 ... sn closed under intersection,
+           to if we add a new set s to the collection and also add
+           all the intersections between s and s1...sn to the collection,
+           then the collection is still closed under intersection.
+
+           Use a Venn diagram to prove this theorem.
+
+        */
+        d_copy = copy_disjunct(d);
+        d_copy->cost = 0;
+        k = lp->label;
+        d2 = NULL;
+        for (d1=sent->and_data.label_table[k]; d1!=NULL; d1 = d1->next) {
+            di = intersect_disjuncts(sent, d_copy, d1);
+            di->next = d2;
+            d2 = di;
+        }
+        d_copy->next = sent->and_data.label_table[k];
+        sent->and_data.label_table[k] = d_copy;
+        for (;d2 != NULL; d2 = di) {
+            di = d2->next;
+            for (d1 = sent->and_data.label_table[k]; d1 != NULL; d1 = d1->next) {
+                if (disjuncts_equal_AND(sent, d1, d2)) break;
+            }
+            if (d1 == NULL) {
+                sent->and_data.STAT_N_disjuncts++;
+                d2->next = sent->and_data.label_table[k];
+                sent->and_data.label_table[k] = d2;
+            } else {
+                d2->next = NULL;
+                free_disjuncts(d2);
+            }
+        }
+    } else {
+        /* create a new label for disjuncts of this type */
+        d_copy = copy_disjunct(d);
+        d_copy->cost = 0;
+        d_copy->next = NULL;
+        if (sent->and_data.LT_size == sent->and_data.LT_bound) grow_LT(sent);
+        lp = (Label_node *) xalloc(sizeof(Label_node));
+        lp->next = sent->and_data.hash_table[h];
+        sent->and_data.hash_table[h] = lp;
+        lp->label = sent->and_data.LT_size;
+        sent->and_data.label_table[sent->and_data.LT_size] = d_copy;
+        sent->and_data.LT_size++;
+        sent->and_data.STAT_N_disjuncts++;
+    }
+}
+
+/**
+ * A sub disjuct of d is any disjunct obtained by killing the tail
+ * of either connector list at any point.
+ * Here we go through each sub-disjunct of d, and put it into our
+ * table data structure.
+ *
+ * The function has no side effects on d.
+ */
+static void extract_all_fat_links(Sentence sent, Disjunct * d)
+{
+    Connector * cl, * cr, *tl, *tr;
+    tl = d->left;
+    d->left = NULL;
+    for (cr = d->right; cr!=NULL; cr = cr->next) {
+        tr = cr->next;
+        cr->next = NULL;
+        if (is_appropriate(sent, d)) put_disjunct_into_table(sent, d);
+        cr->next = tr;
+    }
+    d->left = tl;
+
+    tr = d->right;
+    d->right = NULL;
+    for (cl = d->left; cl!=NULL; cl = cl->next) {
+        tl = cl->next;
+        cl->next = NULL;
+        if (is_appropriate(sent, d)) put_disjunct_into_table(sent, d);
+        cl->next = tl;
+    }
+    d->right = tr;
+
+    for (cl = d->left; cl!=NULL; cl = cl->next) {
+        for (cr = d->right; cr!=NULL; cr = cr->next) {
+            tl = cl->next;
+            tr = cr->next;
+            cl->next = cr->next = NULL;
+
+            if (is_appropriate(sent, d)) put_disjunct_into_table(sent, d);
+
+            cl->next = tl;
+            cr->next = tr;
+        }
+    }
+}
+
+/**
+ * put the next len characters from c->string (skipping upper
+ * case ones) into s. If there are fewer than this, pad with '*'s.
+ * Then put in a character for the multi match bit of c.
+ * Then put in a '\0', and return a pointer to this place.
+ */
+static char * stick_in_one_connector(char *s, Connector *c, int len)
+{
+    const char * t;
+
+    t = skip_utf8_upper(c->string);
+
+    while (*t != '\0') {
+        *s++ = *t++;
+        len--;
+    }
+    while (len > 0) {
+        *s++ = '*';
+        len--;
+    }
+    if (c->multi) *s++ = '*'; else *s++ = '^'; /* check this sometime */
+    *s = '\0';
+    return s;
+}
+
+/**
+ * This takes a label k, modifies the list of disjuncts with that
+ * label. For each such disjunct, it computes the string that
+ * will be used in the fat connector that represents it.
+ *
+ * The only hard part is finding the length of each of the strings
+ * so that "*" can be put in. A better explanation will have to wait.
+ */
+static void compute_matchers_for_a_label(Sentence sent, int k)
+{
+    char buff[2*MAX_WORD];
+    int lengths[MAX_LINKS];
+    int N_connectors, i, j;
+    Connector * c;
+    Disjunct * d;
+    const char *cs;
+    char *s;
+
+    d = sent->and_data.label_table[k];
+
+    N_connectors = 0;
+    for (c=d->left; c != NULL; c = c->next) N_connectors ++;
+    for (c=d->right; c != NULL; c = c->next) N_connectors ++;
+
+    for (i=0; i<N_connectors; i++) lengths[i] = 0;
+    while(d != NULL) {
+        i = 0;
+        for (c=d->left; c != NULL; c = c->next) {
+            cs = skip_utf8_upper(c->string);
+            j = strlen(cs);
+            if (j > lengths[i]) lengths[i] = j;
+            i++;
+        }
+        for (c=d->right; c != NULL; c = c->next) {
+            cs = c->string;
+            cs = skip_utf8_upper(cs);
+            j = strlen(cs);
+            if (j > lengths[i]) lengths[i] = j;
+            i++;
+        }
+        d = d->next;
+    }
+
+    for (d = sent->and_data.label_table[k]; d!= NULL; d = d->next)
+    {
+        i=0;
+        s = buff;
+        for (c=d->left; c != NULL; c = c->next) {
+            s = stick_in_one_connector(s, c, lengths[i]);
+            i++;
+        }
+        for (c=d->right; c != NULL; c = c->next) {
+            s = stick_in_one_connector(s, c, lengths[i]);
+            i++;
+        }
+        d->string = string_set_add(buff, sent->string_set);
+    }
+}
+
+/**
+ * Goes through the entire sentence and builds the fat link tables
+ * for all the disjuncts of all the words.
+ */
+void build_conjunction_tables(Sentence sent)
+{
+    int w;
+    int k;
+    Disjunct * d;
+
+    init_HT(sent);
+    init_LT(sent);
+    sent->and_data.STAT_N_disjuncts = 0;
+    sent->and_data.STAT_calls_to_equality_test = 0;
+
+    for (w=0; w<sent->length; w++) {
+        for (d=sent->word[w].d; d!=NULL; d=d->next) {
+            extract_all_fat_links(sent, d);
+        }
+    }
+
+    for (k=0; k<sent->and_data.LT_size; k++) {
+        compute_matchers_for_a_label(sent, k);
+    }
+}
+
+void print_AND_statistics(Sentence sent)
+{
+    printf("Number of disjunct types (labels): %d\n", sent->and_data.LT_size);
+    printf("Number of disjuncts in the table: %d\n", sent->and_data.STAT_N_disjuncts);
+    if (sent->and_data.LT_size != 0) {
+        printf("average list length: %f\n",
+            (float)sent->and_data.STAT_N_disjuncts/sent->and_data.LT_size);
+    }
+    printf("Number of equality tests: %d\n", sent->and_data.STAT_calls_to_equality_test);
+}
+
+/**
+ * Fill in the fields of c for the disjunct. This must be in
+ * the table data structures. The label field and the string field
+ * are filled in appropriately. Priority is set to UP_priority.
+ */
+static void connector_for_disjunct(Sentence sent, Disjunct * d, Connector * c)
+{
+    int h;
+    Disjunct * d1 = NULL;
+    Label_node * lp;
+
+    h = and_hash_disjunct(d);
+
+    for (lp = sent->and_data.hash_table[h]; lp != NULL; lp = lp->next) {
+        d1 = sent->and_data.label_table[lp->label];
+        if (disjunct_types_equal(d,d1)) break;
+    }
+    assert(lp != NULL, "A disjunct I inserted was not there. (1)");
+
+    while(d1 != NULL) {
+        if (disjuncts_equal_AND(sent, d1, d)) break;
+        d1 = d1->next;
+    }
+
+    assert(d1 != NULL, "A disjunct I inserted was not there. (2)");
+
+    c->label = lp->label;
+    connector_set_string(c, d1->string);
+    c->priority = UP_priority;
+    c->multi = FALSE;
+}
+
+
|
834
|
+
/**
 * This function allocates and returns a list of disjuncts.
 * This is the one obtained by substituting each contiguous
 * non-empty subrange of d (incident on the center) by an appropriate
 * fat link, in two possible positions. Does not affect d.
 * The cost of d is inherited by all of the disjuncts in the result.
 */
static Disjunct * build_fat_link_substitutions(Sentence sent, Disjunct *d)
{
    Connector * cl, * cr, *tl, *tr, *wc, work_connector;
    Disjunct *d1, *wd, work_disjunct, *d_list;
    if (d==NULL) return NULL;
    wd = &work_disjunct;
    wc = init_connector(&work_connector);
    d_list = NULL;
    *wd = *d;
    tl = d->left;
    d->left = NULL;
    for (cr = d->right; cr!=NULL; cr = cr->next) {
        tr = cr->next;
        cr->next = NULL;
        if (is_appropriate(sent, d)) {
            connector_for_disjunct(sent, d, wc);
            wd->left = tl;
            wd->right = wc;
            wc->next = tr;
            d1 = copy_disjunct(wd);
            d1->next = d_list;
            d_list = d1;
            wd->left = wc;
            wc->next = tl;
            wd->right = tr;
            d1 = copy_disjunct(wd);
            d1->next = d_list;
            d_list = d1;
        }
        cr->next = tr;
    }
    d->left = tl;

    tr = d->right;
    d->right = NULL;
    for (cl = d->left; cl!=NULL; cl = cl->next) {
        tl = cl->next;
        cl->next = NULL;
        if (is_appropriate(sent, d)) {
            connector_for_disjunct(sent, d, wc);
            wd->left = tl;
            wd->right = wc;
            wc->next = tr;
            d1 = copy_disjunct(wd);
            d1->next = d_list;
            d_list = d1;
            wd->left = wc;
            wc->next = tl;
            wd->right = tr;
            d1 = copy_disjunct(wd);
            d1->next = d_list;
            d_list = d1;
        }
        cl->next = tl;
    }
    d->right = tr;

    for (cl = d->left; cl!=NULL; cl = cl->next) {
        for (cr = d->right; cr!=NULL; cr = cr->next) {
            tl = cl->next;
            tr = cr->next;
            cl->next = cr->next = NULL;
            if (is_appropriate(sent, d)) {
                connector_for_disjunct(sent, d, wc);
                wd->left = tl;
                wd->right = wc;
                wc->next = tr;
                d1 = copy_disjunct(wd);
                d1->next = d_list;
                d_list = d1;
                wd->left = wc;
                wc->next = tl;
                wd->right = tr;
                d1 = copy_disjunct(wd);
                d1->next = d_list;
                d_list = d1;
            }
            cl->next = tl;
            cr->next = tr;
        }
    }
    return d_list;
}

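To see what build_fat_link_substitutions() actually enumerates, the standalone sketch below lists the center-incident subranges that its three loops would hand to connector_for_disjunct() for a hypothetical disjunct with two left and three right connectors. The sizes are made up for illustration; each listed subrange yields two candidate disjuncts (fat connector attached on the right or on the left), before the is_appropriate() filter is applied.

#include <stdio.h>

int main(void)
{
    int n_left = 2, n_right = 3;   /* example sizes, chosen arbitrarily */
    int i, j, count = 0;

    for (j = 1; j <= n_right; j++) {          /* first loop: right prefixes only */
        printf("replace right[0..%d]\n", j - 1);
        count += 2;
    }
    for (i = 1; i <= n_left; i++) {           /* second loop: left prefixes only */
        printf("replace left[0..%d]\n", i - 1);
        count += 2;
    }
    for (i = 1; i <= n_left; i++) {           /* third loop: both sides */
        for (j = 1; j <= n_right; j++) {
            printf("replace left[0..%d] + right[0..%d]\n", i - 1, j - 1);
            count += 2;
        }
    }
    printf("candidate disjuncts: %d\n", count);   /* 2*(L + R + L*R) = 22 here */
    return 0;
}
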
/**
 * This is basically a "map" function for build_fat_link_substitutions.
 * It's applied to the disjuncts for all regular words of the sentence.
 */
Disjunct * explode_disjunct_list(Sentence sent, Disjunct *d)
{
    Disjunct *d1;

    d1 = NULL;

    for (; d!=NULL; d = d->next) {
        d1 = catenate_disjuncts(d1, build_fat_link_substitutions(sent, d));
    }
    return d1;
}

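explode_disjunct_list() is just map-and-concatenate over a singly linked list. The sketch below shows that shape with a toy node type; toy_expand stands in for build_fat_link_substitutions() and toy_catenate for catenate_disjuncts(), neither of which is the library's real code.

#include <stdio.h>
#include <stdlib.h>

typedef struct Node { int value; struct Node *next; } Node;

/* Append list b onto the end of list a; returns the combined head. */
static Node * toy_catenate(Node *a, Node *b)
{
    Node *p;
    if (a == NULL) return b;
    for (p = a; p->next != NULL; p = p->next) {}
    p->next = b;
    return a;
}

/* A stand-in "expansion" that maps one node to a short new list, the way
   build_fat_link_substitutions() maps one disjunct to many. */
static Node * toy_expand(int value)
{
    Node *n = malloc(sizeof(Node));
    n->value = value * 10;
    n->next = NULL;
    return n;
}

/* Map-and-concatenate, mirroring the loop in explode_disjunct_list(). */
static Node * toy_explode(Node *in)
{
    Node *out = NULL;
    for (; in != NULL; in = in->next) {
        out = toy_catenate(out, toy_expand(in->value));
    }
    return out;
}

int main(void)
{
    Node b = { 2, NULL }, a = { 1, &b };
    Node *p;
    for (p = toy_explode(&a); p != NULL; p = p->next) {
        printf("%d\n", p->value);   /* prints 10 then 20 */
    }
    return 0;
}
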
/**
 * Builds and returns a disjunct list for the comma. These are the
 * disjuncts that are used when "," operates in conjunction with "and".
 * Does not deal with the ", and" issue, nor the other uses
 * of comma.
 */
Disjunct * build_COMMA_disjunct_list(Sentence sent)
{
    int lab;
    Disjunct *d1, *d2, *d, work_disjunct, *wd;
    Connector work_connector1, work_connector2, *c1, *c2;
    Connector work_connector3, *c3;
    c1 = init_connector(&work_connector1);
    c2 = init_connector(&work_connector2);
    c3 = init_connector(&work_connector3);
    wd = &work_disjunct;

    d1 = NULL; /* where we put the list we're building */

    c1->next = NULL;
    c2->next = c3;
    c3->next = NULL;
    c1->priority = c3->priority = DOWN_priority;
    c2->priority = UP_priority;
    c1->multi = c2->multi = c3->multi = FALSE;
    wd->left = c1;
    wd->right = c2;
    wd->string = ","; /* *** fix this later?? */
    wd->next = NULL;
    wd->cost = 0;
    for (lab = 0; lab < sent->and_data.LT_size; lab++) {
        for (d = sent->and_data.label_table[lab]; d!=NULL; d=d->next) {
            c1->string = c2->string = c3->string = d->string;
            c1->label = c2->label = c3->label = lab;
            d2 = copy_disjunct(wd);
            d2->next = d1;
            d1 = d2;
        }
    }
    return d1;
}

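Every disjunct that the loop above emits has the same fixed wiring; only the label and connector string change per label_table entry. The sketch below rebuilds that wiring with a toy connector type and prints it: one fat DOWN connector on the left, and a fat UP connector followed by a fat DOWN connector on the right. ToyCon is an illustrative stand-in, not the library's Connector.

#include <stdio.h>

typedef struct ToyCon { const char *side; const char *prio; struct ToyCon *next; } ToyCon;

int main(void)
{
    /* Mirror of the fixed wiring above: c1 alone on the left,
       c2 followed by c3 on the right. */
    ToyCon c3 = { "right", "DOWN", NULL };
    ToyCon c2 = { "right", "UP",   &c3  };
    ToyCon c1 = { "left",  "DOWN", NULL };
    ToyCon *lists[2] = { &c1, &c2 };
    ToyCon *p;
    int i;

    for (i = 0; i < 2; i++) {
        for (p = lists[i]; p != NULL; p = p->next) {
            printf("%s connector, priority %s\n", p->side, p->prio);
        }
    }
    return 0;
}
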
/**
 * Builds and returns a disjunct list for "and", "or" and "nor".
 * For each disjunct in the label_table, we build three disjuncts;
 * this means that "Danny and Tycho and Billy" will be parsable in
 * two ways. I don't know an easy way to avoid this.
 * The string is either "and", "or", or "nor" at the moment.
 */
Disjunct * build_AND_disjunct_list(Sentence sent, char * s)
{
    int lab;
    Disjunct *d_list, *d1, *d3, *d, *d_copy;
    Connector *c1, *c2, *c3;

    d_list = NULL; /* where we put the list we're building */

    for (lab = 0; lab < sent->and_data.LT_size; lab++) {
        for (d = sent->and_data.label_table[lab]; d!=NULL; d=d->next) {
            d1 = build_fat_link_substitutions(sent, d);
            d_copy = copy_disjunct(d); /* also include the thing itself! */
            d_copy->next = d1;
            d1 = d_copy;
            for (; d1 != NULL; d1 = d3) {
                d3 = d1->next;

                c1 = connector_new();
                c2 = connector_new();
                c1->priority = c2->priority = DOWN_priority;
                connector_set_string(c1, d->string);
                connector_set_string(c2, d->string);
                c1->label = c2->label = lab;

                d1->string = s;

                if (d1->right == NULL) {
                    d1->right = c2;
                } else {
                    for (c3=d1->right; c3->next != NULL; c3 = c3->next)
                        ;
                    c3->next = c2;
                }
                if (d1->left == NULL) {
                    d1->left = c1;
                } else {
                    for (c3=d1->left; c3->next != NULL; c3 = c3->next)
                        ;
                    c3->next = c1;
                }
                d1->next = d_list;
                d_list = d1;
            }
        }
    }
#if defined(PLURALIZATION)
/* here is where "and" makes singular into plural. */
/* must accommodate "he and I are good", "Davy and I are good",
   "Danny and Davy are good", and reject all of these with "is"
   instead of "are".

   The SI connectors must also be modified to accommodate "are John
   and Dave here", but kill "is John and Dave here".
*/
    if (strcmp(s, "and") == 0)
    {
        for (d1 = d_list; d1 != NULL; d1 = d1->next)
        {
            for (c1 = d1->right; c1 != NULL; c1 = c1->next)
            {
                if ((c1->string[0] == 'S') &&
                    ((c1->string[1] == '^') ||
                     (c1->string[1] == 's') ||
                     (c1->string[1] == 'p') ||
                     (c1->string[1] == '\0')))
                {
                    connector_set_string(c1, "Sp");
                }
            }
            for (c1 = d1->left; c1 != NULL; c1 = c1->next)
            {
                if ((c1->string[0] == 'S') && (c1->string[1] == 'I') &&
                    ((c1->string[2] == '^') ||
                     (c1->string[2] == 's') ||
                     (c1->string[2] == 'p') ||
                     (c1->string[2] == '\0')))
                {
                    connector_set_string(c1, "SIp");
                }
            }
        }
    }
    /*
       "a cat or a dog is here" vs "a cat or a dog are here"
       The first seems right, the second seems wrong. I'll stick with this.

       That is, "or" has the property that if both parts are the same in
       number, we use that, but if they differ, we use plural.

       The connectors on "I" must be handled specially. We accept
       "I or the dogs are here" but reject "I or the dogs is here".
    */

    /* The code here still does not work "right": it rejects "is John or I invited"
       and accepts "I or my friend know what happened".

       The more generous code for "nor" has been used instead.
    */
    /*
    else if (strcmp(s, "or") == 0) {
        for (d1 = d_list; d1!=NULL; d1=d1->next) {
            for (c1=d1->right; c1!=NULL; c1=c1->next) {
                if (c1->string[0] == 'S') {
                    if (c1->string[1]=='^') {
                        if (c1->string[2]=='a') {
                            connector_set_string(c1, "Ss");
                        } else {
                            connector_set_string(c1, "Sp");
                        }
                    } else if ((c1->string[1]=='p') && (c1->string[2]=='a')) {
                        connector_set_string(c1, "Sp");
                    }
                }
            }
            for (c1=d1->left; c1!=NULL; c1=c1->next) {
                if ((c1->string[0] == 'S') && (c1->string[1] == 'I')) {
                    if (c1->string[2]=='^') {
                        if (c1->string[3]=='a') {
                            connector_set_string(c1, "Ss");
                        } else {
                            connector_set_string(c1, "Sp");
                        }
                    } else if ((c1->string[2]=='p') && (c1->string[3]=='a')) {
                        connector_set_string(c1, "Sp");
                    }
                }
            }
        }
    }
    */
    /*
       It appears that the "nor" of two things can be either singular or
       plural. "neither she nor John likes dogs"
               "neither she nor John like dogs"
    */
    else if ((strcmp(s,"nor")==0) || (strcmp(s,"or")==0)) {
        for (d1 = d_list; d1!=NULL; d1=d1->next) {
            for (c1=d1->right; c1!=NULL; c1=c1->next) {
                if ((c1->string[0] == 'S') &&
                    ((c1->string[1]=='^') ||
                     (c1->string[1]=='s') ||
                     (c1->string[1]=='p'))) {
                    connector_set_string(c1, "S");
                }
            }
            for (c1=d1->left; c1!=NULL; c1=c1->next) {
                if ((c1->string[0] == 'S') && (c1->string[1] == 'I') &&
                    ((c1->string[2]=='^') ||
                     (c1->string[2]=='s') ||
                     (c1->string[2]=='p'))) {
                    connector_set_string(c1, "SI");
                }
            }
        }
    }

#endif
    return d_list;
}

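The PLURALIZATION block above amounts to a small string rule for the "and" case: a right-hand subject connector S, S^, Ss or Sp is forced to plural Sp (and SI... on the left to SIp). The standalone snippet below replays the right-hand test on a few literal strings; it only illustrates the rule and does not call into the library.

#include <stdio.h>

/* Mirrors the test used above for right-hand subject connectors under "and":
   'S' followed by '^', 's', 'p' or end of string is forced to plural "Sp". */
static const char * pluralize_S(const char *s)
{
    if (s[0] == 'S' &&
        (s[1] == '^' || s[1] == 's' || s[1] == 'p' || s[1] == '\0'))
        return "Sp";
    return s;
}

int main(void)
{
    const char *tests[] = { "S", "S^", "Ss", "Sp", "SIs", "O" };
    size_t i;
    for (i = 0; i < sizeof tests / sizeof tests[0]; i++)
        printf("%-4s -> %s\n", tests[i], pluralize_S(tests[i]));
    /* "S", "S^", "Ss", "Sp" map to "Sp"; "SIs" and "O" are left alone
       (the SI case is handled by the separate left-hand rule above). */
    return 0;
}
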
/* The following routines' purpose is to eliminate all but the
   canonical linkage (of a collection of linkages that are identical
   except for fat links). An example of the problem is
   "I went to a talk and ate lunch". Without the canonical checker
   this has two linkages with identical structure.

   We restrict our attention to a collection of linkages that are all
   isomorphic. Consider the set of all disjuncts that are used on one
   word (over the collection of linkages). This set is closed under GCD,
   since if two disjuncts could both be used in that position, then so
   could their GCD. The GCD has been constructed and put in the label table.

   The canonical linkage is the one in which the minimal disjunct that
   ever occurs in a position is used in that position. It is easy to
   prove that a linkage is not canonical -- just find one of its fat
   disjuncts that can be replaced by a smaller one. If this cannot be
   done, then the linkage is canonical.

   The algorithm uses link_array[] and chosen_disjuncts[] as input to
   describe the linkage, and also uses the label_table.

   (1) find all the words with fat disjuncts
   (2) scan all links and build, for each fat disjunct used,
       an "image" structure that contains what this disjunct must
       connect to in the rest of the linkage.
   (3) For each fat disjunct, run through the label_table for disjuncts
       with the same label, considering only those with strictly more
       restricted match sets (this uses the string fields of the disjuncts
       from the table).
   (4) For each that passes this test, we see if it can replace the chosen
       disjunct. This is performed by examining how this disjunct
       compares with the image structure for this word.
*/

struct Image_node_struct {
    Image_node * next;
    Connector * c;  /* the connector the place on the disjunct must match */
    int place;      /* Indicates the place in the fat disjunct where this
                       connector must connect. If 0 then this is a fat
                       connector. If >0 then go place to the right, if
                       <0 then go -place to the left. */
};

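The place field is easiest to read alongside the code that consumes it later in is_canonical_linkage(): 0 selects the fat up-connector itself, +n the n-th connector of the right list, and -n the n-th connector of the left list. The sketch below walks toy connector lists the same way; ToyConnector and resolve_place are illustrative stand-ins, not library types.

#include <stdio.h>

typedef struct ToyConnector { const char *name; struct ToyConnector *next; } ToyConnector;

/* Resolve a signed "place" against a disjunct's connector lists, the way
   is_canonical_linkage() does below. */
static const char * resolve_place(ToyConnector *left, ToyConnector *right, int place)
{
    ToyConnector *c;
    if (place == 0) return "(fat up-connector)";
    if (place > 0) {
        for (c = right; place > 1; place--) c = c->next;
    } else {
        for (c = left; place < -1; place++) c = c->next;
    }
    return c->name;
}

int main(void)
{
    ToyConnector r2 = { "R2", NULL }, r1 = { "R1", &r2 };
    ToyConnector l2 = { "L2", NULL }, l1 = { "L1", &l2 };
    printf("%s %s %s\n",
           resolve_place(&l1, &r1, 2),    /* R2 */
           resolve_place(&l1, &r1, -1),   /* L1 */
           resolve_place(&l1, &r1, 0));   /* the fat up-connector */
    return 0;
}
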
/**
 * Fill in the has_fat_down array. Uses link_array[].
 * Returns TRUE if there exists at least one word with a
 * fat down label.
 */
int set_has_fat_down(Sentence sent)
{
    int link, w, N_fat;
    Parse_info pi = sent->parse_info;

    N_fat = 0;

    for (w = 0; w < pi->N_words; w++)
    {
        pi->has_fat_down[w] = FALSE;
    }

    for (link = 0; link < pi->N_links; link++)
    {
        if (pi->link_array[link].lc->priority == DOWN_priority)
        {
            N_fat ++;
            pi->has_fat_down[pi->link_array[link].l] = TRUE;
        }
        else if (pi->link_array[link].rc->priority == DOWN_priority)
        {
            N_fat ++;
            pi->has_fat_down[pi->link_array[link].r] = TRUE;
        }
    }
    return (N_fat > 0);
}

static void free_image_array(Parse_info pi)
{
    int w;
    Image_node * in, * inx;
    for (w = 0; w < pi->N_words; w++)
    {
        for (in = pi->image_array[w]; in != NULL; in = inx)
        {
            inx = in->next;
            xfree((char *)in, sizeof(Image_node));
        }
        pi->image_array[w] = NULL;
    }
}

/**
 * Uses link_array, chosen_disjuncts, and down_label to construct
 * image_array
 */
static void build_image_array(Sentence sent)
{
    int link, end, word;
    Connector * this_end_con, *other_end_con, * upcon, * updiscon, *clist;
    Disjunct * dis, * updis;
    Image_node * in;
    Parse_info pi = sent->parse_info;

    for (word=0; word<pi->N_words; word++)
    {
        pi->image_array[word] = NULL;
    }

    for (end = -1; end <= 1; end += 2)
    {
        for (link = 0; link < pi->N_links; link++)
        {
            if (end < 0)
            {
                word = pi->link_array[link].l;
                if (!pi->has_fat_down[word]) continue;
                this_end_con = pi->link_array[link].lc;
                other_end_con = pi->link_array[link].rc;
                dis = pi->chosen_disjuncts[word];
                clist = dis->right;
            }
            else
            {
                word = pi->link_array[link].r;
                if (!pi->has_fat_down[word]) continue;
                this_end_con = pi->link_array[link].rc;
                other_end_con = pi->link_array[link].lc;
                dis = pi->chosen_disjuncts[word];
                clist = dis->left;
            }

            if (this_end_con->priority == DOWN_priority) continue;
            if ((this_end_con->label != NORMAL_LABEL) &&
                (this_end_con->label < 0)) continue;
            /* no need to construct an image node for down links,
               or comma links or either/neither links */

            in = (Image_node *) xalloc(sizeof(Image_node));
            in->next = pi->image_array[word];
            pi->image_array[word] = in;
            in->c = other_end_con;

            /* the rest of this code is for computing in->place */
            if (this_end_con->priority == UP_priority)
            {
                in->place = 0;
            }
            else
            {
                in->place = 1;
                if ((dis->left != NULL) &&
                    (dis->left->priority == UP_priority))
                {
                    upcon = dis->left;
                }
                else if ((dis->right != NULL) &&
                         (dis->right->priority == UP_priority))
                {
                    upcon = dis->right;
                }
                else
                {
                    upcon = NULL;
                }
                if (upcon != NULL)
                {
                    /* add on extra for a fat up link */
                    updis = sent->and_data.label_table[upcon->label];
                    if (end > 0)
                    {
                        updiscon = updis->left;
                    }
                    else
                    {
                        updiscon = updis->right;
                    }
                    for (; updiscon != NULL; updiscon = updiscon->next)
                    {
                        in->place ++;
                    }
                }
                for (; clist != this_end_con; clist = clist->next)
                {
                    if (clist->label < 0) in->place++;
                }
                in->place = in->place * (-end);
            }
        }
    }
}

/**
 * returns TRUE if string s represents a strictly smaller match set
 * than does t
 */
static int strictly_smaller(const char * s, const char * t)
{
    int strictness;
    strictness = 0;
    for (; (*s!='\0') && (*t!='\0'); s++,t++) {
        if (*s == *t) continue;
        if ((*t == '*') || (*s == '^')) {
            strictness++;
        } else {
            return FALSE;
        }
    }
    assert(! ((*s!='\0') || (*t!='\0')), "s and t should be the same length!");
    return (strictness > 0);
}

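Concretely, a position where t has '*' or s has '^' is allowed to differ and counts toward strictness; any other mismatch disqualifies s, and at least one counted position is required. The standalone check below replays the same loop on literal connector strings (equal lengths assumed) rather than calling the static function above.

#include <stdio.h>

/* Local replay of the comparison above, for equal-length connector strings:
   returns 1 when s names a strictly smaller match set than t. */
static int toy_strictly_smaller(const char *s, const char *t)
{
    int strictness = 0;
    for (; *s != '\0' && *t != '\0'; s++, t++) {
        if (*s == *t) continue;
        if (*t == '*' || *s == '^') strictness++;
        else return 0;
    }
    return strictness > 0;
}

int main(void)
{
    printf("%d\n", toy_strictly_smaller("Ss", "S*"));  /* 1: 's' is tighter than '*' */
    printf("%d\n", toy_strictly_smaller("S*", "S*"));  /* 0: identical, not strictly smaller */
    printf("%d\n", toy_strictly_smaller("S*", "Ss"));  /* 0: s is looser, not smaller */
    return 0;
}
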
/**
 * dis points to a disjunct in the label_table. label is the label
 * of a different set of disjuncts. These can be derived from the label
 * of dis. Find the specific disjunct in label_table[label]
 * which corresponds to dis.
 */
static Disjunct * find_subdisjunct(Sentence sent, Disjunct * dis, int label)
{
    Disjunct * d;
    Connector * cx, *cy;
    for (d=sent->and_data.label_table[label]; d!=NULL; d=d->next)
    {
        for (cx=d->left, cy=dis->left; cx!=NULL; cx=cx->next,cy=cy->next)
        {
            /* if ((cx->string != cy->string) || */
            if ((strcmp(connector_get_string(cx),
                        connector_get_string(cy)) != 0) ||
                (cx->multi != cy->multi)) break; /* have to check multi? */
        }
        if (cx!=NULL) continue;
        for (cx=d->right, cy=dis->right; cx!=NULL; cx=cx->next,cy=cy->next)
        {
            /* if ((cx->string != cy->string) || */
            if ((strcmp(connector_get_string(cx),
                        connector_get_string(cy)) != 0) ||
                (cx->multi != cy->multi)) break;
        }
        if (cx==NULL) break;
    }
    assert(d!=NULL, "Never found subdisjunct");
    return d;
}

/**
 * is_canonical_linkage --
 * This uses link_array[], chosen_disjuncts[], has_fat_down[].
 * It assumes that there is a fat link in the current linkage.
 * See the comments above for more information about how it works.
 */
int is_canonical_linkage(Sentence sent)
{
    int w, d_label=0, place;
    Connector *d_c, *c, dummy_connector, *upcon;
    Disjunct *dis, *chosen_d;
    Image_node * in;
    Parse_info pi = sent->parse_info;

    init_connector(&dummy_connector);
    dummy_connector.priority = UP_priority;

    build_image_array(sent);

    for (w=0; w<pi->N_words; w++)
    {
        if (!pi->has_fat_down[w]) continue;
        chosen_d = pi->chosen_disjuncts[w];

        /* there must be a down connector in both the left and right list */
        for (d_c = chosen_d->left; d_c!=NULL; d_c=d_c->next)
        {
            if (d_c->priority == DOWN_priority)
            {
                d_label = d_c->label;
                break;
            }
        }
        assert(d_c != NULL, "Should have found the down link.");

        if ((chosen_d->left != NULL) &&
            (chosen_d->left->priority == UP_priority)) {
            upcon = chosen_d->left;
        } else if ((chosen_d->right != NULL) &&
                   (chosen_d->right->priority == UP_priority)) {
            upcon = chosen_d->right;
        } else {
            upcon = NULL;
        }

        /* check that the disjunct on w is minimal (canonical) */

        for (dis=sent->and_data.label_table[d_label]; dis!=NULL; dis=dis->next)
        {
            /* now, reject a disjunct if it's not strictly below the old */
            if (!strictly_smaller(dis->string,
                                  connector_get_string(d_c))) continue;

            /* Now, it has to match the image connectors */
            for (in = pi->image_array[w]; in != NULL; in = in->next)
            {
                place = in->place;
                if (place == 0)
                {
                    assert(upcon != NULL, "Should have found an up link");
                    dummy_connector.label = upcon->label;

                    /* now we have to compute the string of the
                       disjunct with upcon->label that corresponds
                       to dis */
                    if (upcon->label == d_label)
                    {
                        connector_set_string(&dummy_connector, dis->string);
                    } else {
                        connector_set_string(&dummy_connector,
                            find_subdisjunct(sent, dis, upcon->label)->string);
                    }

                    /* I hope using x_match here is right */
                    if (!x_match(sent, &dummy_connector, in->c)) break;
                } else if (place > 0) {
                    for (c=dis->right; place > 1; place--) {
                        c = c->next;
                    }
                    if (!x_match(sent, c, in->c)) break; /* Ditto above comment --DS 07/97 */
                } else {
                    for (c=dis->left; place < -1; place++) {
                        c = c->next;
                    }
                    if (!x_match(sent, c, in->c)) break; /* Ditto Ditto */
                }
            }

            if (in == NULL) break;
        }
        if (dis != NULL) break;
        /* there is a better disjunct than the one we're using, so this
           word is bad, so we're done */
    }
    free_image_array(pi);
    return (w == pi->N_words);
}

/**
 * This takes as input link_array[], sublinkage->link[]->l and
 * sublinkage->link[]->r (and also has_fat_down[word], which has been
 * computed in a prior call to is_canonical()), and from these
 * computes sublinkage->link[].lc and .rc. We assume these have
 * been initialized with the values from link_array. We also assume
 * that there are fat links.
 */
void compute_pp_link_array_connectors(Sentence sent, Sublinkage *sublinkage)
{
    int link, end, word, place;
    Connector * this_end_con, * upcon, * updiscon, *clist, *con, *mycon;
    Disjunct * dis, * updis, *mydis;
    Parse_info pi = sent->parse_info;

    for (end = -1; end <= 1; end += 2)
    {
        for (link=0; link<pi->N_links; link++)
        {
            if (sublinkage->link[link]->l == -1) continue;
            if (end < 0)
            {
                word = pi->link_array[link].l;
                if (!pi->has_fat_down[word]) continue;
                this_end_con = pi->link_array[link].lc;
                dis = pi->chosen_disjuncts[word];
                mydis = pi->chosen_disjuncts[sublinkage->link[link]->l];
                clist = dis->right;
            }
            else
            {
                word = pi->link_array[link].r;
                if (!pi->has_fat_down[word]) continue;
                this_end_con = pi->link_array[link].rc;
                dis = pi->chosen_disjuncts[word];
                mydis = pi->chosen_disjuncts[sublinkage->link[link]->r];
                clist = dis->left;
            }

            if (this_end_con->label != NORMAL_LABEL) continue;
            /* no need to construct a connector for up links,
               or comma links or either/neither links */

            /* Now compute the place */
            place = 0;
            if ((dis->left != NULL) &&
                (dis->left->priority == UP_priority)) {
                upcon = dis->left;
            } else if ((dis->right != NULL) &&
                       (dis->right->priority == UP_priority)) {
                upcon = dis->right;
            } else {
                upcon = NULL;
            }
            if (upcon != NULL) { /* add on extra for a fat up link */
                updis = sent->and_data.label_table[upcon->label];
                if (end > 0) {
                    updiscon = updis->left;
                } else {
                    updiscon = updis->right;
                }
                for (; updiscon != NULL; updiscon = updiscon->next) {
                    place ++;
                }
            }
            for (; clist != this_end_con; clist = clist->next) {
                if (clist->label < 0) place++;
            }
            /* place has just been computed */

            /* now find the right disjunct in the table */
            if ((mydis->left != NULL) &&
                (mydis->left->priority == UP_priority)) {
                mycon = mydis->left;
            } else if ((mydis->right != NULL) &&
                       (mydis->right->priority == UP_priority)) {
                mycon = mydis->right;
            } else {
                printf("word = %d\n", word);
                printf("fat link: [%d, %d]\n",
                       pi->link_array[link].l, pi->link_array[link].r);
                printf("thin link: [%d, %d]\n",
                       sublinkage->link[link]->l, sublinkage->link[link]->r);
                assert(FALSE, "There should be a fat UP link here");
            }

            for (dis=sent->and_data.label_table[mycon->label];
                 dis != NULL; dis=dis->next) {
                if (dis->string == connector_get_string(mycon)) break;
            }
            assert(dis!=NULL, "Should have found this connector string");
            /* the disjunct in the table has just been found */

            if (end < 0)
            {
                for (con = dis->right; place > 0; place--, con=con->next) {}
                /* sublinkage->link[link]->lc = con;  OLD CODE */
                exfree_connectors(sublinkage->link[link]->lc);
                sublinkage->link[link]->lc = excopy_connectors(con);
            }
            else
            {
                for (con = dis->left; place > 0; place--, con=con->next) {}
                /* sublinkage->link[link]->rc = con;  OLD CODE */
                exfree_connectors(sublinkage->link[link]->rc);
                sublinkage->link[link]->rc = excopy_connectors(con);
            }
        }
    }
}