grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,1317 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
|
15
|
+
#include <stdarg.h>
|
16
|
+
#include "api.h"
|
17
|
+
|
18
|
+
/**
 * The functions in this file do several things: (1) take a linkage
 * involving fat links and expand it into a sequence of linkages
 * (involving a subset of the given words), one for each way of
 * eliminating the conjunctions; (2) determine if a linkage involving
 * fat links has a structural violation; (3) make sure each of the
 * expanded linkages has a consistent post-processing behavior; and
 * (4) compute the cost of the linkage.
 */
|
27
|
+
|
28
|
+
/* Records how one link of the original (fat) linkage maps into an
 * expanded sublinkage: whether the link survives the expansion, and
 * what its patched endpoints are. */
typedef struct patch_element_struct Patch_element;
struct patch_element_struct
{
	char used;    /* TRUE if this link is used, else FALSE */
	char changed; /* TRUE if this link changed, else FALSE */
	int newl;     /* the new value of the left end */
	int newr;     /* the new value of the right end */
};
|
36
|
+
|
37
|
+
/* Scratch state threaded through the linkage analysis: the per-word
 * adjacency lists of the fat-link digraph, depth-first-search
 * bookkeeping, and the accumulators used when costing "and" lists. */
struct analyze_context_s
{
	List_o_links *word_links[MAX_SENTENCE]; /* ptr to l.o.l. out of word */
	int structure_violation; /* set TRUE when the fat-link structure is improper */

	int dfs_root_word[MAX_SENTENCE]; /* for the depth-first search */
	int dfs_height[MAX_SENTENCE];    /* to determine the order to do the root word dfs */
	int height_perm[MAX_SENTENCE];   /* permute the vertices from highest to lowest */

	/* The following are all for computing the cost of and lists */
	int visited[MAX_SENTENCE];
	int and_element_sizes[MAX_SENTENCE];
	int and_element[MAX_SENTENCE];
	int N_and_elements;
	int outside_word[MAX_SENTENCE];
	int N_outside_words;
	Patch_element patch_array[MAX_LINKS];
};
|
55
|
+
|
56
|
+
/* The DIS/CON tree: alternating levels of DIS nodes (regions of the
 * graph reachable through thin links) and CON nodes (words with fat
 * links pointing down), used to enumerate every way of eliminating
 * the conjunctions in a fat linkage. */
typedef struct CON_node_struct CON_node;
typedef struct CON_list_struct CON_list;
typedef struct DIS_list_struct DIS_list;
typedef struct Links_to_patch_struct Links_to_patch;

struct DIS_node_struct
{
	CON_list * cl;      /* the list of children */
	List_o_links * lol; /* the links that comprise this region of the graph */
	int word;           /* the word defining this node */
};

struct CON_node_struct
{
	DIS_list * dl;      /* the list of children */
	DIS_list * current; /* defines the current child */
	int word;           /* the word defining this node */
};

/* Singly-linked list cell holding one DIS child of a CON node. */
struct DIS_list_struct
{
	DIS_list * next;
	DIS_node * dn;
};

/* Singly-linked list cell holding one CON child of a DIS node. */
struct CON_list_struct
{
	CON_list * next;
	CON_node * cn;
};

struct Links_to_patch_struct
{
	Links_to_patch * next;
	int link;
	char dir; /* this is 'r' or 'l' depending on which end of the link
	             is to be patched. */
};
|
94
|
+
|
95
|
+
/*
 * Reset a sublinkage to a pristine state: clear the post-processing
 * info and violation pointers, NULL every one of its num_links link
 * slots, and zero the embedded PP_data bookkeeping.
 */
void zero_sublinkage(Sublinkage *s)
{
	int k;

	s->pp_info = NULL;
	s->violation = NULL;
	for (k = 0; k < s->num_links; k++)
	{
		s->link[k] = NULL;
	}

	memset(&s->pp_data, 0, sizeof(PP_data));
}
|
104
|
+
|
105
|
+
/**
 * Allocate a Sublinkage via xalloc with room for MAX_LINKS link slots.
 * All slots are cleared; num_links is then set to the parse's actual
 * link count.  The link count is validated BEFORE any allocation, so
 * the fixed-size array can never be indexed out of bounds and nothing
 * is allocated on the failure path.
 */
static Sublinkage * x_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	/* Validate first: everything below assumes the count fits. */
	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) xalloc(sizeof(Sublinkage));
	s->link = (Link **) xalloc(MAX_LINKS*sizeof(Link *));

	/* Clear all MAX_LINKS slots (not just the used ones), since
	 * free_sublinkage() scans the whole array. */
	s->num_links = MAX_LINKS;
	zero_sublinkage(s);

	s->num_links = pi->N_links;
	return s;
}
|
117
|
+
|
118
|
+
/**
 * Allocate a Sublinkage via exalloc, sized exactly for the parse's
 * N_links links (unlike x_create_sublinkage, which reserves MAX_LINKS
 * slots).  The link count is validated BEFORE the allocations it
 * sizes, rather than after.
 */
static Sublinkage * ex_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	/* Validate before allocating anything sized by N_links. */
	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) exalloc(sizeof(Sublinkage));
	s->link = (Link **) exalloc(pi->N_links*sizeof(Link *));
	s->num_links = pi->N_links;

	zero_sublinkage(s);

	return s;
}
|
129
|
+
|
130
|
+
/**
 * Release a sublinkage created by x_create_sublinkage(): free every
 * non-NULL link in the fixed-size array, then the array itself, then
 * the struct.
 */
static void free_sublinkage(Sublinkage *s)
{
	int i;
	for (i=0; i<MAX_LINKS; i++) {
		if (s->link[i]!=NULL) exfree_link(s->link[i]);
	}
	/* The array holds MAX_LINKS pointers (Link *), so the size handed
	 * back to xfree must match that allocation; the previous
	 * MAX_LINKS*sizeof(Link) skewed the allocator's space accounting. */
	xfree(s->link, MAX_LINKS*sizeof(Link *));
	xfree(s, sizeof(Sublinkage));
}
|
139
|
+
|
140
|
+
/*
 * Replace a link's name with a freshly exalloc'd copy of s, releasing
 * the old name's storage.
 */
static void replace_link_name(Link *l, const char *s)
{
	/* XXX can get some perf improvement by avoiding strlen! */
	char *fresh = (char *) exalloc(sizeof(char)*(strlen(s)+1));
	strcpy(fresh, s);
	exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
	l->name = fresh;
}
|
149
|
+
|
150
|
+
/*
 * Overwrite *dest with a deep copy of src, releasing whatever link
 * *dest previously held.
 */
static void copy_full_link(Link **dest, Link *src)
{
	Link *old = *dest;
	if (old != NULL) exfree_link(old);
	*dest = excopy_link(src);
}
|
155
|
+
|
156
|
+
/* end new code 9/97 ALB */
|
157
|
+
|
158
|
+
|
159
|
+
/**
|
160
|
+
* Constructs a graph in the wordlinks array based on the contents of
|
161
|
+
* the global link_array. Makes the wordlinks array point to a list of
|
162
|
+
* words neighboring each word (actually a list of links). This is a
|
163
|
+
* directed graph, constructed for dealing with "and". For a link in
|
164
|
+
* which the priorities are UP or DOWN_priority, the edge goes from the
|
165
|
+
* one labeled DOWN to the one labeled UP.
|
166
|
+
* Don't generate links edges for the bogus comma connectors.
|
167
|
+
*/
|
168
|
+
/**
 * Prepend one edge record to the adjacency list of word 'from'.
 * 'priority' is the priority of the connector at the 'from' end and
 * fixes the direction flag: 0 for a thin link, 1 for a fat link going
 * down out of this word (DOWN_priority), -1 for a fat link going up.
 */
static void record_digraph_edge(analyze_context_t *actx, int link,
                                int from, int to, int priority)
{
	List_o_links *lol = (List_o_links *) xalloc(sizeof(List_o_links));
	lol->next = actx->word_links[from];
	actx->word_links[from] = lol;
	lol->link = link;
	lol->word = to;
	if (priority == THIN_priority) {
		lol->dir = 0;
	} else if (priority == DOWN_priority) {
		lol->dir = 1;
	} else {
		lol->dir = -1;
	}
}

/**
 * Build the word_links digraph from pi->link_array: each word gets a
 * list of the links incident on it (each link contributes one record
 * at each endpoint).  For UP/DOWN priority connectors the edge is
 * directed from the DOWN end to the UP end.  Links whose label is
 * below NORMAL_LABEL (the bogus comma connectors and other special
 * links for either-or etc.) produce no edges.
 */
static void build_digraph(analyze_context_t *actx, Parse_info pi)
{
	int i, link;
	Link *lp;

	for (i = 0; i < pi->N_words; i++)
	{
		actx->word_links[i] = NULL;
	}

	for (link = 0; link < pi->N_links; link++)
	{
		lp = &(pi->link_array[link]);
		if (lp->lc->label < NORMAL_LABEL) { /* one of those special links for either-or, etc */
			continue;
		}

		/* One edge record per endpoint; direction comes from the
		 * connector priority at that endpoint. */
		record_digraph_edge(actx, link, lp->l, lp->r, lp->lc->priority);
		record_digraph_edge(actx, link, lp->r, lp->l, lp->rc->priority);
	}
}
|
217
|
+
|
218
|
+
/**
|
219
|
+
* Returns TRUE if there is at least one fat link pointing out of this word.
|
220
|
+
*/
|
221
|
+
/*
 * Returns TRUE if at least one fat link points down out of word w
 * (i.e. some outgoing edge has dir == 1), else FALSE.
 */
static int is_CON_word(int w, List_o_links **wordlinks)
{
	List_o_links *edge = wordlinks[w];
	while (edge != NULL)
	{
		if (edge->dir == 1) return TRUE;
		edge = edge->next;
	}
	return FALSE;
}
|
230
|
+
|
231
|
+
static DIS_node * build_DIS_node(analyze_context_t*, int);
|
232
|
+
|
233
|
+
/**
|
234
|
+
* This word is a CON word (has fat links down). Build the tree for it.
|
235
|
+
*/
|
236
|
+
/*
 * This word is a CON word (has fat links down).  Build its CON node:
 * one DIS child per fat down-link out of w, with the child list built
 * by prepending as we walk w's adjacency list.
 */
static CON_node * build_CON_node(analyze_context_t *actx, int w)
{
	CON_node *node;
	DIS_list *children, *cell;
	List_o_links *edge;

	children = NULL;
	for (edge = actx->word_links[w]; edge != NULL; edge = edge->next)
	{
		if (edge->dir != 1) continue;

		cell = (DIS_list *) xalloc (sizeof (DIS_list));
		cell->next = children;
		children = cell;
		cell->dn = build_DIS_node(actx, edge->word);
	}

	node = (CON_node *) xalloc(sizeof (CON_node));
	node->dl = node->current = children;
	node->word = w;
	return node;
}
|
257
|
+
|
258
|
+
/**
|
259
|
+
* Does a depth-first-search starting from w. Puts on the front of the
|
260
|
+
* list pointed to by c all of the CON nodes it finds, and returns the
|
261
|
+
* result. Also construct the list of all edges reached as part of this
|
262
|
+
* DIS_node search and append it to the lol list of start_dn.
|
263
|
+
*
|
264
|
+
* Both of the structure violations actually occur, and represent
|
265
|
+
* linkages that have improper structure. Fortunately, they
|
266
|
+
* seem to be rather rare.
|
267
|
+
*/
|
268
|
+
static CON_list * c_dfs(analyze_context_t *actx,
                        int w, DIS_node * start_dn, CON_list * c)
{
	CON_list *cx;
	List_o_links * lol, *lolx;
	/* Already claimed by some DIS region?  If it was claimed by a
	 * DIFFERENT root word, two DIS regions overlap: flag a structure
	 * violation.  Either way, do not descend again. */
	if (actx->dfs_root_word[w] != -1)
	{
		if (actx->dfs_root_word[w] != start_dn->word)
		{
			actx->structure_violation = TRUE;
		}
		return c;
	}
	/* Claim w for the region rooted at start_dn. */
	actx->dfs_root_word[w] = start_dn->word;
	for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
	{
		if (lol->dir < 0) /* a backwards link */
		{
			/* An up fat link leading to an unclaimed word means the
			 * exploration order reached this region from below:
			 * another form of structure violation. */
			if (actx->dfs_root_word[lol->word] == -1)
			{
				actx->structure_violation = TRUE;
			}
		}
		else if (lol->dir == 0)
		{
			/* Thin link: it belongs to this DIS region. */
			/* Make a copy of the link */
			lolx = (List_o_links *) xalloc(sizeof(List_o_links));
			lolx->word = lol->word;
			lolx->dir = lol->dir;
			lolx->link = lol->link;

			/* Chain it into place */
			lolx->next = start_dn->lol;
			start_dn->lol = lolx;
			/* Keep exploring the thin-link-connected region. */
			c = c_dfs(actx, lol->word, start_dn, c);
		}
	}

	/* if the current node is CON, put it first */
	if (is_CON_word(w, actx->word_links))
	{
		cx = (CON_list *) xalloc(sizeof(CON_list));
		cx->next = c;
		c = cx;
		c->cn = build_CON_node(actx, w);
	}
	return c;
}
|
316
|
+
|
317
|
+
/**
|
318
|
+
* This node is connected to its parent via a fat link. Search the
|
319
|
+
* region reachable via thin links, and put all reachable nodes with fat
|
320
|
+
* links out of them in its list of children.
|
321
|
+
*/
|
322
|
+
/*
 * This node is connected to its parent via a fat link.  Build the DIS
 * node for the region of the graph reachable from w through thin
 * links; its children are the CON nodes found in that region.
 */
static DIS_node * build_DIS_node(analyze_context_t *actx,
                                 int w)
{
	DIS_node *node = (DIS_node *) xalloc(sizeof (DIS_node));
	/* Set the word before searching: c_dfs() reads it to know the
	 * start word of this region. */
	node->word = w;
	node->lol = NULL;
	node->cl = c_dfs(actx, w, node, NULL);
	return node;
}
|
332
|
+
|
333
|
+
/*
 * Depth-first assignment of heights, used to order the root-word DFS.
 * A word with a non-zero recorded height has already been visited.
 */
static void height_dfs(analyze_context_t *actx, int w, int height)
{
	List_o_links *edge;

	if (actx->dfs_height[w] != 0) return;
	actx->dfs_height[w] = height;

	for (edge = actx->word_links[w]; edge != NULL; edge = edge->next)
	{
		/* The dir is 1 for a down link, so a child sits one level
		 * lower than its parent. */
		height_dfs(actx, edge->word, height - edge->dir);
	}
}
|
346
|
+
|
347
|
+
/**
|
348
|
+
* Simple insertion sort; should be plenty fast enough, since sentences
|
349
|
+
* are almost always shorter than 30 words or so. In fact, this is
|
350
|
+
* almost surely faster than qsort for such small arrays.
|
351
|
+
*/
|
352
|
+
/*
 * Insertion-sort dfs_height[0..nwords) into descending order, moving
 * height_perm[] in lockstep so it records the permutation.  Plenty
 * fast for sentence-length arrays (almost always under ~30 words),
 * and likely faster than qsort at these sizes.
 */
static void insort (analyze_context_t *actx, int nwords)
{
	int pos;
	for (pos = 1; pos < nwords; pos++)
	{
		int h = actx->dfs_height[pos];
		int p = actx->height_perm[pos];
		int slot = pos;
		/* Shift smaller heights to the right until h's place is found. */
		while (slot > 0 && (h > actx->dfs_height[slot-1]))
		{
			actx->dfs_height[slot] = actx->dfs_height[slot-1];
			actx->height_perm[slot] = actx->height_perm[slot-1];
			slot--;
		}
		actx->dfs_height[slot] = h;
		actx->height_perm[slot] = p;
	}
}
|
370
|
+
|
371
|
+
/**
 * Build the alternating DIS/CON tree describing how fat links partition
 * the sentence. Returns the root DIS node (never NULL for a non-empty
 * sentence). As a side effect fills dfs_root_word[] (via build_DIS_node's
 * dfs) and may set actx->structure_violation.
 */
static DIS_node * build_DIS_CON_tree(analyze_context_t *actx, Parse_info pi)
{
	int xw, w;
	DIS_node * dnroot, * dn;
	CON_list * child, * xchild;
	List_o_links * lol, * xlol;

	/* The algorithm used here to build the DIS_CON tree depends on
	 * the search percolating down from the "top" of the tree. The
	 * original version of this started its search at the wall. This
	 * was fine because doing a DFS from the wall explores the tree in
	 * the right order.
	 *
	 * However, in order to handle null links correctly, a more careful
	 * ordering process must be used to explore the tree. We use
	 * dfs_height[] for this, and sort in height order.
	 *
	 * XXX Is the sort order correct here? This is not obvious; I think
	 * we want highest to lowest ... XXX is the height being calculated
	 * correctly? Looks weird to me ... XXX
	 */

	for (w=0; w < pi->N_words; w++) actx->dfs_height[w] = 0;
	for (w=0; w < pi->N_words; w++) height_dfs(actx, w, MAX_SENTENCE);

	for (w=0; w < pi->N_words; w++) actx->height_perm[w] = w;

	/* Sort the heights, keeping only the permuted order. */
	insort (actx, pi->N_words);

	/* -1 marks "not yet absorbed into any DIS region". */
	for (w=0; w<pi->N_words; w++) actx->dfs_root_word[w] = -1;

	dnroot = NULL;
	for (xw = 0; xw < pi->N_words; xw++)
	{
		w = actx->height_perm[xw];
		if (actx->dfs_root_word[w] == -1)
		{
			dn = build_DIS_node(actx, w);
			if (dnroot == NULL)
			{
				dnroot = dn;
			}
			else
			{
				/* Splice this node's children and link list into the
				 * existing root, then free the now-empty wrapper node. */
				for (child = dn->cl; child != NULL; child = xchild)
				{
					xchild = child->next;
					child->next = dnroot->cl;
					dnroot->cl = child;
				}
				for (lol = dn->lol; lol != NULL; lol = xlol)
				{
					xlol = lol->next;
					lol->next = dnroot->lol;
					dnroot->lol = lol;
				}
				xfree((void *) dn, sizeof(DIS_node));
			}
		}
	}
	return dnroot;
}
|
434
|
+
|
435
|
+
static int advance_CON(CON_node *);
|
436
|
+
|
437
|
+
/**
|
438
|
+
* Cyclically advance the current state of this DIS node.
|
439
|
+
* If it's now at the beginning of its cycle, return FALSE;
|
440
|
+
* otherwise return TRUE. Together with the advance_CON()
|
441
|
+
* function, this can be used to iterate over the entire
|
442
|
+
* DIS_CON tree.
|
443
|
+
*/
|
444
|
+
static int advance_DIS(DIS_node * dn)
|
445
|
+
{
|
446
|
+
CON_list * cl;
|
447
|
+
for (cl = dn->cl; cl != NULL; cl = cl->next)
|
448
|
+
{
|
449
|
+
if (advance_CON(cl->cn)) return TRUE;
|
450
|
+
}
|
451
|
+
return FALSE;
|
452
|
+
}
|
453
|
+
|
454
|
+
/**
|
455
|
+
* Cyclically advance the current state of this CON node.
|
456
|
+
* If it's now at the beginning of its cycle return FALSE,
|
457
|
+
* otherwise return TRUE. Together with the advance_CON()
|
458
|
+
* function, this can be used to iterate over the entire
|
459
|
+
* DIS_CON tree.
|
460
|
+
*/
|
461
|
+
static int advance_CON(CON_node * cn)
|
462
|
+
{
|
463
|
+
if (advance_DIS(cn->current->dn))
|
464
|
+
{
|
465
|
+
return TRUE;
|
466
|
+
}
|
467
|
+
else
|
468
|
+
{
|
469
|
+
if (cn->current->next == NULL)
|
470
|
+
{
|
471
|
+
cn->current = cn->dl;
|
472
|
+
return FALSE;
|
473
|
+
}
|
474
|
+
else
|
475
|
+
{
|
476
|
+
cn->current = cn->current->next;
|
477
|
+
return TRUE;
|
478
|
+
}
|
479
|
+
}
|
480
|
+
}
|
481
|
+
|
482
|
+
static void fill_patch_array_CON(analyze_context_t *, CON_node *, Links_to_patch *);

/**
 * Patches up appropriate links in the patch_array for this DIS_node
 * and this patch list. Marks every link hanging off this node as
 * "used"; consumes (frees) the patch list when the links can be
 * re-attached at this word, otherwise passes it to the first child.
 */
static void fill_patch_array_DIS(analyze_context_t *actx,
                                 DIS_node * dn, Links_to_patch * ltp)
{
	CON_list * cl;
	List_o_links * lol;
	Links_to_patch * ltpx;

	/* Every link attached to this DIS node appears in the sublinkage. */
	for (lol = dn->lol; lol != NULL; lol = lol->next)
	{
		actx->patch_array[lol->link].used = TRUE;
	}

	/* No child CON node for this same word: re-point the pending links
	 * at this word, freeing the patch list as we go. */
	if ((dn->cl == NULL) || (dn->cl->cn->word != dn->word))
	{
		for (; ltp != NULL; ltp = ltpx)
		{
			ltpx = ltp->next;
			actx->patch_array[ltp->link].changed = TRUE;
			if (ltp->dir == 'l')
			{
				actx->patch_array[ltp->link].newl = dn->word;
			}
			else
			{
				actx->patch_array[ltp->link].newr = dn->word;
			}
			xfree((void *) ltp, sizeof(Links_to_patch));
		}
	}

	/* ltp != NULL at this point means that dn has child which is a cn
	   which is the same word */
	for (cl = dn->cl; cl != NULL; cl = cl->next)
	{
		fill_patch_array_CON(actx, cl->cn, ltp);
		ltp = NULL; /* only the first child inherits the patch list */
	}
}
|
526
|
+
|
527
|
+
/**
 * Collect the fat (priority-0, dir == 0) links attached to this CON
 * node's word onto the patch list, then hand the list down to the
 * currently selected DIS child, which decides where those links get
 * re-attached for the current sublinkage.
 */
static void fill_patch_array_CON(analyze_context_t *actx,
                                 CON_node * cn, Links_to_patch * ltp)
{
	List_o_links * lol;
	Links_to_patch *ltpx;

	for (lol = actx->word_links[cn->word]; lol != NULL; lol = lol->next)
	{
		if (lol->dir == 0)
		{
			ltpx = (Links_to_patch *) xalloc(sizeof(Links_to_patch));
			ltpx->next = ltp;
			ltp = ltpx;
			ltp->link = lol->link;
			/* Record which end of the link points at this word. */
			if (lol->word > cn->word) {
				ltp->dir = 'l';
			} else {
				ltp->dir = 'r';
			}
		}
	}
	fill_patch_array_DIS(actx, cn->current->dn, ltp);
}
|
550
|
+
|
551
|
+
/** Release every List_o_links chain built by build_digraph(). */
static void free_digraph(analyze_context_t *actx, Parse_info pi)
{
	int w;

	for (w = 0; w < pi->N_words; w++)
	{
		List_o_links *node = actx->word_links[w];
		while (node != NULL)
		{
			List_o_links *doomed = node;
			node = node->next;
			xfree((void *) doomed, sizeof(List_o_links));
		}
	}
}
|
564
|
+
|
565
|
+
static void free_CON_tree(CON_node *);
|
566
|
+
|
567
|
+
void free_DIS_tree(DIS_node * dn)
|
568
|
+
{
|
569
|
+
List_o_links * lol, *lolx;
|
570
|
+
CON_list *cl, *clx;
|
571
|
+
for (lol = dn->lol; lol != NULL; lol = lolx)
|
572
|
+
{
|
573
|
+
lolx = lol->next;
|
574
|
+
xfree((void *) lol, sizeof(List_o_links));
|
575
|
+
}
|
576
|
+
for (cl = dn->cl; cl != NULL; cl = clx)
|
577
|
+
{
|
578
|
+
clx = cl->next;
|
579
|
+
free_CON_tree(cl->cn);
|
580
|
+
xfree((void *) cl, sizeof(CON_list));
|
581
|
+
}
|
582
|
+
xfree((void *) dn, sizeof(DIS_node));
|
583
|
+
}
|
584
|
+
|
585
|
+
/** Recursively release a CON node and every DIS subtree hanging off it. */
static void free_CON_tree(CON_node * cn)
{
	DIS_list *entry = cn->dl;

	while (entry != NULL)
	{
		DIS_list *next = entry->next;
		free_DIS_tree(entry->dn);
		xfree((void *) entry, sizeof(DIS_list));
		entry = next;
	}

	xfree((void *) cn, sizeof(CON_node));
}
|
596
|
+
|
597
|
+
/** scope out this and element */
|
598
|
+
static void and_dfs_full(analyze_context_t *actx, int w)
|
599
|
+
{
|
600
|
+
List_o_links *lol;
|
601
|
+
if (actx->visited[w]) return;
|
602
|
+
actx->visited[w] = TRUE;
|
603
|
+
actx->and_element_sizes[actx->N_and_elements]++;
|
604
|
+
|
605
|
+
for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
|
606
|
+
{
|
607
|
+
if (lol->dir >= 0)
|
608
|
+
{
|
609
|
+
and_dfs_full(actx, lol->word);
|
610
|
+
}
|
611
|
+
}
|
612
|
+
}
|
613
|
+
|
614
|
+
/** get down the tree past all the commas */
static void and_dfs_commas(analyze_context_t *actx, Sentence sent, int w)
{
	List_o_links *lol;
	if (actx->visited[w]) return;

	actx->visited[w] = TRUE;

	for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
	{
		/* we only consider UP or DOWN priority links here */
		if (lol->dir == 1)
		{
			if (strcmp(sent->word[lol->word].string, ",") == 0)
			{
				/* pointing to a comma */
				and_dfs_commas(actx, sent, lol->word);
			}
			else
			{
				/* A non-comma below us starts a new and-list element;
				 * and_dfs_full() counts its size before we bump the index. */
				actx->and_element[actx->N_and_elements] = lol->word;
				and_dfs_full(actx, lol->word);
				actx->N_and_elements++;
			}
		}
		if (lol->dir == 0)
		{
			/* A priority-0 link leads to a word outside the and-list. */
			actx->outside_word[actx->N_outside_words] = lol->word;
			actx->N_outside_words++;
		}
	}
}
|
646
|
+
|
647
|
+
/**
|
648
|
+
* This function computes the "and cost", resulting from inequalities
|
649
|
+
* in the length of and-list elements. It also computes other
|
650
|
+
* information used to construct the "andlist" structure of linkage_info.
|
651
|
+
*/
|
652
|
+
static Andlist * build_andlist(analyze_context_t *actx, Sentence sent)
|
653
|
+
{
|
654
|
+
int w, i, min, max, j, cost;
|
655
|
+
char * s;
|
656
|
+
Andlist * new_andlist, * old_andlist;
|
657
|
+
Parse_info pi = sent->parse_info;
|
658
|
+
|
659
|
+
old_andlist = NULL;
|
660
|
+
cost = 0;
|
661
|
+
|
662
|
+
for(w = 0; w<pi->N_words; w++)
|
663
|
+
{
|
664
|
+
s = sent->word[w].string;
|
665
|
+
if (sent->is_conjunction[w])
|
666
|
+
{
|
667
|
+
actx->N_and_elements = 0;
|
668
|
+
actx->N_outside_words = 0;
|
669
|
+
for(i=0; i<pi->N_words; i++)
|
670
|
+
{
|
671
|
+
actx->visited[i] = FALSE;
|
672
|
+
actx->and_element_sizes[i] = 0;
|
673
|
+
}
|
674
|
+
if (sent->dict->left_wall_defined)
|
675
|
+
actx->visited[0] = TRUE;
|
676
|
+
|
677
|
+
and_dfs_commas(actx, sent, w);
|
678
|
+
if (actx->N_and_elements == 0) continue;
|
679
|
+
|
680
|
+
new_andlist = (Andlist *) xalloc(sizeof(Andlist));
|
681
|
+
new_andlist->num_elements = actx->N_and_elements;
|
682
|
+
new_andlist->num_outside_words = actx->N_outside_words;
|
683
|
+
|
684
|
+
for (i=0; i < actx->N_and_elements; i++)
|
685
|
+
{
|
686
|
+
new_andlist->element[i] = actx->and_element[i];
|
687
|
+
}
|
688
|
+
for (i=0; i < actx->N_outside_words; i++)
|
689
|
+
{
|
690
|
+
new_andlist->outside_word[i] = actx->outside_word[i];
|
691
|
+
}
|
692
|
+
new_andlist->conjunction = w;
|
693
|
+
new_andlist->next = old_andlist;
|
694
|
+
old_andlist = new_andlist;
|
695
|
+
|
696
|
+
if (actx->N_and_elements > 0)
|
697
|
+
{
|
698
|
+
min = MAX_SENTENCE;
|
699
|
+
max = 0;
|
700
|
+
for (i=0; i < actx->N_and_elements; i++)
|
701
|
+
{
|
702
|
+
j = actx->and_element_sizes[i];
|
703
|
+
if (j < min) min = j;
|
704
|
+
if (j > max) max = j;
|
705
|
+
}
|
706
|
+
cost += max-min;
|
707
|
+
}
|
708
|
+
}
|
709
|
+
}
|
710
|
+
if (old_andlist) old_andlist->cost = cost;
|
711
|
+
return old_andlist;
|
712
|
+
}
|
713
|
+
|
714
|
+
/**
 * Cost of a link as a function of its length: a link between adjacent
 * words is free, and each additional word spanned adds one.
 */
static inline int cost_for_length(int length)
{
	return length - 1;
}
|
721
|
+
|
722
|
+
/**
|
723
|
+
* Computes the cost of the current parse of the current sentence,
|
724
|
+
* due to the length of the links.
|
725
|
+
*/
|
726
|
+
static int link_cost(Parse_info pi)
|
727
|
+
{
|
728
|
+
int lcost, i;
|
729
|
+
lcost = 0;
|
730
|
+
for (i = 0; i < pi->N_links; i++)
|
731
|
+
{
|
732
|
+
lcost += cost_for_length(pi->link_array[i].r - pi->link_array[i].l);
|
733
|
+
}
|
734
|
+
return lcost;
|
735
|
+
}
|
736
|
+
|
737
|
+
/**
 * Number of null links in the linkage. Never implemented; always 0.
 */
static int null_cost(Parse_info pi)
{
	/* No one seems to care about this -- ALB */
	return 0;
}
|
743
|
+
|
744
|
+
/** Count the words left without a chosen disjunct (i.e. unused words). */
static int unused_word_cost(Parse_info pi)
{
	int total = 0;
	int w;

	for (w = 0; w < pi->N_words; w++)
	{
		if (pi->chosen_disjuncts[w] == NULL) total++;
	}
	return total;
}
|
752
|
+
|
753
|
+
/**
|
754
|
+
* Computes the cost of the current parse of the current sentence
|
755
|
+
* due to the cost of the chosen disjuncts.
|
756
|
+
*/
|
757
|
+
static float disjunct_cost(Parse_info pi)
|
758
|
+
{
|
759
|
+
int i;
|
760
|
+
float lcost;
|
761
|
+
lcost = 0.0;
|
762
|
+
for (i = 0; i < pi->N_words; i++)
|
763
|
+
{
|
764
|
+
if (pi->chosen_disjuncts[i] != NULL)
|
765
|
+
lcost += pi->chosen_disjuncts[i]->cost;
|
766
|
+
}
|
767
|
+
return lcost;
|
768
|
+
}
|
769
|
+
|
770
|
+
/**
 * Returns nonzero if string s represents a strictly smaller match set
 * than does t. An almost identical function appears in and.c; the
 * difference is that here s and t need not have the same length — a
 * missing character is treated as the wildcard '*'.
 */
static int strictly_smaller_name(const char * s, const char * t)
{
	int strictness = 0;

	while (*s != '\0' || *t != '\0')
	{
		int ss = (*s == '\0') ? '*' : *s++;
		int tt = (*t == '\0') ? '*' : *t++;

		if (ss == tt) continue;
		/* Mismatch: s is only "smaller" where t is a wildcard or s a '^'. */
		if (tt != '*' && ss != '^') return 0;
		strictness++;
	}
	return strictness > 0;
}
|
803
|
+
|
804
|
+
/**
 * The name of the link is set to be the GCD of the names of
 * its two endpoints. Must be called after each extract_links(),
 * etc. since that call issues a brand-new set of links into
 * parse_info.
 */
static void compute_link_names(Sentence sent)
{
	int i;
	Parse_info pi = sent->parse_info;

	/* intersect_strings() computes the GCD of the two connector names. */
	for (i = 0; i < pi->N_links; i++)
	{
		pi->link_array[i].name = intersect_strings(sent,
			connector_get_string(pi->link_array[i].lc),
			connector_get_string(pi->link_array[i].rc));
	}
}
|
822
|
+
|
823
|
+
/**
 * This fills in the sublinkage->link[].name field. We assume that
 * link_array[].name have already been filled in. As above, in the
 * standard case, the name is just the GCD of the two end points.
 * If pluralization has occurred, then we want to use the name
 * already in link_array[].name. We detect this in two ways.
 * If the endpoints don't match, then we know pluralization
 * has occured. If they do, but the name in link_array[].name
 * is *less* restrictive, then pluralization must have occured.
 */
static void compute_pp_link_names(Sentence sent, Sublinkage *sublinkage)
{
	int i;
	const char * s;
	Parse_info pi = sent->parse_info;

	for (i = 0; i < pi->N_links; i++)
	{
		/* l == -1 marks a link suppressed from this sublinkage. */
		if (sublinkage->link[i]->l == -1) continue;
		/* NULL's here are quite unexpected -- I think there's a bug
		 * elsewhere in the code. But for now, punt. Here's a sentence
		 * that triggers a NULL -- "His convalescence was relatively brief
		 * and he was able to return and fight at The Wilderness,
		 * Spotsylvania and Cold Harbor."
		 */
		if (NULL == sublinkage->link[i]->lc) continue;
		if (NULL == sublinkage->link[i]->rc) continue;
		if (!x_match(sent, sublinkage->link[i]->lc, sublinkage->link[i]->rc))
		{
			/* Endpoints no longer match: pluralization happened. */
			replace_link_name(sublinkage->link[i], pi->link_array[i].name);
		}
		else
		{
			s = intersect_strings(sent,
				connector_get_string(sublinkage->link[i]->lc),
				connector_get_string(sublinkage->link[i]->rc));

			/* A less-restrictive GCD also betrays pluralization. */
			if (strictly_smaller_name(s, pi->link_array[i].name))
				replace_link_name(sublinkage->link[i], pi->link_array[i].name);
			else
				replace_link_name(sublinkage->link[i], s);
		}
	}
}
|
867
|
+
|
868
|
+
/********************** exported functions *****************************/
|
869
|
+
|
870
|
+
/**
 * Create (on first use) and reset the per-sentence analyzer context.
 *
 * Fixes from the original: the malloc result was dereferenced without
 * a check; also switched to the idiomatic uncast `malloc(sizeof *p)`.
 */
void init_analyze(Sentence s)
{
	analyze_context_t *actx = s->analyze_ctxt;

	if (NULL == actx)
	{
		actx = malloc(sizeof *actx);
		if (NULL == actx) return; /* out of memory: leave ctxt unset */
		s->analyze_ctxt = actx;
	}

	actx->structure_violation = FALSE;
}
|
882
|
+
|
883
|
+
/**
 * Release the analyzer context attached to the sentence, if any.
 *
 * Fix: dropped the redundant NULL guard — free(NULL) is a no-op.
 */
void free_analyze(Sentence s)
{
	free(s->analyze_ctxt);
	s->analyze_ctxt = NULL;
}
|
888
|
+
|
889
|
+
/**
 * This uses link_array. It enumerates and post-processes
 * all the linkages represented by this one. We know this contains
 * at least one fat link.
 *
 * Returns a Linkage_info whose cost fields summarize the parse; its
 * N_violations counts post-processing failures across sublinkages.
 */
Linkage_info analyze_fat_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	int i;
	Linkage_info li;
	DIS_node *d_root;
	PP_node *pp;
	Postprocessor *postprocessor;
	Sublinkage *sublinkage;
	Parse_info pi = sent->parse_info;
	PP_node accum; /* for domain ancestry check */
	D_type_list * dtl0, * dtl1; /* for domain ancestry check */

	analyze_context_t *actx = sent->analyze_ctxt;

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;
	build_digraph(actx, pi);
	actx->structure_violation = FALSE;
	d_root = build_DIS_CON_tree(actx, pi); /* may set structure_violation to TRUE */

	memset(&li, 0, sizeof(li));
	li.N_violations = 0;
	li.improper_fat_linkage = actx->structure_violation;
	li.inconsistent_domains = FALSE;
	li.unused_word_cost = unused_word_cost(sent->parse_info);
	if (opts->use_sat_solver)
	{
		li.disjunct_cost = 0.0;
	}
	else
	{
		li.disjunct_cost = disjunct_cost(pi);
	}
	li.null_cost = null_cost(pi);
	li.link_cost = link_cost(pi);
	li.corpus_cost = -1.0f;
	li.and_cost = 0;
	li.andlist = NULL;

	/* An improper fat structure: report one violation and bail early. */
	if (actx->structure_violation)
	{
		li.N_violations++;
		free_sublinkage(sublinkage);
		free_digraph(actx, pi);
		free_DIS_tree(d_root);
		for (i = 0; i < pi->N_links; i++)
		{
			pi->link_array[i].name = "";
		}
		return li;
	}

	if (analyze_pass == PP_SECOND_PASS)
	{
		li.andlist = build_andlist(actx, sent);
		li.and_cost = 0;
		if (li.andlist) li.and_cost = li.andlist->cost;
	}
	else li.and_cost = 0;

	compute_link_names(sent);

	for (i=0; i<pi->N_links; i++) accum.d_type_array[i] = NULL;

	/* loop through all the sub linkages */
	for (;;)
	{
		/* Start each sublinkage from a clean copy of the full link set. */
		for (i=0; i<pi->N_links; i++)
		{
			actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
			actx->patch_array[i].newl = pi->link_array[i].l;
			actx->patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(actx, d_root, NULL);

		for (i=0; i<pi->N_links; i++)
		{
			if (actx->patch_array[i].changed || actx->patch_array[i].used)
			{
				sublinkage->link[i]->l = actx->patch_array[i].newl;
				sublinkage->link[i]->r = actx->patch_array[i].newr;
			}
			else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
			         (actx->dfs_root_word[pi->link_array[i].r] != -1))
			{
				/* Both ends are inside DIS regions but the link was not
				 * selected: suppress it for this sublinkage. */
				sublinkage->link[i]->l = -1;
			}
		}

		if (0 == opts->use_sat_solver)
		{
			compute_pp_link_array_connectors(sent, sublinkage);
			compute_pp_link_names(sent, sublinkage);
		}

		/* 'analyze_pass' logic added ALB 1/97 */
		if (analyze_pass==PP_FIRST_PASS) {
			post_process_scan_linkage(postprocessor,opts,sent,sublinkage);
			if (!advance_DIS(d_root)) break;
			else continue;
		}

		pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

		if (pp==NULL) {
			if (postprocessor != NULL) li.N_violations = 1;
		}
		else if (pp->violation == NULL) {
			/* the purpose of this stuff is to make sure the domain
			   ancestry for a link in each of its sentences is consistent. */

			for (i=0; i<pi->N_links; i++) {
				if (sublinkage->link[i]->l == -1) continue;
				if (accum.d_type_array[i] == NULL) {
					accum.d_type_array[i] = copy_d_type(pp->d_type_array[i]);
				} else {
					dtl0 = pp->d_type_array[i];
					dtl1 = accum.d_type_array[i];
					while((dtl0 != NULL) && (dtl1 != NULL) && (dtl0->type == dtl1->type)) {
						dtl0 = dtl0->next;
						dtl1 = dtl1->next;
					}
					/* Lists differ in length or content: inconsistent. */
					if ((dtl0 != NULL) || (dtl1 != NULL)) break;
				}
			}
			if (i != pi->N_links) {
				li.N_violations++;
				li.inconsistent_domains = TRUE;
			}
		}
		else if (pp->violation!=NULL) {
			li.N_violations++;
		}

		if (!advance_DIS(d_root)) break;
	}

	for (i=0; i<pi->N_links; ++i) {
		free_d_type(accum.d_type_array[i]);
	}

	/* if (display_on && (li.N_violations != 0) &&
	   (verbosity > 3) && should_print_messages)
	   printf("P.P. violation in one part of conjunction.\n"); */
	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	free_DIS_tree(d_root);
	return li;
}
|
1044
|
+
|
1045
|
+
/**
 * This uses link_array. It post-processes
 * this linkage, and prints the appropriate thing. There are no fat
 * links in it.
 *
 * Returns a zero-initialized Linkage_info on the first pass; on the
 * second pass the cost fields are filled in and N_violations reflects
 * the post-processing outcome.
 */
Linkage_info analyze_thin_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	int i;
	Linkage_info li;
	PP_node * pp;
	Postprocessor * postprocessor;
	Sublinkage *sublinkage;
	Parse_info pi = sent->parse_info;
	analyze_context_t *actx = sent->analyze_ctxt;

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;

	compute_link_names(sent);
	for (i=0; i<pi->N_links; i++)
	{
		copy_full_link(&(sublinkage->link[i]), &(pi->link_array[i]));
	}

	/* First pass only scans; no costs are computed. */
	if (analyze_pass == PP_FIRST_PASS)
	{
		post_process_scan_linkage(postprocessor, opts, sent, sublinkage);
		free_sublinkage(sublinkage);
		memset(&li, 0, sizeof(li));
		return li;
	}

	build_digraph(actx, pi);

	/* The code below can be used to generate the "islands" array.
	 * For this to work, however, you have to call "build_digraph"
	 * first (as in analyze_fat_linkage). and then "free_digraph".
	 */
	pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

	memset(&li, 0, sizeof(li));
	li.N_violations = 0;
	li.and_cost = 0;
	li.unused_word_cost = unused_word_cost(sent->parse_info);
	li.improper_fat_linkage = FALSE;
	li.inconsistent_domains = FALSE;
	if (opts->use_sat_solver)
	{
		li.disjunct_cost = 0.0;
	}
	else
	{
		li.disjunct_cost = disjunct_cost(pi);
	}
	li.null_cost = null_cost(pi);
	li.link_cost = link_cost(pi);
	li.corpus_cost = -1.0f;
	li.andlist = NULL;

	if (pp == NULL)
	{
		/* post_process failed outright; count it only if a
		 * postprocessor was actually configured. */
		if (postprocessor != NULL) li.N_violations = 1;
	}
	else if (pp->violation != NULL)
	{
		li.N_violations++;
	}

	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	return li;
}
|
1117
|
+
|
1118
|
+
/**
 * Copy the single (thin) sublinkage of the current parse into the
 * Linkage structure, naming each link first.
 */
void extract_thin_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	Parse_info pi = sent->parse_info;
	int i;

	linkage->num_sublinkages = 1;
	linkage->sublinkage = ex_create_sublinkage(pi);

	compute_link_names(sent);
	for (i = 0; i < pi->N_links; i++)
		linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
}
|
1132
|
+
|
1133
|
+
#ifdef DBG
/* Debug-only pretty-printers for the DIS/CON tree (enabled with -DDBG). */
static void prt_lol(Sentence sent , List_o_links *lol)
{
	/* It appears that the list of links is always even in length:
	 * The head word first, followed by a modifier.
	 */
	while (lol)
	{
		// printf ("%d ", lol->link);
		printf ("%s ", sent->word[lol->word].string);
		lol = lol->next;
	}
}

static void prt_con_list(Sentence, CON_list *);
static void prt_dis_list(Sentence sent, DIS_list *dis)
{
	while(dis)
	{
		/* There are three possibilities:
		 * Either there's another conjunction (and we should print it)
		 * Or there's a head word, with its modifiers in its list-o-links,
		 * Or there's just the bare, naked word by itself.
		 */
		if (dis->dn->cl)
		{
			prt_con_list(sent, dis->dn->cl);
		}
		else if (dis->dn->lol)
		{
			printf("[");
			prt_lol(sent, dis->dn->lol);
			printf("]");
		}
		else
		{
			int wd = dis->dn->word;
			printf("%s ", sent->word[wd].string);
		}
		dis = dis->next;
	}
}

static void prt_con_list(Sentence sent, CON_list *con)
{
	/* Each CON node prints as "(word <its disjuncts>) ". */
	while(con)
	{
		int wd = con->cn->word;
		printf("(%s ", sent->word[wd].string);
		prt_dis_list(sent, con->cn->dl);
		printf(") ");
		con = con->next;
	}
}
static void prt_dis_con_tree(Sentence sent, DIS_node *dis)
{
	prt_con_list(sent, dis->cl);
	printf ("\n");
}
#else
/* Without DBG, the tree printer compiles away to nothing. */
static inline void prt_dis_con_tree(Sentence sent, DIS_node *dis) {}
#endif
|
1195
|
+
|
1196
|
+
/**
 * This procedure mimics analyze_fat_linkage in order to
 * extract the sublinkages and copy them to the Linkage
 * data structure passed in.
 *
 * Ownership note: d_root is stored into linkage->dis_con_tree at the
 * end (any previous tree is freed), so it must NOT be freed here on
 * the normal path.
 */
void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	int i, j, N_thin_links;
	DIS_node *d_root;
	int num_sublinkages;
	Sublinkage * sublinkage;
	Parse_info pi = sent->parse_info;

	analyze_context_t *actx = sent->analyze_ctxt;

	build_digraph(actx, pi);
	actx->structure_violation = FALSE;
	d_root = build_DIS_CON_tree(actx, pi);

	/* Improper fat structure: fall back to a single raw sublinkage. */
	if (actx->structure_violation)
	{
		compute_link_names(sent);
		linkage->num_sublinkages=1;
		linkage->sublinkage = ex_create_sublinkage(pi);

		/* This will have fat links! */
		for (i=0; i<pi->N_links; ++i)
		{
			linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
		}

		free_digraph(actx, pi);
		free_DIS_tree(d_root);
		return;
	}

	/* first get number of sublinkages and allocate space */
	num_sublinkages = 0;
	for (;;) {
		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	linkage->num_sublinkages = num_sublinkages;
	linkage->sublinkage =
		(Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
	for (i=0; i<num_sublinkages; ++i) {
		linkage->sublinkage[i].link = NULL;
		linkage->sublinkage[i].pp_info = NULL;
		linkage->sublinkage[i].violation = NULL;
	}

	/* now fill out the sublinkage arrays */
	compute_link_names(sent);

	sublinkage = x_create_sublinkage(pi);
	num_sublinkages = 0;
	for (;;)
	{
		/* Start each sublinkage from a clean copy of the full link set. */
		for (i = 0; i < pi->N_links; i++)
		{
			actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
			actx->patch_array[i].newl = pi->link_array[i].l;
			actx->patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(actx, d_root, NULL);

		for (i = 0; i < pi->N_links; i++)
		{
			if (actx->patch_array[i].changed || actx->patch_array[i].used)
			{
				sublinkage->link[i]->l = actx->patch_array[i].newl;
				sublinkage->link[i]->r = actx->patch_array[i].newr;
			}
			else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
			         (actx->dfs_root_word[pi->link_array[i].r] != -1))
			{
				/* Unselected link inside DIS regions: suppress it. */
				sublinkage->link[i]->l = -1;
			}
		}

		if (0 == opts->use_sat_solver)
		{
			compute_pp_link_array_connectors(sent, sublinkage);
			compute_pp_link_names(sent, sublinkage);
		}

		/* Don't copy the fat links into the linkage */
		N_thin_links = 0;
		for (i = 0; i < pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			N_thin_links++;
		}

		linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
		linkage->sublinkage[num_sublinkages].link =
			(Link **) exalloc(sizeof(Link *)*N_thin_links);
		linkage->sublinkage[num_sublinkages].pp_info = NULL;
		linkage->sublinkage[num_sublinkages].violation = NULL;

		for (i = 0, j = 0; i < pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			linkage->sublinkage[num_sublinkages].link[j++] =
				excopy_link(sublinkage->link[i]);
		}

		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	if (linkage->dis_con_tree)
		free_DIS_tree(linkage->dis_con_tree);
	linkage->dis_con_tree = d_root;

	prt_dis_con_tree(sent, d_root);
}
|
1317
|
+
|