RubyGems - grammar_cop - Versions diffs - 0.1.0 - Mend

grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (344) hide show

data/.DS_Store +0 -0
data/.gitignore +4 -0
data/Gemfile +4 -0
data/Rakefile +8 -0
data/data/.DS_Store +0 -0
data/data/Makefile +511 -0
data/data/Makefile.am +4 -0
data/data/Makefile.in +511 -0
data/data/de/.DS_Store +0 -0
data/data/de/4.0.affix +7 -0
data/data/de/4.0.dict +474 -0
data/data/de/Makefile +387 -0
data/data/de/Makefile.am +9 -0
data/data/de/Makefile.in +387 -0
data/data/en/.DS_Store +0 -0
data/data/en/4.0.affix +26 -0
data/data/en/4.0.batch +1002 -0
data/data/en/4.0.biolg.batch +411 -0
data/data/en/4.0.constituent-knowledge +127 -0
data/data/en/4.0.dict +8759 -0
data/data/en/4.0.dict.m4 +6928 -0
data/data/en/4.0.enwiki.batch +14 -0
data/data/en/4.0.fixes.batch +2776 -0
data/data/en/4.0.knowledge +306 -0
data/data/en/4.0.regex +225 -0
data/data/en/4.0.voa.batch +114 -0
data/data/en/Makefile +554 -0
data/data/en/Makefile.am +19 -0
data/data/en/Makefile.in +554 -0
data/data/en/README +173 -0
data/data/en/tiny.dict +157 -0
data/data/en/words/.DS_Store +0 -0
data/data/en/words/Makefile +456 -0
data/data/en/words/Makefile.am +78 -0
data/data/en/words/Makefile.in +456 -0
data/data/en/words/currency +205 -0
data/data/en/words/currency.p +28 -0
data/data/en/words/entities.given-bisex.sing +39 -0
data/data/en/words/entities.given-female.sing +4141 -0
data/data/en/words/entities.given-male.sing +1633 -0
data/data/en/words/entities.locations.sing +68 -0
data/data/en/words/entities.national.sing +253 -0
data/data/en/words/entities.organizations.sing +7 -0
data/data/en/words/entities.us-states.sing +11 -0
data/data/en/words/units.1 +45 -0
data/data/en/words/units.1.dot +4 -0
data/data/en/words/units.3 +2 -0
data/data/en/words/units.4 +5 -0
data/data/en/words/units.4.dot +1 -0
data/data/en/words/words-medical.adv.1 +1191 -0
data/data/en/words/words-medical.prep.1 +67 -0
data/data/en/words/words-medical.v.4.1 +2835 -0
data/data/en/words/words-medical.v.4.2 +2848 -0
data/data/en/words/words-medical.v.4.3 +3011 -0
data/data/en/words/words-medical.v.4.4 +3036 -0
data/data/en/words/words-medical.v.4.5 +3050 -0
data/data/en/words/words.adj.1 +6794 -0
data/data/en/words/words.adj.2 +638 -0
data/data/en/words/words.adj.3 +667 -0
data/data/en/words/words.adv.1 +1573 -0
data/data/en/words/words.adv.2 +67 -0
data/data/en/words/words.adv.3 +157 -0
data/data/en/words/words.adv.4 +80 -0
data/data/en/words/words.n.1 +11464 -0
data/data/en/words/words.n.1.wiki +264 -0
data/data/en/words/words.n.2.s +2017 -0
data/data/en/words/words.n.2.s.biolg +1 -0
data/data/en/words/words.n.2.s.wiki +298 -0
data/data/en/words/words.n.2.x +65 -0
data/data/en/words/words.n.2.x.wiki +10 -0
data/data/en/words/words.n.3 +5717 -0
data/data/en/words/words.n.t +23 -0
data/data/en/words/words.v.1.1 +1038 -0
data/data/en/words/words.v.1.2 +1043 -0
data/data/en/words/words.v.1.3 +1052 -0
data/data/en/words/words.v.1.4 +1023 -0
data/data/en/words/words.v.1.p +17 -0
data/data/en/words/words.v.10.1 +14 -0
data/data/en/words/words.v.10.2 +15 -0
data/data/en/words/words.v.10.3 +88 -0
data/data/en/words/words.v.10.4 +17 -0
data/data/en/words/words.v.2.1 +1253 -0
data/data/en/words/words.v.2.2 +1304 -0
data/data/en/words/words.v.2.3 +1280 -0
data/data/en/words/words.v.2.4 +1285 -0
data/data/en/words/words.v.2.5 +1287 -0
data/data/en/words/words.v.4.1 +2472 -0
data/data/en/words/words.v.4.2 +2487 -0
data/data/en/words/words.v.4.3 +2441 -0
data/data/en/words/words.v.4.4 +2478 -0
data/data/en/words/words.v.4.5 +2483 -0
data/data/en/words/words.v.5.1 +98 -0
data/data/en/words/words.v.5.2 +98 -0
data/data/en/words/words.v.5.3 +103 -0
data/data/en/words/words.v.5.4 +102 -0
data/data/en/words/words.v.6.1 +388 -0
data/data/en/words/words.v.6.2 +401 -0
data/data/en/words/words.v.6.3 +397 -0
data/data/en/words/words.v.6.4 +405 -0
data/data/en/words/words.v.6.5 +401 -0
data/data/en/words/words.v.8.1 +117 -0
data/data/en/words/words.v.8.2 +118 -0
data/data/en/words/words.v.8.3 +118 -0
data/data/en/words/words.v.8.4 +119 -0
data/data/en/words/words.v.8.5 +119 -0
data/data/en/words/words.y +104 -0
data/data/lt/.DS_Store +0 -0
data/data/lt/4.0.affix +6 -0
data/data/lt/4.0.constituent-knowledge +24 -0
data/data/lt/4.0.dict +135 -0
data/data/lt/4.0.knowledge +38 -0
data/data/lt/Makefile +389 -0
data/data/lt/Makefile.am +11 -0
data/data/lt/Makefile.in +389 -0
data/ext/.DS_Store +0 -0
data/ext/link_grammar/.DS_Store +0 -0
data/ext/link_grammar/extconf.rb +2 -0
data/ext/link_grammar/link-grammar/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
data/ext/link_grammar/link-grammar/Makefile +900 -0
data/ext/link_grammar/link-grammar/Makefile.am +202 -0
data/ext/link_grammar/link-grammar/Makefile.in +900 -0
data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
data/ext/link_grammar/link-grammar/and.c +1603 -0
data/ext/link_grammar/link-grammar/and.h +27 -0
data/ext/link_grammar/link-grammar/api-structures.h +362 -0
data/ext/link_grammar/link-grammar/api-types.h +72 -0
data/ext/link_grammar/link-grammar/api.c +1887 -0
data/ext/link_grammar/link-grammar/api.h +96 -0
data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/autoit/README +10 -0
data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
data/ext/link_grammar/link-grammar/command-line.c +458 -0
data/ext/link_grammar/link-grammar/command-line.h +15 -0
data/ext/link_grammar/link-grammar/constituents.c +1836 -0
data/ext/link_grammar/link-grammar/constituents.h +26 -0
data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
data/ext/link_grammar/link-grammar/corpus/README +17 -0
data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
data/ext/link_grammar/link-grammar/count.c +828 -0
data/ext/link_grammar/link-grammar/count.h +25 -0
data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
data/ext/link_grammar/link-grammar/error.c +92 -0
data/ext/link_grammar/link-grammar/error.h +35 -0
data/ext/link_grammar/link-grammar/expand.c +67 -0
data/ext/link_grammar/link-grammar/expand.h +13 -0
data/ext/link_grammar/link-grammar/externs.h +22 -0
data/ext/link_grammar/link-grammar/extract-links.c +625 -0
data/ext/link_grammar/link-grammar/extract-links.h +16 -0
data/ext/link_grammar/link-grammar/fast-match.c +309 -0
data/ext/link_grammar/link-grammar/fast-match.h +17 -0
data/ext/link_grammar/link-grammar/idiom.c +373 -0
data/ext/link_grammar/link-grammar/idiom.h +15 -0
data/ext/link_grammar/link-grammar/jni-client.c +779 -0
data/ext/link_grammar/link-grammar/jni-client.h +236 -0
data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
data/ext/link_grammar/link-grammar/link-features.h +37 -0
data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
data/ext/link_grammar/link-grammar/link-includes.h +465 -0
data/ext/link_grammar/link-grammar/link-parser.c +849 -0
data/ext/link_grammar/link-grammar/massage.c +329 -0
data/ext/link_grammar/link-grammar/massage.h +13 -0
data/ext/link_grammar/link-grammar/post-process.c +1113 -0
data/ext/link_grammar/link-grammar/post-process.h +45 -0
data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
data/ext/link_grammar/link-grammar/prefix.c +482 -0
data/ext/link_grammar/link-grammar/prefix.h +139 -0
data/ext/link_grammar/link-grammar/preparation.c +412 -0
data/ext/link_grammar/link-grammar/preparation.h +20 -0
data/ext/link_grammar/link-grammar/print-util.c +87 -0
data/ext/link_grammar/link-grammar/print-util.h +32 -0
data/ext/link_grammar/link-grammar/print.c +1085 -0
data/ext/link_grammar/link-grammar/print.h +16 -0
data/ext/link_grammar/link-grammar/prune.c +1864 -0
data/ext/link_grammar/link-grammar/prune.h +17 -0
data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
data/ext/link_grammar/link-grammar/read-dict.h +29 -0
data/ext/link_grammar/link-grammar/read-regex.c +161 -0
data/ext/link_grammar/link-grammar/read-regex.h +12 -0
data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
data/ext/link_grammar/link-grammar/resources.c +180 -0
data/ext/link_grammar/link-grammar/resources.h +23 -0
data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
data/ext/link_grammar/link-grammar/string-set.c +169 -0
data/ext/link_grammar/link-grammar/string-set.h +16 -0
data/ext/link_grammar/link-grammar/structures.h +498 -0
data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
data/ext/link_grammar/link-grammar/tokenize.h +15 -0
data/ext/link_grammar/link-grammar/utilities.c +847 -0
data/ext/link_grammar/link-grammar/utilities.h +281 -0
data/ext/link_grammar/link-grammar/word-file.c +124 -0
data/ext/link_grammar/link-grammar/word-file.h +15 -0
data/ext/link_grammar/link-grammar/word-utils.c +526 -0
data/ext/link_grammar/link-grammar/word-utils.h +152 -0
data/ext/link_grammar/link_grammar.c +202 -0
data/ext/link_grammar/link_grammar.h +99 -0
data/grammar_cop.gemspec +24 -0
data/lib/.DS_Store +0 -0
data/lib/grammar_cop.rb +9 -0
data/lib/grammar_cop/.DS_Store +0 -0
data/lib/grammar_cop/dictionary.rb +19 -0
data/lib/grammar_cop/linkage.rb +30 -0
data/lib/grammar_cop/parse_options.rb +32 -0
data/lib/grammar_cop/sentence.rb +36 -0
data/lib/grammar_cop/version.rb +3 -0
data/test/.DS_Store +0 -0
data/test/grammar_cop_test.rb +27 -0
metadata +407 -0

data/ext/link_grammar/link-grammar/corpus/corpus.h ADDED Viewed

@@ -0,0 +1,46 @@
+/*
+ * corpus.h
+ *
+ * Data for corpus statistics, used to provide a parse ranking
+ * to drive the SAT solver, as well as parse ranking with the
+ * ordinary solver.
+ *
+ * This header has two mutually exclusive halves, selected by the
+ * USE_CORPUS preprocessor symbol:
+ *
+ *  - USE_CORPUS defined: the corpus API is declared as ordinary
+ *    prototypes; no definitions appear in this header.
+ *  - USE_CORPUS not defined: some of those functions are supplied as
+ *    static inline stubs with empty bodies or fixed return values
+ *    (NULL, 0.0, and 998.0 for lg_corpus_disjunct_score). The meaning
+ *    of the 998.0 value is not documented here.
+ *
+ * NOTE(review): the two #include lines are inside the USE_CORPUS
+ * branch, so the stub branch appears to rely on the including file
+ * having already declared Sentence, Linkage, Linkage_info, Sense and
+ * NULL -- confirm against the includers.
+ * NOTE(review): lg_corpus_new, lg_corpus_delete, lg_sense_get_index,
+ * lg_sense_get_subscripted_word, lg_sense_get_disjunct and
+ * lg_sense_delete have no stub; presumably their callers are
+ * themselves guarded by USE_CORPUS -- confirm.
+ *
+ * Copyright (c) 2008, 2009 Linas Vepstas <linasvepstas@gmail.com>
+ */
+#ifndef _LINKGRAMMAR_CORPUS_H
+#define _LINKGRAMMAR_CORPUS_H
+#ifdef USE_CORPUS
+#include "../api-types.h"
+#include "../link-includes.h"
+Corpus * lg_corpus_new(void);
+void lg_corpus_delete(Corpus *);
+void lg_corpus_score(Sentence, Linkage_info *);
+double lg_corpus_disjunct_score(Linkage linkage, int w);
+void lg_corpus_linkage_senses(Linkage);
+Sense * lg_get_word_sense(Linkage_info *, int word);
+Sense * lg_sense_next(Sense *);
+int lg_sense_get_index(Sense *);
+const char * lg_sense_get_subscripted_word(Sense *);
+const char * lg_sense_get_disjunct(Sense *);
+const char * lg_sense_get_sense(Sense *);
+double lg_sense_get_score(Sense *);
+void lg_sense_delete(Linkage_info *);
+#else /* USE_CORPUS not defined: inline stubs that ignore their arguments */
+static inline void lg_corpus_score(Sentence s, Linkage_info *li) {}
+static inline void lg_corpus_linkage_senses(Linkage l) {}
+static inline Sense * lg_get_word_sense(Linkage_info *lif, int word) { return NULL; }
+static inline Sense * lg_sense_next(Sense *s ) {return NULL; }
+static inline const char * lg_sense_get_sense(Sense *s) { return NULL; }
+static inline double lg_sense_get_score(Sense *s) { return 0.0; }
+static inline double lg_corpus_disjunct_score(Linkage linkage, int w) { return 998.0; }
+#endif /* USE_CORPUS */
+#endif /* _LINKGRAMMAR_CORPUS_H */

data/ext/link_grammar/link-grammar/count.c ADDED Viewed

@@ -0,0 +1,828 @@
+/*************************************************************************/
+/* Copyright (c) 2004                                                    */
+/* Daniel Sleator, David Temperley, and John Lafferty                    */
+/* All rights reserved                                                   */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+#include "api.h"
+#include "disjunct-utils.h"
+/* This file contains the exhaustive search algorithm.
+ *
+ * Table_connector is one entry of the hash table that memoizes counts.
+ * An entry is keyed by (lw, rw, le, re, cost) and entries that hash to
+ * the same bucket are chained through `next`.
+ * NOTE(review): lw, rw and cost are stored as short although
+ * table_store() receives them as int -- confirm the values always fit.
+ */
+typedef struct Table_connector_s Table_connector;
+struct Table_connector_s
+{
+	short            lw, rw;    /* left and right word indices of the range */
+	Connector        *le, *re;  /* connectors at the left and right ends (may be NULL) */
+	short            cost;      /* do_count() passes its null_count here */
+	s64              count;     /* count recorded for this key */
+	Table_connector  *next;     /* next entry in the same bucket; NULL ends the chain */
+};
+struct count_context_s  /* counting state, reached through sent->count_ctxt */
+{
+	char ** deletable;            /* not referenced in the visible part of this file */
+	char ** effective_dist;       /* indexed [aw][bw] by do_match(); set from sent->effective_dist or reset to NULL */
+	Word *  local_sent;           /* indexed by word number; do_count() walks local_sent[w].d */
+	int     null_block;           /* divisor do_count() uses to turn skipped words into a null count */
+	int     islands_ok;           /* when zero, do_count() derives the null count from the number of skipped words */
+	int     null_links;           /* not referenced in the visible part of this file */
+	int     table_size;           /* number of buckets in `table`: 1 << log2_table_size */
+	int     log2_table_size;      /* first argument handed to pair_hash() */
+	Table_connector ** table;     /* bucket array of chained entries; NULL once freed */
+	Resources current_resources;  /* if non-NULL, tested with resources_exhausted() on a lookup miss */
+};
+/**
+ * Release every entry chained into the hash table of ctxt, then the
+ * bucket array itself. On return ctxt->table is NULL and
+ * ctxt->table_size is 0.
+ */
+static void free_table(count_context_t *ctxt)
+{
+	int bucket = 0;
+	while (bucket < ctxt->table_size)
+	{
+		Table_connector *node = ctxt->table[bucket];
+		while (node != NULL)
+		{
+			/* Remember the successor before the node is released. */
+			Table_connector *following = node->next;
+			xfree((void *) node, sizeof(Table_connector));
+			node = following;
+		}
+		bucket++;
+	}
+	xfree(ctxt->table, ctxt->table_size * sizeof(Table_connector*));
+	ctxt->table = NULL;
+	ctxt->table_size = 0;
+}
+/**
+ * (Re)create the empty hash table for a sentence.
+ *
+ * Any table already attached to the sentence's count context is freed
+ * first. The number of buckets is a power of two chosen from the
+ * sentence length alone: 2^12 for fewer than 10 words, otherwise
+ * 2^(12 + length/6), never more than 2^24. It would probably be better
+ * to size it from the actual number of disjuncts.
+ */
+static void init_table(Sentence sent)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	int log2_size;
+	if (ctxt->table) free_table(ctxt);
+	log2_size = (sent->length >= 10) ? 12 + (sent->length) / 6 : 12;
+	/* Upper bound of 1<<24 buckets, i.e. 64 MBytes of bucket
+	 * pointers when a pointer occupies 4 bytes. */
+	if (log2_size > 24) log2_size = 24;
+	ctxt->log2_table_size = log2_size;
+	ctxt->table_size = (1 << log2_size);
+	ctxt->table = (Table_connector**)
+		xalloc(ctxt->table_size * sizeof(Table_connector*));
+	/* Every bucket starts out as an empty chain. */
+	memset(ctxt->table, 0, ctxt->table_size*sizeof(Table_connector*));
+}
+/**
+ * Match connectors a and b with both word positions given as 0;
+ * do_match() uses a distance of 0 in that case instead of consulting
+ * the effective-distance table.
+ */
+int x_match(Sentence sent, Connector *a, Connector *b)
+{
+	const int no_word = 0;
+	return do_match(sent, a, b, no_word, no_word);
+}
+/**
+ * Point the count context of sent at the sentence's own
+ * effective_dist table.
+ */
+void count_set_effective_distance(Sentence sent)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	ctxt->effective_dist = sent->effective_dist;
+}
+/**
+ * Detach the effective_dist table from the count context of sent by
+ * resetting the pointer to NULL. The table itself is not freed here.
+ */
+void count_unset_effective_distance(Sentence sent)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	ctxt->effective_dist = NULL;
+}
+/*
+ * Returns TRUE if connectors a and b match according to the connector
+ * matching rules.  The connector strings must be properly formed,
+ * starting with zero or more upper case letters, followed by a sequence
+ * of lower case letters, "*"s or "^"s.
+ * The algorithm is symmetric with respect to a and b.
+ *
+ * It works as follows:  The labels must match.  The priorities must be
+ * compatible (both THIN_priority, or one UP_priority and one DOWN_priority).
+ * The sequence of upper case letters must match exactly.  After these comes
+ * a sequence of lower case letters "*"s or "^"s.  The matching algorithm
+ * is different depending on which of the two priority cases is being
+ * considered.  See the comments below.
+ *
+ * aw and bw are the word positions of a and b.  If both are 0 the
+ * distance between them is taken to be 0; otherwise aw < bw is asserted
+ * and the distance is read from the count context's effective_dist
+ * table.  A distance larger than either connector's length_limit
+ * rejects the match.
+ */
+int do_match(Sentence sent, Connector *a, Connector *b, int aw, int bw)
+{
+	const char *s, *t;
+	int x, y, dist;
+	count_context_t *ctxt;
+	if (a->label != b->label) return FALSE;
+	s = a->string;
+	t = b->string;
+	/* The <ctype.h> functions are only defined for values representable
+	 * as unsigned char (or EOF).  Plain char may be signed, so convert
+	 * through unsigned char rather than straight to int. */
+	while (isupper((unsigned char)*s) || isupper((unsigned char)*t))
+	{
+		if (*s != *t) return FALSE;
+		s++;
+		t++;
+	}
+	ctxt = sent->count_ctxt;
+	x = a->priority;
+	y = b->priority;
+	/* Probably not necessary, as long as
+	 * effective_dist[0][0]=0 and is defined */
+	if (aw == 0 && bw == 0) {
+		dist = 0;
+	} else {
+		assert(aw < bw, "match() did not receive params in the natural order.");
+		dist = ctxt->effective_dist[aw][bw];
+	}
+	/*	printf("M: a=%4s b=%4s  ap=%d bp=%d  aw=%d  bw=%d  a->ll=%d b->ll=%d  dist=%d\n",
+		   s, t, x, y, aw, bw, a->length_limit, b->length_limit, dist); */
+	if (dist > a->length_limit || dist > b->length_limit) return FALSE;
+	if ((x == THIN_priority) && (y == THIN_priority))
+	{
+		/*
+		   Remember that "*" matches anything, and "^" matches nothing
+		   (except "*").  Otherwise two characters match if and only if
+		   they're equal.  ("^" can be used in the dictionary just like
+		   any other connector.)
+		   Comparison stops at the end of the shorter string.
+		   */
+		while ((*s!='\0') && (*t!='\0')) {
+			if ((*s == '*') || (*t == '*') ||
+				((*s == *t) && (*s != '^'))) {
+				s++;
+				t++;
+			} else return FALSE;
+		}
+		return TRUE;
+	} else if ((x==UP_priority) && (y==DOWN_priority)) {
+		/*
+		   As you go up (namely from x to y) the set of strings that
+		   match (in the normal THIN sense above) should get no larger.
+		   Read the comment in and.c to understand this.
+		   In other words, the y string (t) must be weaker (or at least
+		   no stronger) than the x string (s).
+		   This code is only correct if the strings are the same
+		   length.  This is currently true, but perhaps for safety
+		   this assumption should be removed.
+		   */
+		while ((*s!='\0') && (*t!='\0')) {
+			if ((*s == *t) || (*s == '*') || (*t == '^')) {
+				s++;
+				t++;
+			} else return FALSE;
+		}
+		return TRUE;
+	}
+	else if ((y == UP_priority) && (x == DOWN_priority))
+	{
+		/* Mirror image of the previous case, with the roles of s and t
+		 * exchanged. */
+		while ((*s!='\0') && (*t!='\0'))
+		{
+			if ((*s == *t) || (*t == '*') || (*s == '^'))
+			{
+				s++;
+				t++;
+			}
+			else
+				return FALSE;
+		}
+		return TRUE;
+	}
+	else
+		return FALSE;
+}
+/**
+ * Allocate a new entry for the key (lw, rw, le, re, cost) with the
+ * given count and push it onto the front of its hash bucket.  No check
+ * is made for an existing entry with the same key; the caller must
+ * know there is none.  Returns the new entry.
+ */
+static Table_connector * table_store(count_context_t *ctxt,
+                                     int lw, int rw,
+                                     Connector *le, Connector *re,
+                                     int cost, s64 count)
+{
+	Table_connector *entry = (Table_connector *) xalloc(sizeof(Table_connector));
+	int bucket;
+	entry->lw = lw;
+	entry->rw = rw;
+	entry->le = le;
+	entry->re = re;
+	entry->cost = cost;
+	entry->count = count;
+	bucket = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
+	/* The previous head of the chain becomes the new entry's successor. */
+	entry->next = ctxt->table[bucket];
+	ctxt->table[bucket] = entry;
+	return entry;
+}
+/**
+ * Look up the entry for the key (lw, rw, le, re, cost).
+ *
+ * Returns the entry if it is in the table.  On a miss the result
+ * depends on the resource state: if ctxt->current_resources is set and
+ * reports exhaustion, a fresh entry with count 0 is stored and
+ * returned; otherwise NULL is returned.
+ */
+static Table_connector *
+find_table_pointer(count_context_t *ctxt,
+                   int lw, int rw,
+                   Connector *le, Connector *re,
+                   int cost)
+{
+	int bucket = pair_hash(ctxt->log2_table_size,lw, rw, le, re, cost);
+	Table_connector *entry = ctxt->table[bucket];
+	while (entry != NULL)
+	{
+		if ((entry->lw == lw) && (entry->rw == rw) &&
+		    (entry->le == le) && (entry->re == re) &&
+		    (entry->cost == cost))
+		{
+			return entry;
+		}
+		entry = entry->next;
+	}
+	/* Not found.  An entry is created only once resources have run
+	 * out (presumably the panic-parse situation). */
+	if (ctxt->current_resources == NULL) return NULL;
+	if (!resources_exhausted(ctxt->current_resources)) return NULL;
+	return table_store(ctxt, lw, rw, le, re, cost, 0);
+}
+/**
+ * Return the count stored for the quintuple (lw, rw, le, re, cost),
+ * or -1 when find_table_pointer() yields no entry for it.
+ */
+s64 table_lookup(Sentence sent,
+                 int lw, int rw, Connector *le, Connector *re, int cost)
+{
+	Table_connector *entry = find_table_pointer(sent->count_ctxt, lw, rw, le, re, cost);
+	return (entry != NULL) ? entry->count : -1;
+}
+/**
+ * Stores the value in the table.  Unlike table_store, it assumes
+ * it's already there
+ *
+ * The entry is located with find_table_pointer() using the key
+ * (lw, rw, le, re, cost) and its count is overwritten with `count`.
+ * A NULL result from the lookup trips the assertion.  Note that
+ * find_table_pointer() itself inserts a zero-count entry on a miss
+ * once resources are exhausted, so in that state the lookup does not
+ * return NULL.
+ * NOTE(review): assert() is used here with two arguments, so it is
+ * presumably a project macro rather than the one from <assert.h> --
+ * confirm.
+ */
+static void table_update(count_context_t *ctxt, int lw, int rw,
+                         Connector *le, Connector *re,
+                         int cost, s64 count)
+{
+	Table_connector *t = find_table_pointer(ctxt, lw, rw, le, re, cost);
+	assert(t != NULL, "This entry is supposed to be in the table.");
+	t->count = count;
+}
+/**
+ * Table-only estimate for the quintuple (lw, rw, le, re, cost).
+ * Yields 0 exactly when table_lookup() reports a count of 0 for it;
+ * any other lookup result, including the -1 "not found" value,
+ * yields 1.
+ */
+static s64 pseudocount(Sentence sent,
+                       int lw, int rw, Connector *le, Connector *re, int cost)
+{
+	return (table_lookup(sent, lw, rw, le, re, cost) == 0) ? 0 : 1;
+}
+static s64 do_count(Sentence sent, int lw, int rw,
+                    Connector *le, Connector *re, int null_count)
+{
+	Disjunct * d;
+	s64 total, pseudototal;
+	int start_word, end_word, w;
+	s64 leftcount, rightcount;
+	int lcost, rcost, Lmatch, Rmatch;
+	Match_node * m, *m1;
+	Table_connector *t;
+	count_context_t *ctxt = sent->count_ctxt;
+	if (null_count < 0) return 0;  /* can this ever happen?? */
+	t = find_table_pointer(ctxt, lw, rw, le, re, null_count);
+	if (t == NULL) {
+		/* Create the table entry with a tentative null count of 0.
+	    * This count must be updated before we return. */
+		t = table_store(ctxt, lw, rw, le, re, null_count, 0);
+	} else {
+		return t->count;
+	}
+	if (rw == 1+lw)
+	{
+		/* lw and rw are neighboring words */
+		/* You can't have a linkage here with null_count > 0 */
+		if ((le == NULL) && (re == NULL) && (null_count == 0))
+		{
+			t->count = 1;
+		}
+		else
+		{
+			t->count = 0;
+		}
+		return t->count;
+	}
+	if ((le == NULL) && (re == NULL))
+	{
+		if (!ctxt->islands_ok && (lw != -1))
+		{
+			/* If we don't allow islands (a set of words linked together
+			 * but separate from the rest of the sentence) then the
+			 * null_count of skipping n words is just n */
+			if (null_count == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block)
+			{
+				/* If null_block=4 then the null_count of
+				   1,2,3,4 nulls is 1; and 5,6,7,8 is 2 etc. */
+				t->count = 1;
+			}
+			else
+			{
+				t->count = 0;
+			}
+			return t->count;
+		}
+		if (null_count == 0)
+		{
+			/* There is no solution without nulls in this case. There is
+			 * a slight efficiency hack to separate this null_count==0
+			 * case out, but not necessary for correctness */
+			t->count = 0;
+		}
+		else
+		{
+			total = 0;
+			w = lw+1;
+			for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
+			{
+				if (d->left == NULL)
+				{
+					total += do_count(sent, w, rw, d->right, NULL, null_count-1);
+				}
+			}
+			total += do_count(sent, w, rw, NULL, NULL, null_count-1);
+			t->count = total;
+		}
+		return t->count;
+	}
+	if (le == NULL)
+	{
+		start_word = lw+1;
+	}
+	else
+	{
+		start_word = le->word;
+	}
+	if (re == NULL)
+	{
+		end_word = rw;
+	}
+	else
+	{
+		end_word = re->word +1;
+	}
+	total = 0;
+	for (w = start_word; w < end_word; w++)
+	{
+		m1 = m = form_match_list(sent, w, le, lw, re, rw);
+		for (; m!=NULL; m=m->next)
+		{
+			d = m->d;
+			for (lcost = 0; lcost <= null_count; lcost++)
+			{
+				rcost = null_count - lcost;
+				/* Now lcost and rcost are the costs we're assigning
+				 * to those parts respectively */
+				/* Now, we determine if (based on table only) we can see that
+				   the current range is not parsable. */
+				Lmatch = (le != NULL) && (d->left != NULL) &&
+				         do_match(sent, le, d->left, lw, w);
+				Rmatch = (d->right != NULL) && (re != NULL) &&
+				         do_match(sent, d->right, re, w, rw);
+				rightcount = leftcount = 0;
+				if (Lmatch)
+				{
+					leftcount = pseudocount(sent, lw, w, le->next, d->left->next, lcost);
+					if (le->multi) leftcount += pseudocount(sent, lw, w, le, d->left->next, lcost);
+					if (d->left->multi) leftcount += pseudocount(sent, lw, w, le->next, d->left, lcost);
+					if (le->multi && d->left->multi) leftcount += pseudocount(sent, lw, w, le, d->left, lcost);
+				}
+				if (Rmatch)
+				{
+					rightcount = pseudocount(sent, w, rw, d->right->next, re->next, rcost);
+					if (d->right->multi) rightcount += pseudocount(sent, w,rw,d->right,re->next, rcost);
+					if (re->multi) rightcount += pseudocount(sent, w, rw, d->right->next, re, rcost);
+					if (d->right->multi && re->multi) rightcount += pseudocount(sent, w, rw, d->right, re, rcost);
+				}
+				/* total number where links are used on both sides */
+				pseudototal = leftcount*rightcount;
+				if (leftcount > 0) {
+					/* evaluate using the left match, but not the right */
+					pseudototal += leftcount * pseudocount(sent, w, rw, d->right, re, rcost);
+				}
+				if ((le == NULL) && (rightcount > 0)) {
+					/* evaluate using the right match, but not the left */
+					pseudototal += rightcount * pseudocount(sent, lw, w, le, d->left, lcost);
+				}
+				/* now pseudototal is 0 implies that we know that the true total is 0 */
+				if (pseudototal != 0) {
+					rightcount = leftcount = 0;
+					if (Lmatch) {
+						leftcount = do_count(sent, lw, w, le->next, d->left->next, lcost);
+						if (le->multi) leftcount += do_count(sent, lw, w, le, d->left->next, lcost);
+						if (d->left->multi) leftcount += do_count(sent, lw, w, le->next, d->left, lcost);
+						if (le->multi && d->left->multi) leftcount += do_count(sent, lw, w, le, d->left, lcost);
+					}
+					if (Rmatch) {
+						rightcount = do_count(sent, w, rw, d->right->next, re->next, rcost);
+						if (d->right->multi) rightcount += do_count(sent, w,rw,d->right,re->next, rcost);
+						if (re->multi) rightcount += do_count(sent, w, rw, d->right->next, re, rcost);
+						if (d->right->multi && re->multi) rightcount += do_count(sent, w, rw, d->right, re, rcost);
+					}
+					total += leftcount*rightcount;  /* total number where links are used on both sides */
+					if (leftcount > 0) {
+						/* evaluate using the left match, but not the right */
+						total += leftcount * do_count(sent, w, rw, d->right, re, rcost);
+					}
+					if ((le == NULL) && (rightcount > 0)) {
+						/* evaluate using the right match, but not the left */
+						total += rightcount * do_count(sent, lw, w, le, d->left, lcost);
+					}
+				}
+			}
+		}
+		put_match_list(sent, m1);
+	}
+	t->count = total;
+	return total;
+}
+/**
+ * Count the parses of the whole sentence for a given null count.
+ *
+ * "null_count" is the number of words that are allowed to have no
+ * links to them.  The hash table is assumed to have been initialized
+ * by the caller already, and to be freed by the caller afterwards.
+ *
+ * Returns whatever do_count() reports for the full sentence span.
+ */
+s64 do_parse(Sentence sent, int null_count, Parse_Options opts)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	s64 num_parses;
+	count_set_effective_distance(sent);
+	/* Stash the sentence data and the options on the context for
+	 * the duration of the count. */
+	ctxt->local_sent = sent->word;
+	ctxt->deletable = sent->deletable;
+	ctxt->current_resources = opts->resources;
+	ctxt->null_block = opts->null_block;
+	ctxt->islands_ok = opts->islands_ok;
+	/* The span runs from word -1 to sent->length with no pending
+	 * connectors on either side.  NOTE(review): null_count+1 presumably
+	 * compensates for the virtual word at -1 -- confirm in do_count(). */
+	num_parses = do_count(sent, -1, sent->length, NULL, NULL, null_count+1);
+	/* Drop the borrowed pointers again. */
+	ctxt->current_resources = NULL;
+	ctxt->local_sent = NULL;
+	return num_parses;
+}
+/**
+   CONJUNCTION PRUNING.
+   The basic idea is this.  Before creating the fat disjuncts,
+   we run a modified version of the exhaustive search procedure.
+   Its purpose is to mark the disjuncts that can be used in any
+   linkage.  It's just like the normal exhaustive search, except that
+   if a subrange of words is deletable, then we treat those words as though
+   they were not even there.  So, if we call the function in the
+   situation where the set of words between the left and right one
+   are deletable, and the left and right connector pointers
+   are NULL, then that range is considered to have a solution.
+   There are actually two procedures to implement this.  One is
+   mark_region() and the other is region_valid().  The latter just
+   checks to see if the given region can be completed (within it).
+   The former actually marks those disjuncts that can be used in
+   any valid linkage of the given region.
+   As in the standard search procedure, we make use of the fast-match
+   data structure (which requires power pruning to have been done), and
+   we also use a hash table.  The table is used differently in this case.
+   The meanings of the values stored in the table are as follows:
+   -1  Nothing known (Actually, this is not stored.  It's returned
+   by table_lookup when nothing is known.)
+   0  This region can't be completed (marking is therefore irrelevant)
+   1  This region can be completed, but it's not yet marked
+   2  This region can be completed, and it's been marked.
+   */
+/**
+ * Convenience wrapper around prune_match(): looks up the effective
+ * distance recorded in the count context for the word pair (lw, rw)
+ * and hands it to prune_match() together with the two connectors.
+ * The words must be given in natural order, i.e. lw < rw.
+ */
+static int x_prune_match(count_context_t *ctxt,
+                         Connector *le, Connector *re, int lw, int rw)
+{
+	assert(lw < rw, "prune_match() did not receive params in the natural order.");
+	return prune_match(ctxt->effective_dist[lw][rw], le, re);
+}
+/**
+ * Returns 0 if this range cannot be successfully filled in with
+ * links.  Returns 1 if it can, and it's not been marked, and returns
+ * 2 if it can and it has been marked.
+ *
+ * The range is the span between words lw and rw.  le is matched
+ * against the left connectors (d->left) of candidate disjuncts of the
+ * words inside the span, and re against their right connectors
+ * (d->right); either of le and re may be NULL.
+ *
+ * Results are memoized.  A non-negative answer from table_lookup() is
+ * returned unchanged (this is the only way a 2 is returned from
+ * here); otherwise the answer is computed, recorded with
+ * table_store() and returned as 0 or 1.
+ *
+ * A range with no connectors on either side that is flagged in
+ * ctxt->deletable[lw][rw] is accepted without further search.
+ */
+static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
+{
+	Disjunct * d;
+	int left_valid, right_valid, found;
+	int i, start_word, end_word;
+	int w;
+	Match_node * m, *m1;
+	count_context_t *ctxt = sent->count_ctxt;
+	i = table_lookup(sent, lw, rw, le, re, 0); /* negative: nothing known yet */
+	if (i >= 0) return i;
+	if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw]) { /* nothing pending and the span may be deleted */
+		table_store(ctxt, lw, rw, le, re, 0, 1);
+		return 1;
+	}
+	if (le == NULL) {
+		start_word = lw+1;
+	} else {
+		start_word = le->word; /* with a pending left connector, candidates start at le->word */
+	}
+	if (re == NULL) {
+		end_word = rw;
+	} else {
+		end_word = re->word + 1; /* with a pending right connector, candidates stop at re->word (inclusive) */
+	}
+	found = 0;
+	for (w=start_word; w < end_word; w++)
+	{
+		m1 = m = form_match_list(sent, w, le, lw, re, rw); /* m1 keeps the list head for put_match_list() */
+		for (; m!=NULL; m=m->next)
+		{
+			d = m->d;
+			/* mark_cost++;*/
+			/* in the following expressions we use the fact that 0=FALSE. Could eliminate
+			   by always saying "region_valid(...) != 0"  */
+			left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
+						  ((region_valid(sent, lw, w, le->next, d->left->next)) ||
+						   ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
+						   ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
+						   ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
+			if (left_valid && region_valid(sent, w, rw, d->right, re)) { /* link on the left; right side checked with d->right and re untouched */
+				found = 1;
+				break;
+			}
+			right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
+						   ((region_valid(sent, w, rw, d->right->next,re->next))	||
+							((d->right->multi) && region_valid(sent, w,rw,d->right,re->next))  ||
+							((re->multi) && region_valid(sent, w, rw, d->right->next, re))  ||
+							((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
+			if ((left_valid && right_valid) || (right_valid && region_valid(sent, lw, w, le, d->left))) { /* links on both sides, or on the right only */
+				found = 1;
+				break;
+			}
+		}
+		put_match_list(sent, m1); /* also reached after a break out of the inner loop */
+		if (found != 0) break;
+	}
+	table_store(ctxt, lw, rw, le, re, 0, found); /* remember the answer: 0 or 1 */
+	return found;
+}
+/**
+ * Mark as useful all disjuncts involved in some way to complete the
+ * structure within the current region.  Note that only disjuncts
+ * strictly between lw and rw will be marked.  If it so happens that
+ * this region itself is not valid, then this fact will be recorded
+ * in the table, and nothing else happens.
+ *
+ * The region's state comes from region_valid().  States 0 (cannot be
+ * completed) and 2 (already marked) return immediately.  State 1 is
+ * changed to 2 with table_update() before any recursion, so a
+ * later call on the same region returns at once.
+ *
+ * When ctxt->null_links is set, both connectors are NULL and rw is
+ * not the word right after lw, only the word lw+1 is examined: each
+ * of its disjuncts that has no left connectors and whose right side
+ * is valid up to rw is marked, and the remainder (lw+1, rw) is
+ * marked recursively with no connectors.  NOTE(review): that last
+ * call presumably covers word lw+1 being left without links --
+ * confirm against the null-word handling in do_count().
+ *
+ * Otherwise every candidate disjunct between the boundaries is
+ * marked when it can link to the left only, to the right only, or
+ * to both sides, and the corresponding sub-regions are marked
+ * recursively.
+ */
+static void mark_region(Sentence sent,
+                        int lw, int rw, Connector *le, Connector *re)
+{
+	Disjunct * d;
+	int left_valid, right_valid, i;
+	int start_word, end_word;
+	int w;
+	Match_node * m, *m1;
+	count_context_t *ctxt = sent->count_ctxt;
+	i = region_valid(sent, lw, rw, le, re);
+	if ((i==0) || (i==2)) return; /* not completable, or already marked */
+	/* we only reach this point if it's a valid unmarked region, i=1 */
+	table_update(ctxt, lw, rw, le, re, 0, 2);
+	if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw)) {
+		w = lw+1; /* rw != 1+lw was checked in the condition above */
+		for (d = ctxt->local_sent[w].d; d != NULL; d = d->next) {
+			if ((d->left == NULL) && region_valid(sent, w, rw, d->right, NULL)) {
+				d->marked = TRUE;
+				mark_region(sent, w, rw, d->right, NULL);
+			}
+		}
+		mark_region(sent, w, rw, NULL, NULL); /* rest of the region with no connectors pending */
+		return;
+	}
+	if (le == NULL) {
+		start_word = lw+1;
+	} else {
+		start_word = le->word;
+	}
+	if (re == NULL) {
+		end_word = rw;
+	} else {
+		end_word = re->word + 1;
+	}
+	for (w=start_word; w < end_word; w++)
+	{
+		m1 = m = form_match_list(sent, w, le, lw, re, rw); /* m1 keeps the list head for put_match_list() */
+		for (; m!=NULL; m=m->next)
+		{
+			d = m->d;
+			/* mark_cost++;*/
+			left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
+						  ((region_valid(sent, lw, w, le->next, d->left->next)) ||
+						   ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
+						   ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
+						   ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
+			right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
+						   ((region_valid(sent, w, rw, d->right->next,re->next)) ||
+							((d->right->multi) && region_valid(sent, w,rw,d->right,re->next))  ||
+							((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
+							((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
+			/* The following if statements could be restructured to avoid superfluous calls
+			   to mark_region.  It didn't seem a high priority, so I didn't optimize this.
+			   */
+			if (left_valid && region_valid(sent, w, rw, d->right, re)) /* link on the left only */
+			{
+				d->marked = TRUE;
+				mark_region(sent, w, rw, d->right, re);
+				mark_region(sent, lw, w, le->next, d->left->next); /* le and d->left are non-NULL here: left_valid is set */
+				if (le->multi) mark_region(sent, lw, w, le, d->left->next);
+				if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
+				if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
+			}
+			if (right_valid && region_valid(sent, lw, w, le, d->left)) /* link on the right only */
+			{
+				d->marked = TRUE;
+				mark_region(sent, lw, w, le, d->left);
+				mark_region(sent, w, rw, d->right->next,re->next); /* d->right and re are non-NULL here: right_valid is set */
+				if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
+				if (re->multi) mark_region(sent, w, rw, d->right->next, re);
+				if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
+			}
+			if (left_valid && right_valid) /* links on both sides */
+			{
+				d->marked = TRUE;
+				mark_region(sent, lw, w, le->next, d->left->next);
+				if (le->multi) mark_region(sent, lw, w, le, d->left->next);
+				if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
+				if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
+				mark_region(sent, w, rw, d->right->next,re->next);
+				if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
+				if (re->multi) mark_region(sent, w, rw, d->right->next, re);
+				if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
+			}
+		}
+		put_match_list(sent, m1); /* hand the match list for word w back */
+	}
+}
+/**
+ * For every word of the sentence, keep the disjuncts whose "marked"
+ * flag is set and free all the others.  Survivors are pushed one by
+ * one onto a fresh list head, so each word's list ends up in the
+ * reverse of its previous order.
+ */
+void delete_unmarked_disjuncts(Sentence sent)
+{
+	int w;
+	for (w = 0; w < sent->length; w++)
+	{
+		Disjunct *kept = NULL;
+		Disjunct *cur = sent->word[w].d;
+		while (cur != NULL)
+		{
+			/* Remember the successor: cur->next is overwritten below. */
+			Disjunct *rest = cur->next;
+			if (!cur->marked)
+			{
+				/* Cut the link first, so that free_disjuncts() is
+				 * handed a one-element list. */
+				cur->next = NULL;
+				free_disjuncts(cur);
+			}
+			else
+			{
+				cur->next = kept;
+				kept = cur;
+			}
+			cur = rest;
+		}
+		sent->word[w].d = kept;
+	}
+}
+/**
+ * Delete the disjuncts that are irrelevant for a sentence containing
+ * a conjunction.
+ *
+ * Preconditions: the sentence disjuncts have been built, pruned and
+ * power_pruned(GENTLE), and deletable[][] has been initialized to
+ * indicate the ranges which may be deleted in the final linkage.
+ *
+ * The routine works in three steps: every disjunct is unmarked, the
+ * ones that might be usable are marked via mark_region(), and the
+ * ones still unmarked are removed by delete_unmarked_disjuncts().
+ */
+void conjunction_prune(Sentence sent, Parse_Options opts)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	Disjunct *dj;
+	int wi;
+	ctxt->current_resources = opts->resources;
+	ctxt->deletable = sent->deletable;
+	count_set_effective_distance(sent);
+	/* Start from a clean slate.  This pass would be unnecessary if
+	 * every place that creates a disjunct cleared its marked field.
+	 * XXX FIXME, someday ... */
+	for (wi = 0; wi < sent->length; wi++)
+	{
+		dj = sent->word[wi].d;
+		while (dj != NULL)
+		{
+			dj->marked = FALSE;
+			dj = dj->next;
+		}
+	}
+	init_fast_matcher(sent);
+	ctxt->local_sent = sent->word;
+	ctxt->null_links = (opts->min_null_count > 0);
+	if (!ctxt->null_links)
+	{
+		for (wi = 0; wi < sent->length; wi++)
+		{
+			/* Consider removing the words [0,wi-1] from the beginning
+			 * of the sentence.  NOTE(review): this indexes row -1 of
+			 * deletable, as the original does -- confirm that the
+			 * array is allocated with a valid row at that index. */
+			if (!ctxt->deletable[-1][wi]) continue;
+			for (dj = sent->word[wi].d; dj != NULL; dj = dj->next)
+			{
+				/* Only disjuncts with no left connectors are tried
+				 * as the first remaining word. */
+				if (dj->left != NULL) continue;
+				if (!region_valid(sent, wi, sent->length, dj->right, NULL)) continue;
+				mark_region(sent, wi, sent->length, dj->right, NULL);
+				dj->marked = TRUE;
+			}
+		}
+	}
+	else
+	{
+		/* With null links permitted, mark the whole sentence span
+		 * in a single call. */
+		mark_region(sent, -1, sent->length, NULL, NULL);
+	}
+	delete_unmarked_disjuncts(sent);
+	free_fast_matcher(sent);
+	ctxt->local_sent = NULL;
+	ctxt->current_resources = NULL;
+	ctxt->deletable = NULL;
+	count_unset_effective_distance(sent);
+}
+/**
+ * Prepare the counting context of a sentence.
+ *
+ * Allocates sent->count_ctxt if the sentence does not have one yet,
+ * clears the whole context to zero and then calls init_table().
+ *
+ * If the allocation fails, sent->count_ctxt is left NULL and neither
+ * the clearing nor init_table() is attempted; callers that need to
+ * detect this can test the field afterwards.
+ *
+ * NOTE(review): when an existing context is reused, the memset()
+ * wipes every pointer it held before init_table() runs -- confirm
+ * that nothing previously allocated is lost that way.
+ */
+void init_count(Sentence sent)
+{
+	if (NULL == sent->count_ctxt)
+	{
+		sent->count_ctxt = (count_context_t *) malloc (sizeof(count_context_t));
+		/* malloc() can return NULL, and handing NULL to memset() is
+		 * undefined behaviour, so bail out before touching it. */
+		if (NULL == sent->count_ctxt) return;
+	}
+	memset(sent->count_ctxt, 0, sizeof(count_context_t));
+	init_table(sent);
+}
+/**
+ * Release the counting context of a sentence, if it has one: the
+ * context is passed to free_table(), the context itself is freed,
+ * and sent->count_ctxt is reset to NULL.  Does nothing when the
+ * sentence has no context.
+ */
+void free_count(Sentence sent)
+{
+	count_context_t *ctxt = sent->count_ctxt;
+	if (ctxt != NULL)
+	{
+		free_table(ctxt);
+		free(ctxt);
+		sent->count_ctxt = NULL;
+	}
+}