grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2008, 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
void lg_compute_disjunct_strings(Sentence, Linkage_info *);
|
@@ -0,0 +1,92 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include <stdio.h>
|
15
|
+
#include <stdlib.h>
|
16
|
+
#include <string.h>
|
17
|
+
#include <stdarg.h>
|
18
|
+
|
19
|
+
#ifdef USE_PTHREADS
|
20
|
+
#include <pthread.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#include "error.h"
|
24
|
+
#include "structures.h"
|
25
|
+
#include "api-structures.h"
|
26
|
+
|
27
|
+
#ifdef _MSC_VER
|
28
|
+
#define DLLEXPORT __declspec(dllexport)
|
29
|
+
#else
|
30
|
+
#define DLLEXPORT
|
31
|
+
#endif
|
32
|
+
|
33
|
+
/* ============================================================ */
|
34
|
+
|
35
|
+
/*
 * Write a diagnostic to stderr: the fixed "link-grammar: " prefix followed
 * by the printf-style message built from fmt and args.
 *
 * Unless the severity is Info, and provided the context carries a sentence,
 * the words of that sentence are then echoed on a tab-indented line headed
 * "Failing sentence was:".
 */
static void verr_msg(err_ctxt *ec, severity sev, const char *fmt, va_list args)
{
	int w;

	fprintf(stderr, "link-grammar: ");
	vfprintf(stderr, fmt, args);

	/* Info messages never echo the sentence; ec is not consulted for them. */
	if (Info == sev) return;
	if (NULL == ec->sent) return;

	fprintf(stderr, "\tFailing sentence was:\n\t");
	w = 0;
	while (w < ec->sent->length)
	{
		fprintf(stderr, "%s ", ec->sent->word[w].string);
		w++;
	}
	fprintf(stderr, "\n");
}
|
51
|
+
|
52
|
+
/*
 * Variadic front end to verr_msg(): packages the trailing arguments into
 * a va_list and forwards the context, severity and format unchanged.
 */
void err_msg(err_ctxt *ec, severity sev, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verr_msg(ec, sev, fmt, ap);
	va_end(ap);
}
|
59
|
+
|
60
|
+
void prt_error(const char *fmt, ...)
|
61
|
+
{
|
62
|
+
severity sev;
|
63
|
+
err_ctxt ec;
|
64
|
+
va_list args;
|
65
|
+
|
66
|
+
sev = Error;
|
67
|
+
if (0 == strncmp(fmt, "Fatal", 5)) sev = Fatal;
|
68
|
+
if (0 == strncmp(fmt, "Error:", 6)) sev = Error;
|
69
|
+
if (0 == strncmp(fmt, "Warn", 4)) sev = Warn;
|
70
|
+
if (0 == strncmp(fmt, "Info:", 5)) sev = Info;
|
71
|
+
|
72
|
+
ec.sent = NULL;
|
73
|
+
va_start(args, fmt);
|
74
|
+
verr_msg(&ec, sev, fmt, args);
|
75
|
+
va_end(args);
|
76
|
+
}
|
77
|
+
|
78
|
+
/* ============================================================ */
|
79
|
+
/* These are deprecated, obsolete, and unused, but are still here
|
80
|
+
* because these are exported in the public API. Do not use these.
|
81
|
+
*/
|
82
|
+
DLLEXPORT int lperrno = 0;
|
83
|
+
DLLEXPORT char lperrmsg[1];
|
84
|
+
|
85
|
+
extern void lperror_clear(void);
|
86
|
+
void lperror_clear(void)
|
87
|
+
{
|
88
|
+
lperrmsg[0] = 0x0;
|
89
|
+
lperrno = 0;
|
90
|
+
}
|
91
|
+
|
92
|
+
/* ============================================================ */
|
@@ -0,0 +1,35 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
#ifndef _LINK_GRAMMAR_ERROR_H_
|
14
|
+
#define _LINK_GRAMMAR_ERROR_H_
|
15
|
+
|
16
|
+
#include "link-includes.h"
|
17
|
+
|
18
|
+
/* Context handed to err_msg(); carries the sentence a message relates to. */
typedef struct
|
19
|
+
{
|
20
|
+
Sentence sent; /* may be NULL; error.c echoes the sentence's words only when it is set */
|
21
|
+
} err_ctxt;
|
22
|
+
|
23
|
+
/* Message severity levels; numbering starts at 1 and increases by one per entry. */
typedef enum
|
24
|
+
{
|
25
|
+
Fatal = 1, /* chosen by prt_error() for formats beginning "Fatal" */
|
26
|
+
Error, /* 2: prt_error()'s default level */
|
27
|
+
Warn, /* 3: chosen by prt_error() for formats beginning "Warn" */
|
28
|
+
Info, /* 4: the only level for which error.c skips the sentence echo */
|
29
|
+
Debug /* 5 */
|
30
|
+
} severity;
|
31
|
+
|
32
|
+
void err_msg(err_ctxt *, severity, const char *fmt, ...) GNUC_PRINTF(3,4);
|
33
|
+
|
34
|
+
#endif
|
35
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
/*
|
13
|
+
* expand.c
|
14
|
+
*
|
15
|
+
* Enlarge the range of possible disjuncts to consider while parsing.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#include "api-structures.h"
|
19
|
+
#include "expand.h"
|
20
|
+
#include "disjunct-utils.h"
|
21
|
+
#include "word-utils.h"
|
22
|
+
#include "corpus/cluster.h"
|
23
|
+
|
24
|
+
/* ========================================================= */
|
25
|
+
|
26
|
+
/*
 * Ask the cluster for the disjuncts registered under x->string.
 * Returns whatever lg_cluster_get_disjuncts() returns; when that is
 * non-NULL, a one-line "Expanded <word>" notice is printed to stdout.
 */
static Disjunct * build_expansion_disjuncts(Cluster *clu, X_node *x)
{
	Disjunct *found = lg_cluster_get_disjuncts(clu, x->string);

	if (NULL == found) return NULL;

	printf("Expanded %s \n", x->string);
	return found;
}
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Increase the number of disjuncts associated to each word in the
|
36
|
+
* sentence by working with word-clusters. Return true if the number
|
37
|
+
* of disjuncts were expanded, else return false.
|
38
|
+
*/
|
39
|
+
int lg_expand_disjunct_list(Sentence sent)
|
40
|
+
{
|
41
|
+
int w;
|
42
|
+
|
43
|
+
Cluster *clu = lg_cluster_new();
|
44
|
+
|
45
|
+
int expanded = FALSE;
|
46
|
+
for (w = 0; w < sent->length; w++)
|
47
|
+
{
|
48
|
+
X_node * x;
|
49
|
+
Disjunct * d = sent->word[w].d;
|
50
|
+
for (x = sent->word[w].x; x != NULL; x = x->next)
|
51
|
+
{
|
52
|
+
Disjunct *dx = build_expansion_disjuncts(clu, x);
|
53
|
+
if (dx)
|
54
|
+
{
|
55
|
+
int cnt = count_disjuncts(d);
|
56
|
+
d = catenate_disjuncts(dx, d);
|
57
|
+
d = eliminate_duplicate_disjuncts(d);
|
58
|
+
if (cnt < count_disjuncts(d)) expanded = TRUE;
|
59
|
+
}
|
60
|
+
}
|
61
|
+
sent->word[w].d = d;
|
62
|
+
}
|
63
|
+
lg_cluster_delete(clu);
|
64
|
+
|
65
|
+
return expanded;
|
66
|
+
}
|
67
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
int lg_expand_disjunct_list(Sentence sent);
|
@@ -0,0 +1,22 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
/* verbosity global is held in utilities.c */
|
15
|
+
extern int verbosity; /* the verbosity level for error messages */
|
16
|
+
|
17
|
+
/* size of random table for computing the
|
18
|
+
hash functions. must be a power of 2 */
|
19
|
+
#define RTSIZE 256
|
20
|
+
|
21
|
+
extern unsigned int randtable[RTSIZE]; /* random table for hashing */
|
22
|
+
|
@@ -0,0 +1,625 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* Copyright (c) 2010 Linas Vepstas */
|
5
|
+
/* All rights reserved */
|
6
|
+
/* */
|
7
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
8
|
+
/* license set forth in the LICENSE file included with this software, */
|
9
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
10
|
+
/* This license allows free redistribution and use in source and binary */
|
11
|
+
/* forms, with or without modification, subject to certain conditions. */
|
12
|
+
/* */
|
13
|
+
/*************************************************************************/
|
14
|
+
|
15
|
+
#include "api.h"
|
16
|
+
|
17
|
+
/**
|
18
|
+
* The first thing we do is we build a data structure to represent the
|
19
|
+
* result of the entire parse search. There will be a set of nodes
|
20
|
+
* built for each call to the count() function that returned a non-zero
|
21
|
+
* value, AND which is part of a valid linkage. Each of these nodes
|
22
|
+
* represents a valid continuation, and contains pointers to two other
|
23
|
+
* sets (one for the left continuation and one for the right
|
24
|
+
* continuation).
|
25
|
+
*/
|
26
|
+
|
27
|
+
static Parse_set * dummy_set(void)
|
28
|
+
{
|
29
|
+
static Parse_set ds;
|
30
|
+
ds.first = ds.current = NULL;
|
31
|
+
ds.count = 1;
|
32
|
+
return &ds;
|
33
|
+
}
|
34
|
+
|
35
|
+
/** Returns an empty set of parses */
|
36
|
+
static Parse_set * empty_set(void)
|
37
|
+
{
|
38
|
+
Parse_set *s;
|
39
|
+
s = (Parse_set *) xalloc(sizeof(Parse_set));
|
40
|
+
s->first = s->current = NULL;
|
41
|
+
s->count = 0;
|
42
|
+
return s;
|
43
|
+
}
|
44
|
+
|
45
|
+
/*
 * Release a set and every Parse_choice on its list.  A NULL set is
 * ignored.  Only the choice nodes and the set itself are released;
 * the sets that the choices point at are left alone.
 */
static void free_set(Parse_set *s)
{
	Parse_choice *node;

	if (NULL == s) return;

	node = s->first;
	while (node != NULL)
	{
		/* Grab the successor before the node is released. */
		Parse_choice *following = node->next;
		xfree((void *)node, sizeof(*node));
		node = following;
	}

	xfree((void *)s, sizeof(*s));
}
|
55
|
+
|
56
|
+
/*
 * Allocate a Parse_choice and fill it in from the arguments.
 *
 * Slot 0 receives the "l"-prefixed arguments (lset, llw, lrw, llc, lrc)
 * and slot 1 the "r"-prefixed ones (rset, rlw, rrw, rlc, rrc); in each
 * slot the word indices go to link[].l / link[].r and the connectors to
 * link[].lc / link[].rc.  The three disjuncts are stored in ld, md and
 * rd.  The new node is not linked to anything: its next pointer is NULL.
 */
static Parse_choice * make_choice(Parse_set *lset, int llw, int lrw, Connector * llc, Connector * lrc,
                                  Parse_set *rset, int rlw, int rrw, Connector * rlc, Connector * rrc,
                                  Disjunct *ld, Disjunct *md, Disjunct *rd)
{
	Parse_choice *choice = (Parse_choice *) xalloc(sizeof(*choice));

	choice->next = NULL;

	/* Slot 0 */
	choice->set[0]     = lset;
	choice->link[0].l  = llw;
	choice->link[0].r  = lrw;
	choice->link[0].lc = llc;
	choice->link[0].rc = lrc;

	/* Slot 1 */
	choice->set[1]     = rset;
	choice->link[1].l  = rlw;
	choice->link[1].r  = rrw;
	choice->link[1].lc = rlc;
	choice->link[1].rc = rrc;

	/* Disjuncts */
	choice->ld = ld;
	choice->md = md;
	choice->rd = rd;

	return choice;
}
|
78
|
+
|
79
|
+
/**
|
80
|
+
* Put this parse_choice into a given set. The current pointer is always
|
81
|
+
* left pointing to the end of the list.
|
82
|
+
*/
|
83
|
+
static void put_choice_in_set(Parse_set *s, Parse_choice *pc)
|
84
|
+
{
|
85
|
+
if (s->first == NULL)
|
86
|
+
{
|
87
|
+
s->first = pc;
|
88
|
+
}
|
89
|
+
else
|
90
|
+
{
|
91
|
+
s->current->next = pc;
|
92
|
+
}
|
93
|
+
s->current = pc;
|
94
|
+
pc->next = NULL;
|
95
|
+
}
|
96
|
+
|
97
|
+
/**
|
98
|
+
* Allocate the parse info struct
|
99
|
+
*
|
100
|
+
* A piecewise exponential function determines the size of the hash
|
101
|
+
* table. Probably should make use of the actual number of disjuncts,
|
102
|
+
* rather than just the number of words.
|
103
|
+
*/
|
104
|
+
Parse_info parse_info_new(int nwords)
|
105
|
+
{
|
106
|
+
int log2_table_size;
|
107
|
+
Parse_info pi;
|
108
|
+
|
109
|
+
pi = (Parse_info) xalloc(sizeof(struct Parse_info_struct));
|
110
|
+
memset(pi, 0, sizeof(struct Parse_info_struct));
|
111
|
+
pi->N_words = nwords;
|
112
|
+
pi->parse_set = NULL;
|
113
|
+
|
114
|
+
pi->chosen_disjuncts = (Disjunct **) xalloc(nwords * sizeof(Disjunct *));
|
115
|
+
memset(pi->chosen_disjuncts, 0, nwords * sizeof(Disjunct *));
|
116
|
+
|
117
|
+
pi->image_array = (Image_node **) xalloc(nwords * sizeof(Image_node *));
|
118
|
+
memset(pi->image_array, 0, nwords * sizeof(Image_node *));
|
119
|
+
|
120
|
+
pi->has_fat_down = (char *) xalloc(nwords * sizeof(Boolean));
|
121
|
+
memset(pi->has_fat_down, 0, nwords * sizeof(Boolean));
|
122
|
+
|
123
|
+
/* Alloc the x_table */
|
124
|
+
if (nwords >= 10) {
|
125
|
+
log2_table_size = 14;
|
126
|
+
} else if (nwords >= 4) {
|
127
|
+
log2_table_size = nwords;
|
128
|
+
} else {
|
129
|
+
log2_table_size = 4;
|
130
|
+
}
|
131
|
+
pi->log2_x_table_size = log2_table_size;
|
132
|
+
pi->x_table_size = (1 << log2_table_size);
|
133
|
+
|
134
|
+
/*printf("Allocating x_table of size %d\n", x_table_size);*/
|
135
|
+
pi->x_table = (X_table_connector**) xalloc(pi->x_table_size * sizeof(X_table_connector*));
|
136
|
+
memset(pi->x_table, 0, pi->x_table_size * sizeof(X_table_connector*));
|
137
|
+
|
138
|
+
return pi;
|
139
|
+
}
|
140
|
+
|
141
|
+
/**
|
142
|
+
* This is the function that should be used to free the set structure. Since
|
143
|
+
* it's a dag, a recursive free function won't work. Every time we create
|
144
|
+
* a set element, we put it in the hash table, so this is OK.
|
145
|
+
*/
|
146
|
+
void free_parse_info(Parse_info pi)
|
147
|
+
{
|
148
|
+
int i, len;
|
149
|
+
X_table_connector *t, *x;
|
150
|
+
|
151
|
+
len = pi->N_words;
|
152
|
+
xfree(pi->chosen_disjuncts, len * sizeof(Disjunct *));
|
153
|
+
xfree(pi->image_array, len * sizeof(Image_node*));
|
154
|
+
xfree(pi->has_fat_down, len * sizeof(Boolean));
|
155
|
+
|
156
|
+
for (i=0; i<pi->x_table_size; i++)
|
157
|
+
{
|
158
|
+
for(t = pi->x_table[i]; t!= NULL; t=x)
|
159
|
+
{
|
160
|
+
x = t->next;
|
161
|
+
free_set(t->set);
|
162
|
+
xfree((void *) t, sizeof(X_table_connector));
|
163
|
+
}
|
164
|
+
}
|
165
|
+
pi->parse_set = NULL;
|
166
|
+
|
167
|
+
/*printf("Freeing x_table of size %d\n", x_table_size);*/
|
168
|
+
xfree((void *) pi->x_table, pi->x_table_size * sizeof(X_table_connector*));
|
169
|
+
pi->x_table_size = 0;
|
170
|
+
pi->x_table = NULL;
|
171
|
+
|
172
|
+
xfree((void *) pi, sizeof(struct Parse_info_struct));
|
173
|
+
}
|
174
|
+
|
175
|
+
/**
|
176
|
+
* Returns the pointer to this info, NULL if not there.
|
177
|
+
*/
|
178
|
+
static X_table_connector * x_table_pointer(int lw, int rw, Connector *le, Connector *re,
|
179
|
+
int cost, Parse_info pi)
|
180
|
+
{
|
181
|
+
X_table_connector *t;
|
182
|
+
t = pi->x_table[pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost)];
|
183
|
+
for (; t != NULL; t = t->next) {
|
184
|
+
if ((t->lw == lw) && (t->rw == rw) && (t->le == le) && (t->re == re) && (t->cost == cost)) return t;
|
185
|
+
}
|
186
|
+
return NULL;
|
187
|
+
}
|
188
|
+
|
189
|
+
#if DEAD_CODE
/**
 * Look up the parse set stored in the x_table for the quintuple
 * (lw, rw, le, re, cost).  Returns the stored set, or NULL when the
 * table holds no entry for that quintuple.
 */
Parse_set * x_table_lookup(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_info pi)
{
	X_table_connector *t = x_table_pointer(lw, rw, le, re, cost, pi);

	/* "Not found" must be a null pointer: this function returns a
	 * pointer, so the integer -1 is not a valid return value. */
	if (t == NULL) return NULL;
	return t->set;
}
#endif
|
198
|
+
|
199
|
+
/**
|
200
|
+
* Stores the value in the x_table. Assumes it's not already there.
|
201
|
+
*/
|
202
|
+
static X_table_connector * x_table_store(int lw, int rw, Connector *le, Connector *re,
|
203
|
+
int cost, Parse_set * set, Parse_info pi)
|
204
|
+
{
|
205
|
+
X_table_connector *t, *n;
|
206
|
+
int h;
|
207
|
+
|
208
|
+
n = (X_table_connector *) xalloc(sizeof(X_table_connector));
|
209
|
+
n->set = set;
|
210
|
+
n->lw = lw; n->rw = rw; n->le = le; n->re = re; n->cost = cost;
|
211
|
+
h = pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost);
|
212
|
+
t = pi->x_table[h];
|
213
|
+
n->next = t;
|
214
|
+
pi->x_table[h] = n;
|
215
|
+
return n;
|
216
|
+
}
|
217
|
+
|
218
|
+
#ifdef UNUSED_FUNCTION
|
219
|
+
/**
 * Replace the set stored for the key (lw, rw, le, re, cost).
 * Unlike x_table_store(), the entry is expected to exist already;
 * this is asserted.
 */
static void x_table_update(int lw, int rw, Connector *le, Connector *re,
                           int cost, Parse_set * set, Parse_info pi)
{
	X_table_connector *entry;

	entry = x_table_pointer(lw, rw, le, re, cost, pi);
	assert(entry != NULL, "This entry is supposed to be in the x_table.");
	entry->set = set;
}
|
227
|
+
#endif
|
228
|
+
|
229
|
+
|
230
|
+
/**
|
231
|
+
* returns NULL if there are no ways to parse, or returns a pointer
|
232
|
+
* to a set structure representing all the ways to parse.
|
233
|
+
*
|
234
|
+
* This code is similar to code in count.c
|
235
|
+
* (grep for end_word in these files).
|
236
|
+
*/
|
237
|
+
static Parse_set * parse_set(Sentence sent,
|
238
|
+
Disjunct *ld, Disjunct *rd, int lw, int rw,
|
239
|
+
Connector *le, Connector *re, int cost,
|
240
|
+
int islands_ok, Parse_info pi)
|
241
|
+
{
|
242
|
+
Disjunct * d, * dis;
|
243
|
+
int start_word, end_word, w;
|
244
|
+
int lcost, rcost, Lmatch, Rmatch;
|
245
|
+
int i, j;
|
246
|
+
Parse_set *ls[4], *rs[4], *lset, *rset;
|
247
|
+
Parse_choice * a_choice;
|
248
|
+
|
249
|
+
Match_node * m, *m1;
|
250
|
+
X_table_connector *xt;
|
251
|
+
s64 count;
|
252
|
+
|
253
|
+
assert(cost >= 0, "parse_set() called with cost < 0.");
|
254
|
+
|
255
|
+
count = table_lookup(sent, lw, rw, le, re, cost);
|
256
|
+
|
257
|
+
/*
|
258
|
+
assert(count >= 0, "parse_set() called on params that were not in the table.");
|
259
|
+
Actually, we can't assert this, because of the pseudocount technique that's
|
260
|
+
used in count(). It's not the case that every call to parse_set() has already
|
261
|
+
been put into the table.
|
262
|
+
*/
|
263
|
+
|
264
|
+
if ((count == 0) || (count == -1)) return NULL;
|
265
|
+
|
266
|
+
xt = x_table_pointer(lw, rw, le, re, cost, pi);
|
267
|
+
|
268
|
+
if (xt != NULL) return xt->set; /* we've already computed it */
|
269
|
+
|
270
|
+
/* Start it out with the empty set of options. */
|
271
|
+
/* This entry must be updated before we return. */
|
272
|
+
xt = x_table_store(lw, rw, le, re, cost, empty_set(), pi);
|
273
|
+
|
274
|
+
xt->set->count = count; /* the count we already computed */
|
275
|
+
/* this count is non-zero */
|
276
|
+
|
277
|
+
if (rw == 1 + lw) return xt->set;
|
278
|
+
|
279
|
+
if ((le == NULL) && (re == NULL))
|
280
|
+
{
|
281
|
+
if (!islands_ok && (lw != -1)) return xt->set;
|
282
|
+
|
283
|
+
if (cost == 0) return xt->set;
|
284
|
+
|
285
|
+
w = lw + 1;
|
286
|
+
for (dis = sent->word[w].d; dis != NULL; dis = dis->next)
|
287
|
+
{
|
288
|
+
if (dis->left == NULL)
|
289
|
+
{
|
290
|
+
rs[0] = parse_set(sent, dis, NULL, w, rw, dis->right,
|
291
|
+
NULL, cost-1, islands_ok, pi);
|
292
|
+
if (rs[0] == NULL) continue;
|
293
|
+
a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
|
294
|
+
rs[0], w, rw, NULL, NULL,
|
295
|
+
NULL, NULL, NULL);
|
296
|
+
put_choice_in_set(xt->set, a_choice);
|
297
|
+
}
|
298
|
+
}
|
299
|
+
rs[0] = parse_set(sent, NULL, NULL, w, rw, NULL, NULL,
|
300
|
+
cost-1, islands_ok, pi);
|
301
|
+
if (rs[0] != NULL)
|
302
|
+
{
|
303
|
+
a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
|
304
|
+
rs[0], w, rw, NULL, NULL,
|
305
|
+
NULL, NULL, NULL);
|
306
|
+
put_choice_in_set(xt->set, a_choice);
|
307
|
+
}
|
308
|
+
return xt->set;
|
309
|
+
}
|
310
|
+
|
311
|
+
if (le == NULL)
|
312
|
+
{
|
313
|
+
start_word = lw + 1;
|
314
|
+
}
|
315
|
+
else
|
316
|
+
{
|
317
|
+
start_word = le->word;
|
318
|
+
}
|
319
|
+
|
320
|
+
if (re == NULL)
|
321
|
+
{
|
322
|
+
end_word = rw;
|
323
|
+
}
|
324
|
+
else
|
325
|
+
{
|
326
|
+
end_word = re->word + 1;
|
327
|
+
}
|
328
|
+
|
329
|
+
for (w = start_word; w < end_word; w++)
|
330
|
+
{
|
331
|
+
m1 = m = form_match_list(sent, w, le, lw, re, rw);
|
332
|
+
for (; m!=NULL; m=m->next)
|
333
|
+
{
|
334
|
+
d = m->d;
|
335
|
+
for (lcost = 0; lcost <= cost; lcost++)
|
336
|
+
{
|
337
|
+
rcost = cost-lcost;
|
338
|
+
/* now lcost and rcost are the costs we're assigning to
|
339
|
+
* those parts respectively */
|
340
|
+
|
341
|
+
/* Now, we determine if (based on table only) we can see that
|
342
|
+
the current range is not parsable. */
|
343
|
+
|
344
|
+
Lmatch = (le != NULL) && (d->left != NULL) && do_match(sent, le, d->left, lw, w);
|
345
|
+
Rmatch = (d->right != NULL) && (re != NULL) && do_match(sent, d->right, re, w, rw);
|
346
|
+
for (i=0; i<4; i++) {ls[i] = rs[i] = NULL;}
|
347
|
+
if (Lmatch)
|
348
|
+
{
|
349
|
+
ls[0] = parse_set(sent, ld, d, lw, w, le->next, d->left->next, lcost, islands_ok, pi);
|
350
|
+
if (le->multi) ls[1] = parse_set(sent, ld, d, lw, w, le, d->left->next, lcost, islands_ok, pi);
|
351
|
+
if (d->left->multi) ls[2] = parse_set(sent, ld, d, lw, w, le->next, d->left, lcost, islands_ok, pi);
|
352
|
+
if (le->multi && d->left->multi) ls[3] = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
|
353
|
+
}
|
354
|
+
if (Rmatch)
|
355
|
+
{
|
356
|
+
rs[0] = parse_set(sent, d, rd, w, rw, d->right->next, re->next, rcost, islands_ok, pi);
|
357
|
+
if (d->right->multi) rs[1] = parse_set(sent, d, rd, w,rw,d->right,re->next, rcost, islands_ok, pi);
|
358
|
+
if (re->multi) rs[2] = parse_set(sent, d, rd, w, rw, d->right->next, re, rcost, islands_ok, pi);
|
359
|
+
if (d->right->multi && re->multi) rs[3] = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
|
360
|
+
}
|
361
|
+
|
362
|
+
for (i=0; i<4; i++)
|
363
|
+
{
|
364
|
+
/* this ordering is probably not consistent with that
|
365
|
+
* needed to use list_links */
|
366
|
+
if (ls[i] == NULL) continue;
|
367
|
+
for (j=0; j<4; j++)
|
368
|
+
{
|
369
|
+
if (rs[j] == NULL) continue;
|
370
|
+
a_choice = make_choice(ls[i], lw, w, le, d->left,
|
371
|
+
rs[j], w, rw, d->right, re,
|
372
|
+
ld, d, rd);
|
373
|
+
put_choice_in_set(xt->set, a_choice);
|
374
|
+
}
|
375
|
+
}
|
376
|
+
|
377
|
+
if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL)
|
378
|
+
{
|
379
|
+
/* evaluate using the left match, but not the right */
|
380
|
+
rset = parse_set(sent, d, rd, w, rw, d->right, re, rcost, islands_ok, pi);
|
381
|
+
if (rset != NULL)
|
382
|
+
{
|
383
|
+
for (i=0; i<4; i++)
|
384
|
+
{
|
385
|
+
if (ls[i] == NULL) continue;
|
386
|
+
/* this ordering is probably not consistent with
|
387
|
+
* that needed to use list_links */
|
388
|
+
a_choice = make_choice(ls[i], lw, w, le, d->left,
|
389
|
+
rset, w, rw, NULL /* d->right */,
|
390
|
+
re, /* the NULL indicates no link*/
|
391
|
+
ld, d, rd);
|
392
|
+
put_choice_in_set(xt->set, a_choice);
|
393
|
+
}
|
394
|
+
}
|
395
|
+
}
|
396
|
+
if ((le == NULL) && (rs[0] != NULL ||
|
397
|
+
rs[1] != NULL || rs[2] != NULL || rs[3] != NULL))
|
398
|
+
{
|
399
|
+
/* evaluate using the right match, but not the left */
|
400
|
+
lset = parse_set(sent, ld, d, lw, w, le, d->left, lcost, islands_ok, pi);
|
401
|
+
|
402
|
+
if (lset != NULL)
|
403
|
+
{
|
404
|
+
for (i=0; i<4; i++)
|
405
|
+
{
|
406
|
+
if (rs[i] == NULL) continue;
|
407
|
+
/* this ordering is probably not consistent with
|
408
|
+
* that needed to use list_links */
|
409
|
+
a_choice = make_choice(lset, lw, w, NULL /* le */,
|
410
|
+
d->left, /* NULL indicates no link */
|
411
|
+
rs[i], w, rw, d->right, re,
|
412
|
+
ld, d, rd);
|
413
|
+
put_choice_in_set(xt->set, a_choice);
|
414
|
+
}
|
415
|
+
}
|
416
|
+
}
|
417
|
+
}
|
418
|
+
}
|
419
|
+
put_match_list(sent, m1);
|
420
|
+
}
|
421
|
+
xt->set->current = xt->set->first;
|
422
|
+
return xt->set;
|
423
|
+
}
|
424
|
+
|
425
|
+
/**
|
426
|
+
* return TRUE if and only if overflow in the number of parses occured.
|
427
|
+
* Use a 64-bit int for counting.
|
428
|
+
*/
|
429
|
+
static int verify_set_node(Parse_set *set)
|
430
|
+
{
|
431
|
+
Parse_choice *pc;
|
432
|
+
s64 n;
|
433
|
+
if (set == NULL || set->first == NULL) return FALSE;
|
434
|
+
n = 0;
|
435
|
+
for (pc = set->first; pc != NULL; pc = pc->next)
|
436
|
+
{
|
437
|
+
n += pc->set[0]->count * pc->set[1]->count;
|
438
|
+
if (PARSE_NUM_OVERFLOW < n) return TRUE;
|
439
|
+
}
|
440
|
+
return FALSE;
|
441
|
+
}
|
442
|
+
|
443
|
+
/**
 * Run verify_set_node() on the set of every x_table entry.
 * Returns TRUE as soon as one of them reports overflow, and FALSE
 * if none does.
 */
static int verify_set(Parse_info pi)
{
	int bucket;
	X_table_connector *entry;

	assert(pi->x_table != NULL, "called verify_set when x_table==NULL");

	for (bucket = 0; bucket < pi->x_table_size; bucket++)
	{
		entry = pi->x_table[bucket];
		while (entry != NULL)
		{
			if (verify_set_node(entry->set)) return TRUE;
			entry = entry->next;
		}
	}
	return FALSE;
}
|
458
|
+
|
459
|
+
/**
|
460
|
+
* This is the top level call that computes the whole parse_set. It
|
461
|
+
* points whole_set at the result. It creates the necessary hash
|
462
|
+
* table (x_table) which will be freed at the same time the
|
463
|
+
* whole_set is freed.
|
464
|
+
*
|
465
|
+
* It also assumes that count() has been run, and that hash table is
|
466
|
+
* filled with the values thus computed. Therefore this function
|
467
|
+
* must be structured just like parse() (the main function for
|
468
|
+
* count()).
|
469
|
+
*
|
470
|
+
* If the number of linkages gets huge, then the counts can overflow.
|
471
|
+
* We check if this has happened when verifying the parse set.
|
472
|
+
* This routine returns TRUE iff overflowed occurred.
|
473
|
+
*/
|
474
|
+
|
475
|
+
int build_parse_set(Sentence sent, int cost, Parse_Options opts)
|
476
|
+
{
|
477
|
+
Parse_set * whole_set;
|
478
|
+
|
479
|
+
whole_set =
|
480
|
+
parse_set(sent, NULL, NULL, -1, sent->length, NULL, NULL, cost+1,
|
481
|
+
opts->islands_ok, sent->parse_info);
|
482
|
+
|
483
|
+
if ((whole_set != NULL) && (whole_set->current != NULL)) {
|
484
|
+
whole_set->current = whole_set->first;
|
485
|
+
}
|
486
|
+
|
487
|
+
sent->parse_info->parse_set = whole_set;
|
488
|
+
|
489
|
+
return verify_set(sent->parse_info);
|
490
|
+
}
|
491
|
+
|
492
|
+
/**
 * Reset the link-extraction state of pi: clear the first N_words
 * entries of chosen_disjuncts and set the link count to zero.
 */
static void initialize_links(Parse_info pi)
{
	memset(pi->chosen_disjuncts, 0, pi->N_words * sizeof(Disjunct *));
	pi->N_links = 0;
}
|
497
|
+
|
498
|
+
/**
 * Append link to pi's link array, and record ld and rd as the
 * disjuncts chosen for the words at link.l and link.r.
 * Asserts that the link array still has room.
 */
static void issue_link(Parse_info pi, Disjunct * ld, Disjunct * rd, Link link)
{
	assert(pi->N_links <= MAX_LINKS-1, "Too many links");

	pi->link_array[pi->N_links++] = link;

	pi->chosen_disjuncts[link.l] = ld;
	pi->chosen_disjuncts[link.r] = rd;
}
|
507
|
+
|
508
|
+
/**
 * Issue the links, at most two, that a parse choice carries.
 * link[0] is issued with the disjunct pair (ld, md) and link[1] with
 * (md, rd). A link whose lc is NULL means there is no link to
 * generate in that slot.
 */
static void issue_links_for_choice(Parse_info pi, Parse_choice *pc)
{
	if (pc->link[0].lc != NULL)
		issue_link(pi, pc->ld, pc->md, pc->link[0]);

	if (pc->link[1].lc != NULL)
		issue_link(pi, pc->md, pc->rd, pc->link[1]);
}
|
517
|
+
|
518
|
+
#ifdef NOT_USED_ANYWHERE
|
519
|
+
/**
 * Issue the links of the "current" choice of set, then do the same
 * for the two sub-sets of that choice. Does nothing for a NULL set
 * or a set without a current choice.
 */
static void build_current_linkage_recursive(Parse_info pi, Parse_set *set)
{
	Parse_choice *cur;

	if (set == NULL || set->current == NULL) return;

	cur = set->current;
	issue_links_for_choice(pi, cur);
	build_current_linkage_recursive(pi, cur->set[0]);
	build_current_linkage_recursive(pi, cur->set[1]);
}
|
528
|
+
|
529
|
+
/**
|
530
|
+
* This function takes the "current" point in the given set and
|
531
|
+
* generates the linkage that it represents.
|
532
|
+
*/
|
533
|
+
void build_current_linkage(Parse_info pi)
|
534
|
+
{
|
535
|
+
initialize_links(pi);
|
536
|
+
build_current_linkage_recursive(pi, pi->parse_set);
|
537
|
+
}
|
538
|
+
|
539
|
+
/**
|
540
|
+
* Advance the "current" linkage to the next one
|
541
|
+
* return 1 if there's a "carry" from this node,
|
542
|
+
* which indicates that the scan of this node has
|
543
|
+
* just been completed, and it's now back to it's
|
544
|
+
* starting state.
|
545
|
+
*/
|
546
|
+
static int advance_linkage(Parse_info pi, Parse_set * set)
|
547
|
+
{
|
548
|
+
if (set == NULL) return 1; /* probably can't happen */
|
549
|
+
if (set->first == NULL) return 1; /* the empty set */
|
550
|
+
if (advance_linkage(pi, set->current->set[0]) == 1) {
|
551
|
+
if (advance_linkage(pi, set->current->set[1]) == 1) {
|
552
|
+
if (set->current->next == NULL) {
|
553
|
+
set->current = set->first;
|
554
|
+
return 1;
|
555
|
+
}
|
556
|
+
set->current = set->current->next;
|
557
|
+
}
|
558
|
+
}
|
559
|
+
return 0;
|
560
|
+
}
|
561
|
+
|
562
|
+
/**
 * Advance pi's whole parse set to its next linkage.
 * The carry reported by advance_linkage() is ignored.
 */
static void advance_parse_set(Parse_info pi)
{
	(void) advance_linkage(pi, pi->parse_set);
}
|
566
|
+
#endif
|
567
|
+
|
568
|
+
/**
 * Issue the links of the index'th linkage contained in set.
 *
 * Each choice of the set accounts for
 * (count of set[0]) * (count of set[1]) linkages. The choice that
 * index falls into is located first; the remaining offset is then
 * split into an index for set[0] (the remainder modulo set[0]'s
 * count) and an index for set[1] (the quotient).
 */
static void list_links(Parse_info pi, Parse_set * set, int index)
{
	Parse_choice *pc;
	s64 span;

	if (set == NULL || set->first == NULL) return;

	/* Skip whole choices until index lands inside one. */
	pc = set->first;
	while (pc != NULL)
	{
		span = pc->set[0]->count * pc->set[1]->count;
		if (index < span) break;
		index -= span;
		pc = pc->next;
	}
	assert(pc != NULL, "walked off the end in list_links");

	issue_links_for_choice(pi, pc);
	list_links(pi, pc->set[0], index % pc->set[0]->count);
	list_links(pi, pc->set[1], index / pc->set[0]->count);
}
|
584
|
+
|
585
|
+
/**
 * Issue the links of a randomly chosen linkage contained in set.
 *
 * One choice of the set is picked using rand_r() on pi->rand_state
 * (every choice is equally likely to be picked, whatever number of
 * linkages it stands for); the same is then done for both sub-sets
 * of the picked choice.
 */
static void list_random_links(Parse_info pi, Parse_set * set)
{
	Parse_choice *pc;
	int total, pick;

	if (set == NULL || set->first == NULL) return;

	/* Count the choices in this set. */
	total = 0;
	for (pc = set->first; pc != NULL; pc = pc->next) total++;

	pick = rand_r(&pi->rand_state) % total;

	/* Step forward to the pick'th choice. */
	for (pc = set->first; pc != NULL && pick != 0; pc = pc->next) pick--;

	assert(pc != NULL, "Couldn't get a random parse choice");

	issue_links_for_choice(pi, pc);
	list_random_links(pi, pc->set[0]);
	list_random_links(pi, pc->set[1]);
}
|
609
|
+
|
610
|
+
/**
|
611
|
+
* Generate the list of all links of the index'th parsing of the
|
612
|
+
* sentence. For this to work, you must have already called parse, and
|
613
|
+
* already built the whole_set.
|
614
|
+
*/
|
615
|
+
void extract_links(int index, int cost, Parse_info pi)
|
616
|
+
{
|
617
|
+
initialize_links(pi);
|
618
|
+
pi->rand_state = index;
|
619
|
+
if (index < 0) {
|
620
|
+
list_random_links(pi, pi->parse_set);
|
621
|
+
}
|
622
|
+
else {
|
623
|
+
list_links(pi, pi->parse_set, index);
|
624
|
+
}
|
625
|
+
}
|