grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,1317 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
|
15
|
+
#include <stdarg.h>
|
16
|
+
#include "api.h"
|
17
|
+
|
18
|
+
/**
|
19
|
+
* The functions in this file do several things: () take a linkage
|
20
|
+
* involving fat links and expand it into a sequence of linkages
|
21
|
+
* (involving a subset of the given words), one for each way of
|
22
|
+
* eliminating the conjunctions. () determine if a linkage involving
|
23
|
+
* fat links has a structural violation. () make sure each of the expanded
|
24
|
+
* linkages has a consistent post-processing behavior. () compute the
|
25
|
+
* cost of the linkage.
|
26
|
+
*/
|
27
|
+
|
28
|
+
typedef struct patch_element_struct Patch_element;

/* Per-link bookkeeping used while a fat linkage is rewritten ("patched")
 * into a sequence of thin linkages. One entry per link in the linkage. */
struct patch_element_struct
{
	char used;   /* TRUE if this link is used, else FALSE */
	char changed;/* TRUE if this link changed, else FALSE */
	int newl;    /* the new value of the left end */
	int newr;    /* the new value of the right end */
};

/* Scratch state threaded through all of the analysis routines in this
 * file, replacing what were once file-scope globals. One instance is
 * used per linkage being analyzed. */
struct analyze_context_s
{
	List_o_links *word_links[MAX_SENTENCE]; /* ptr to l.o.l. out of word */
	int structure_violation;                /* set TRUE by c_dfs when the
	                                           fat-link structure is improper */

	int dfs_root_word[MAX_SENTENCE]; /* for the depth-first search */
	int dfs_height[MAX_SENTENCE];    /* to determine the order to do the root word dfs */
	int height_perm[MAX_SENTENCE];   /* permute the vertices from highest to lowest */

	/* The following are all for computing the cost of and lists */
	int visited[MAX_SENTENCE];
	int and_element_sizes[MAX_SENTENCE];
	int and_element[MAX_SENTENCE];
	int N_and_elements;
	int outside_word[MAX_SENTENCE];
	int N_outside_words;
	Patch_element patch_array[MAX_LINKS];
};
|
55
|
+
|
56
|
+
typedef struct CON_node_struct CON_node;
typedef struct CON_list_struct CON_list;
typedef struct DIS_list_struct DIS_list;
typedef struct Links_to_patch_struct Links_to_patch;

/* A DIS node: one "disjunctive" region of the linkage graph, reachable
 * through thin links. Its children (cl) are the CON nodes found inside
 * the region; lol holds the thin links that make up the region. */
struct DIS_node_struct
{
	CON_list * cl;     /* the list of children */
	List_o_links * lol;/* the links that comprise this region of the graph */
	int word;          /* the word defining this node */
};

/* A CON node: a word with fat links pointing down out of it. The dl
 * list holds one DIS child per down fat link; current is the cursor
 * used by advance_CON() to iterate over expansions. */
struct CON_node_struct
{
	DIS_list * dl;     /* the list of children */
	DIS_list * current;/* defines the current child */
	int word;          /* the word defining this node */
};

/* Singly-linked list cell of DIS children under a CON node. */
struct DIS_list_struct
{
	DIS_list * next;
	DIS_node * dn;
};

/* Singly-linked list cell of CON children under a DIS node. */
struct CON_list_struct
{
	CON_list * next;
	CON_node * cn;
};

/* Work-list cell naming a link whose left or right end must be moved
 * while patching a fat linkage into a thin one. */
struct Links_to_patch_struct
{
	Links_to_patch * next;
	int link;
	char dir; /* this is 'r' or 'l' depending on which end of the link
	             is to be patched. */
};
|
94
|
+
|
95
|
+
/**
 * Reset a sublinkage to a blank state: clear the post-processing info
 * and violation string, null out every link pointer up to num_links,
 * and zero the embedded PP_data scratch area.
 */
void zero_sublinkage(Sublinkage *s)
{
	int idx;

	s->pp_info = NULL;
	s->violation = NULL;
	for (idx = 0; idx < s->num_links; idx++)
	{
		s->link[idx] = NULL;
	}

	memset(&s->pp_data, 0, sizeof(PP_data));
}
|
104
|
+
|
105
|
+
/**
 * Allocate (with xalloc) a sublinkage sized for the full MAX_LINKS
 * capacity, zero it, and then record the actual link count from pi.
 *
 * Fix: the precondition pi->N_links < MAX_LINKS is now checked BEFORE
 * any allocation takes place, instead of after both xallocs had already
 * been performed.
 */
static Sublinkage * x_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) xalloc (sizeof(Sublinkage));
	s->link = (Link **) xalloc(MAX_LINKS*sizeof(Link *));

	/* Temporarily claim the full capacity so zero_sublinkage()
	 * nulls every slot, not just the first N_links. */
	s->num_links = MAX_LINKS;
	zero_sublinkage(s);

	s->num_links = pi->N_links;
	return s;
}
|
117
|
+
|
118
|
+
/**
 * Allocate (with exalloc) a sublinkage sized exactly for pi->N_links
 * links and zero it.
 *
 * Fix: the precondition pi->N_links < MAX_LINKS is now checked BEFORE
 * the allocations, rather than after memory sized by pi->N_links had
 * already been requested.
 */
static Sublinkage * ex_create_sublinkage(Parse_info pi)
{
	Sublinkage *s;

	assert(pi->N_links < MAX_LINKS, "Too many links");

	s = (Sublinkage *) exalloc (sizeof(Sublinkage));
	s->link = (Link **) exalloc(pi->N_links*sizeof(Link *));
	s->num_links = pi->N_links;

	zero_sublinkage(s);

	return s;
}
|
129
|
+
|
130
|
+
/**
 * Free a sublinkage created by x_create_sublinkage(): release every
 * non-NULL link, then the link-pointer array, then the struct itself.
 *
 * Fix: the link array was allocated as MAX_LINKS*sizeof(Link *)
 * (an array of pointers) in x_create_sublinkage, but was being freed
 * with size MAX_LINKS*sizeof(Link). xfree's size must match the
 * corresponding xalloc or the allocator's space accounting is corrupted.
 */
static void free_sublinkage(Sublinkage *s)
{
	int i;
	for (i = 0; i < MAX_LINKS; i++) {
		if (s->link[i] != NULL) exfree_link(s->link[i]);
	}
	xfree(s->link, MAX_LINKS*sizeof(Link *));
	xfree(s, sizeof(Sublinkage));
}
|
139
|
+
|
140
|
+
/**
 * Replace the name of link l with a freshly allocated copy of s,
 * releasing the old name's storage first.
 */
static void replace_link_name(Link *l, const char *s)
{
	char *copy;

	/* XXX can get some perf improvement by avoiding strlen! */
	exfree((char *) l->name, sizeof(char)*(strlen(l->name)+1));
	copy = (char *) exalloc(sizeof(char)*(strlen(s)+1));
	strcpy(copy, s);
	l->name = copy;
}
|
149
|
+
|
150
|
+
/**
 * Overwrite *dest with a deep copy of src, freeing whatever link
 * *dest previously pointed at (if any).
 */
static void copy_full_link(Link **dest, Link *src)
{
	if (NULL != *dest)
	{
		exfree_link(*dest);
	}
	*dest = excopy_link(src);
}
|
155
|
+
|
156
|
+
/* end new code 9/97 ALB */
|
157
|
+
|
158
|
+
|
159
|
+
/**
|
160
|
+
* Constructs a graph in the wordlinks array based on the contents of
|
161
|
+
* the global link_array. Makes the wordlinks array point to a list of
|
162
|
+
* words neighboring each word (actually a list of links). This is a
|
163
|
+
* directed graph, constructed for dealing with "and". For a link in
|
164
|
+
* which the priorities are UP or DOWN_priority, the edge goes from the
|
165
|
+
* one labeled DOWN to the one labeled UP.
|
166
|
+
* Don't generate links edges for the bogus comma connectors.
|
167
|
+
*/
|
168
|
+
static void build_digraph(analyze_context_t *actx, Parse_info pi)
|
169
|
+
{
|
170
|
+
int i, link, N_fat;
|
171
|
+
Link *lp;
|
172
|
+
List_o_links * lol;
|
173
|
+
N_fat = 0;
|
174
|
+
|
175
|
+
for (i = 0; i < pi->N_words; i++)
|
176
|
+
{
|
177
|
+
actx->word_links[i] = NULL;
|
178
|
+
}
|
179
|
+
|
180
|
+
for (link = 0; link < pi->N_links; link++)
|
181
|
+
{
|
182
|
+
lp = &(pi->link_array[link]);
|
183
|
+
i = lp->lc->label;
|
184
|
+
if (i < NORMAL_LABEL) { /* one of those special links for either-or, etc */
|
185
|
+
continue;
|
186
|
+
}
|
187
|
+
|
188
|
+
lol = (List_o_links *) xalloc(sizeof(List_o_links));
|
189
|
+
lol->next = actx->word_links[lp->l];
|
190
|
+
actx->word_links[lp->l] = lol;
|
191
|
+
lol->link = link;
|
192
|
+
lol->word = lp->r;
|
193
|
+
i = lp->lc->priority;
|
194
|
+
if (i == THIN_priority) {
|
195
|
+
lol->dir = 0;
|
196
|
+
} else if (i == DOWN_priority) {
|
197
|
+
lol->dir = 1;
|
198
|
+
} else {
|
199
|
+
lol->dir = -1;
|
200
|
+
}
|
201
|
+
|
202
|
+
lol = (List_o_links *) xalloc(sizeof(List_o_links));
|
203
|
+
lol->next = actx->word_links[lp->r];
|
204
|
+
actx->word_links[lp->r] = lol;
|
205
|
+
lol->link = link;
|
206
|
+
lol->word = lp->l;
|
207
|
+
i = lp->rc->priority;
|
208
|
+
if (i == THIN_priority) {
|
209
|
+
lol->dir = 0;
|
210
|
+
} else if (i == DOWN_priority) {
|
211
|
+
lol->dir = 1;
|
212
|
+
} else {
|
213
|
+
lol->dir = -1;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
/**
|
219
|
+
* Returns TRUE if there is at least one fat link pointing out of this word.
|
220
|
+
*/
|
221
|
+
static int is_CON_word(int w, List_o_links **wordlinks)
|
222
|
+
{
|
223
|
+
List_o_links * lol;
|
224
|
+
for (lol = wordlinks[w]; lol != NULL; lol = lol->next)
|
225
|
+
{
|
226
|
+
if (lol->dir == 1) return TRUE;
|
227
|
+
}
|
228
|
+
return FALSE;
|
229
|
+
}
|
230
|
+
|
231
|
+
static DIS_node * build_DIS_node(analyze_context_t*, int);
|
232
|
+
|
233
|
+
/**
|
234
|
+
* This word is a CON word (has fat links down). Build the tree for it.
|
235
|
+
*/
|
236
|
+
static CON_node * build_CON_node(analyze_context_t *actx, int w)
|
237
|
+
{
|
238
|
+
List_o_links * lol;
|
239
|
+
CON_node * a;
|
240
|
+
DIS_list * d, *dx;
|
241
|
+
d = NULL;
|
242
|
+
for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
|
243
|
+
{
|
244
|
+
if (lol->dir == 1)
|
245
|
+
{
|
246
|
+
dx = (DIS_list *) xalloc (sizeof (DIS_list));
|
247
|
+
dx->next = d;
|
248
|
+
d = dx;
|
249
|
+
d->dn = build_DIS_node(actx, lol->word);
|
250
|
+
}
|
251
|
+
}
|
252
|
+
a = (CON_node *) xalloc(sizeof (CON_node));
|
253
|
+
a->dl = a->current = d;
|
254
|
+
a->word = w;
|
255
|
+
return a;
|
256
|
+
}
|
257
|
+
|
258
|
+
/**
 * Does a depth-first-search starting from w. Puts on the front of the
 * list pointed to by c all of the CON nodes it finds, and returns the
 * result. Also construct the list of all edges reached as part of this
 * DIS_node search and append it to the lol list of start_dn.
 *
 * Both of the structure violations actually occur, and represent
 * linkages that have improper structure. Fortunately, they
 * seem to be rather rare.
 */
static CON_list * c_dfs(analyze_context_t *actx,
                        int w, DIS_node * start_dn, CON_list * c)
{
	CON_list *cx;
	List_o_links * lol, *lolx;

	/* Already visited: if it was reached from a different DIS root,
	 * the regions overlap -- a structure violation. Either way, stop. */
	if (actx->dfs_root_word[w] != -1)
	{
		if (actx->dfs_root_word[w] != start_dn->word)
		{
			actx->structure_violation = TRUE;
		}
		return c;
	}
	/* Claim w for the region rooted at start_dn. */
	actx->dfs_root_word[w] = start_dn->word;

	for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
	{
		if (lol->dir < 0) /* a backwards link */
		{
			/* An up fat link to a word not yet claimed by any
			 * root: the parent should have been explored first. */
			if (actx->dfs_root_word[lol->word] == -1)
			{
				actx->structure_violation = TRUE;
			}
		}
		else if (lol->dir == 0) /* thin link: stays inside this region */
		{
			/* Make a copy of the link */
			lolx = (List_o_links *) xalloc(sizeof(List_o_links));
			lolx->word = lol->word;
			lolx->dir = lol->dir;
			lolx->link = lol->link;

			/* Chain it into place */
			lolx->next = start_dn->lol;
			start_dn->lol = lolx;
			c = c_dfs(actx, lol->word, start_dn, c);
		}
		/* dir == 1 (down fat link) is handled below via build_CON_node. */
	}

	/* if the current node is CON, put it first */
	if (is_CON_word(w, actx->word_links))
	{
		cx = (CON_list *) xalloc(sizeof(CON_list));
		cx->next = c;
		c = cx;
		c->cn = build_CON_node(actx, w);
	}
	return c;
}
|
316
|
+
|
317
|
+
/**
|
318
|
+
* This node is connected to its parent via a fat link. Search the
|
319
|
+
* region reachable via thin links, and put all reachable nodes with fat
|
320
|
+
* links out of them in its list of children.
|
321
|
+
*/
|
322
|
+
static DIS_node * build_DIS_node(analyze_context_t *actx,
|
323
|
+
int w)
|
324
|
+
{
|
325
|
+
DIS_node * dn;
|
326
|
+
dn = (DIS_node *) xalloc(sizeof (DIS_node));
|
327
|
+
dn->word = w; /* must do this before dfs so it knows the start word */
|
328
|
+
dn->lol = NULL;
|
329
|
+
dn->cl = c_dfs(actx, w, dn, NULL);
|
330
|
+
return dn;
|
331
|
+
}
|
332
|
+
|
333
|
+
/**
 * Depth-first assignment of heights to words: w gets the given height,
 * and each neighbor is visited at a height offset by the edge's
 * direction (dir is 1 for a down link, so children land one lower).
 * A word whose height is already non-zero is left untouched.
 */
static void height_dfs(analyze_context_t *actx, int w, int height)
{
	List_o_links *edge;

	if (actx->dfs_height[w] != 0) return;
	actx->dfs_height[w] = height;

	for (edge = actx->word_links[w]; edge != NULL; edge = edge->next)
	{
		height_dfs(actx, edge->word, height - edge->dir);
	}
}
|
346
|
+
|
347
|
+
/**
|
348
|
+
* Simple insertion sort; should be plenty fast enough, since sentences
|
349
|
+
* are almost always shorter than 30 words or so. In fact, this is
|
350
|
+
* almost surely faster than qsort for such small arrays.
|
351
|
+
*/
|
352
|
+
static void insort (analyze_context_t *actx, int nwords)
|
353
|
+
{
|
354
|
+
int i, j;
|
355
|
+
for (i=1; i<nwords; i++)
|
356
|
+
{
|
357
|
+
int heig = actx->dfs_height[i];
|
358
|
+
int perm = actx->height_perm[i];
|
359
|
+
j = i;
|
360
|
+
while (j>0 && (heig > actx->dfs_height[j-1]))
|
361
|
+
{
|
362
|
+
actx->dfs_height[j] = actx->dfs_height[j-1];
|
363
|
+
actx->height_perm[j] = actx->height_perm[j-1];
|
364
|
+
j--;
|
365
|
+
}
|
366
|
+
actx->dfs_height[j] = heig;
|
367
|
+
actx->height_perm[j] = perm;
|
368
|
+
}
|
369
|
+
}
|
370
|
+
|
371
|
+
/**
 * Build the full DIS_CON tree for the linkage: assign heights with
 * height_dfs, sort words from highest to lowest, then build a DIS node
 * for each still-unclaimed word in that order, merging every node after
 * the first into the root by splicing its child and link lists.
 */
static DIS_node * build_DIS_CON_tree(analyze_context_t *actx, Parse_info pi)
{
	int xw, w;
	DIS_node * dnroot, * dn;
	CON_list * child, * xchild;
	List_o_links * lol, * xlol;

	/* The algorithm used here to build the DIS_CON tree depends on
	 * the search percolating down from the "top" of the tree. The
	 * original version of this started its search at the wall. This
	 * was fine because doing a DFS from the wall explores the tree in
	 * the right order.
	 *
	 * However, in order to handle null links correctly, a more careful
	 * ordering process must be used to explore the tree. We use
	 * dfs_height[] for this, and sort in height order.
	 *
	 * XXX Is the sort order correct here? This is not obvious; I think
	 * we want highest to lowest ... XXX is the height being calculated
	 * correctly? Looks weird to me ... XXX
	 */

	for (w=0; w < pi->N_words; w++) actx->dfs_height[w] = 0;
	for (w=0; w < pi->N_words; w++) height_dfs(actx, w, MAX_SENTENCE);

	/* Identity permutation before sorting. */
	for (w=0; w < pi->N_words; w++) actx->height_perm[w] = w;

	/* Sort the heights, keeping only the permuted order. */
	insort (actx, pi->N_words);

	/* -1 == not yet claimed by any DIS region. */
	for (w=0; w<pi->N_words; w++) actx->dfs_root_word[w] = -1;

	dnroot = NULL;
	for (xw = 0; xw < pi->N_words; xw++)
	{
		w = actx->height_perm[xw];
		if (actx->dfs_root_word[w] == -1)
		{
			dn = build_DIS_node(actx, w);
			if (dnroot == NULL)
			{
				dnroot = dn;
			}
			else
			{
				/* Merge dn into the root: move its CON children ... */
				for (child = dn->cl; child != NULL; child = xchild)
				{
					xchild = child->next;
					child->next = dnroot->cl;
					dnroot->cl = child;
				}
				/* ... and its link list, then discard the shell. */
				for (lol = dn->lol; lol != NULL; lol = xlol)
				{
					xlol = lol->next;
					lol->next = dnroot->lol;
					dnroot->lol = lol;
				}
				xfree((void *) dn, sizeof(DIS_node));
			}
		}
	}
	return dnroot;
}
|
434
|
+
|
435
|
+
static int advance_CON(CON_node *);
|
436
|
+
|
437
|
+
/**
|
438
|
+
* Cyclically advance the current state of this DIS node.
|
439
|
+
* If it's now at the beginning of its cycle, return FALSE;
|
440
|
+
* otherwise return TRUE. Together with the advance_CON()
|
441
|
+
* function, this can be used to iterate over the entire
|
442
|
+
* DIS_CON tree.
|
443
|
+
*/
|
444
|
+
static int advance_DIS(DIS_node * dn)
|
445
|
+
{
|
446
|
+
CON_list * cl;
|
447
|
+
for (cl = dn->cl; cl != NULL; cl = cl->next)
|
448
|
+
{
|
449
|
+
if (advance_CON(cl->cn)) return TRUE;
|
450
|
+
}
|
451
|
+
return FALSE;
|
452
|
+
}
|
453
|
+
|
454
|
+
/**
|
455
|
+
* Cyclically advance the current state of this CON node.
|
456
|
+
* If it's now at the beginning of its cycle return FALSE,
|
457
|
+
* otherwise return TRUE. Together with the advance_CON()
|
458
|
+
* function, this can be used to iterate over the entire
|
459
|
+
* DIS_CON tree.
|
460
|
+
*/
|
461
|
+
static int advance_CON(CON_node * cn)
|
462
|
+
{
|
463
|
+
if (advance_DIS(cn->current->dn))
|
464
|
+
{
|
465
|
+
return TRUE;
|
466
|
+
}
|
467
|
+
else
|
468
|
+
{
|
469
|
+
if (cn->current->next == NULL)
|
470
|
+
{
|
471
|
+
cn->current = cn->dl;
|
472
|
+
return FALSE;
|
473
|
+
}
|
474
|
+
else
|
475
|
+
{
|
476
|
+
cn->current = cn->current->next;
|
477
|
+
return TRUE;
|
478
|
+
}
|
479
|
+
}
|
480
|
+
}
|
481
|
+
|
482
|
+
static void fill_patch_array_CON(analyze_context_t *, CON_node *, Links_to_patch *);
|
483
|
+
|
484
|
+
/**
 * Patches up appropriate links in the patch_array for this DIS_node
 * and this patch list.
 *
 * This function takes ownership of the ltp list: every cell it consumes
 * is xfree'd here; any unconsumed remainder is handed to exactly one
 * child CON node (which consumes it in turn).
 */
static void fill_patch_array_DIS(analyze_context_t *actx,
                                 DIS_node * dn, Links_to_patch * ltp)
{
	CON_list * cl;
	List_o_links * lol;
	Links_to_patch * ltpx;

	/* Every link hanging directly off this DIS node is kept in the
	 * current sublinkage. */
	for (lol = dn->lol; lol != NULL; lol = lol->next)
	{
		actx->patch_array[lol->link].used = TRUE;
	}

	if ((dn->cl == NULL) || (dn->cl->cn->word != dn->word))
	{
		/* Consume the patch list here: re-aim the recorded end of each
		 * listed link at this node's word, freeing each cell as we go. */
		for (; ltp != NULL; ltp = ltpx)
		{
			ltpx = ltp->next;
			actx->patch_array[ltp->link].changed = TRUE;
			if (ltp->dir == 'l')
			{
				actx->patch_array[ltp->link].newl = dn->word;
			}
			else
			{
				actx->patch_array[ltp->link].newr = dn->word;
			}
			xfree((void *) ltp, sizeof(Links_to_patch));
		}
	}

	/* ltp != NULL at this point means that dn has child which is a cn
	   which is the same word; pass the list down to the first child only
	   (ltp is NULLed after the first iteration). */
	for (cl = dn->cl; cl != NULL; cl = cl->next)
	{
		fill_patch_array_CON(actx, cl->cn, ltp);
		ltp = NULL;
	}
}
|
526
|
+
|
527
|
+
/* Patch-array filler for a CON node: collects the neutral-priority links
 * touching this conjunction word into the ltp list, then recurses into
 * the currently-selected disjunct subtree. */
static void fill_patch_array_CON(analyze_context_t *actx,
                                 CON_node * cn, Links_to_patch * ltp)
{
	List_o_links * lol;
	Links_to_patch *ltpx;

	/* Prepend an entry for every dir == 0 link incident on this word;
	 * the recursive call below will re-aim those link ends. */
	for (lol = actx->word_links[cn->word]; lol != NULL; lol = lol->next)
	{
		if (lol->dir == 0)
		{
			ltpx = (Links_to_patch *) xalloc(sizeof(Links_to_patch));
			ltpx->next = ltp;
			ltp = ltpx;
			ltp->link = lol->link;
			/* The end to patch is the one pointing at cn->word: if the
			 * far word lies to the right, the left end gets patched. */
			if (lol->word > cn->word) {
				ltp->dir = 'l';
			} else {
				ltp->dir = 'r';
			}
		}
	}
	/* The callee takes ownership of (and eventually frees) ltp. */
	fill_patch_array_DIS(actx, cn->current->dn, ltp);
}
|
550
|
+
|
551
|
+
/* Release every List_o_links chain in the per-word digraph adjacency
 * table built for this parse. */
static void free_digraph(analyze_context_t *actx, Parse_info pi)
{
	int w;
	for (w = 0; w < pi->N_words; w++)
	{
		List_o_links *node = actx->word_links[w];
		while (node != NULL)
		{
			List_o_links *doomed = node;
			node = node->next;
			xfree((void *) doomed, sizeof(List_o_links));
		}
	}
}
|
564
|
+
|
565
|
+
static void free_CON_tree(CON_node *);
|
566
|
+
|
567
|
+
/* Recursively free a DIS node: its link list, its child CON subtrees
 * (via free_CON_tree), and finally the node itself. */
void free_DIS_tree(DIS_node * dn)
{
	List_o_links *lol = dn->lol;
	CON_list *cl = dn->cl;

	while (lol != NULL)
	{
		List_o_links *next_lol = lol->next;
		xfree((void *) lol, sizeof(List_o_links));
		lol = next_lol;
	}

	while (cl != NULL)
	{
		CON_list *next_cl = cl->next;
		free_CON_tree(cl->cn);
		xfree((void *) cl, sizeof(CON_list));
		cl = next_cl;
	}

	xfree((void *) dn, sizeof(DIS_node));
}
|
584
|
+
|
585
|
+
/* Recursively free a CON node: each disjunct subtree (via free_DIS_tree),
 * the DIS_list cells, and finally the node itself. */
static void free_CON_tree(CON_node * cn)
{
	DIS_list *dl = cn->dl;
	while (dl != NULL)
	{
		DIS_list *next_dl = dl->next;
		free_DIS_tree(dl->dn);
		xfree((void *) dl, sizeof(DIS_list));
		dl = next_dl;
	}
	xfree((void *) cn, sizeof(CON_node));
}
|
596
|
+
|
597
|
+
/** scope out this and element */
|
598
|
+
static void and_dfs_full(analyze_context_t *actx, int w)
|
599
|
+
{
|
600
|
+
List_o_links *lol;
|
601
|
+
if (actx->visited[w]) return;
|
602
|
+
actx->visited[w] = TRUE;
|
603
|
+
actx->and_element_sizes[actx->N_and_elements]++;
|
604
|
+
|
605
|
+
for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
|
606
|
+
{
|
607
|
+
if (lol->dir >= 0)
|
608
|
+
{
|
609
|
+
and_dfs_full(actx, lol->word);
|
610
|
+
}
|
611
|
+
}
|
612
|
+
}
|
613
|
+
|
614
|
+
/** get down the tree past all the commas */
|
615
|
+
static void and_dfs_commas(analyze_context_t *actx, Sentence sent, int w)
|
616
|
+
{
|
617
|
+
List_o_links *lol;
|
618
|
+
if (actx->visited[w]) return;
|
619
|
+
|
620
|
+
actx->visited[w] = TRUE;
|
621
|
+
|
622
|
+
for (lol = actx->word_links[w]; lol != NULL; lol = lol->next)
|
623
|
+
{
|
624
|
+
/* we only consider UP or DOWN priority links here */
|
625
|
+
if (lol->dir == 1)
|
626
|
+
{
|
627
|
+
if (strcmp(sent->word[lol->word].string, ",") == 0)
|
628
|
+
{
|
629
|
+
/* pointing to a comma */
|
630
|
+
and_dfs_commas(actx, sent, lol->word);
|
631
|
+
}
|
632
|
+
else
|
633
|
+
{
|
634
|
+
actx->and_element[actx->N_and_elements] = lol->word;
|
635
|
+
and_dfs_full(actx, lol->word);
|
636
|
+
actx->N_and_elements++;
|
637
|
+
}
|
638
|
+
}
|
639
|
+
if (lol->dir == 0)
|
640
|
+
{
|
641
|
+
actx->outside_word[actx->N_outside_words] = lol->word;
|
642
|
+
actx->N_outside_words++;
|
643
|
+
}
|
644
|
+
}
|
645
|
+
}
|
646
|
+
|
647
|
+
/**
|
648
|
+
* This function computes the "and cost", resulting from inequalities
|
649
|
+
* in the length of and-list elements. It also computes other
|
650
|
+
* information used to construct the "andlist" structure of linkage_info.
|
651
|
+
*/
|
652
|
+
static Andlist * build_andlist(analyze_context_t *actx, Sentence sent)
|
653
|
+
{
|
654
|
+
int w, i, min, max, j, cost;
|
655
|
+
char * s;
|
656
|
+
Andlist * new_andlist, * old_andlist;
|
657
|
+
Parse_info pi = sent->parse_info;
|
658
|
+
|
659
|
+
old_andlist = NULL;
|
660
|
+
cost = 0;
|
661
|
+
|
662
|
+
for(w = 0; w<pi->N_words; w++)
|
663
|
+
{
|
664
|
+
s = sent->word[w].string;
|
665
|
+
if (sent->is_conjunction[w])
|
666
|
+
{
|
667
|
+
actx->N_and_elements = 0;
|
668
|
+
actx->N_outside_words = 0;
|
669
|
+
for(i=0; i<pi->N_words; i++)
|
670
|
+
{
|
671
|
+
actx->visited[i] = FALSE;
|
672
|
+
actx->and_element_sizes[i] = 0;
|
673
|
+
}
|
674
|
+
if (sent->dict->left_wall_defined)
|
675
|
+
actx->visited[0] = TRUE;
|
676
|
+
|
677
|
+
and_dfs_commas(actx, sent, w);
|
678
|
+
if (actx->N_and_elements == 0) continue;
|
679
|
+
|
680
|
+
new_andlist = (Andlist *) xalloc(sizeof(Andlist));
|
681
|
+
new_andlist->num_elements = actx->N_and_elements;
|
682
|
+
new_andlist->num_outside_words = actx->N_outside_words;
|
683
|
+
|
684
|
+
for (i=0; i < actx->N_and_elements; i++)
|
685
|
+
{
|
686
|
+
new_andlist->element[i] = actx->and_element[i];
|
687
|
+
}
|
688
|
+
for (i=0; i < actx->N_outside_words; i++)
|
689
|
+
{
|
690
|
+
new_andlist->outside_word[i] = actx->outside_word[i];
|
691
|
+
}
|
692
|
+
new_andlist->conjunction = w;
|
693
|
+
new_andlist->next = old_andlist;
|
694
|
+
old_andlist = new_andlist;
|
695
|
+
|
696
|
+
if (actx->N_and_elements > 0)
|
697
|
+
{
|
698
|
+
min = MAX_SENTENCE;
|
699
|
+
max = 0;
|
700
|
+
for (i=0; i < actx->N_and_elements; i++)
|
701
|
+
{
|
702
|
+
j = actx->and_element_sizes[i];
|
703
|
+
if (j < min) min = j;
|
704
|
+
if (j > max) max = j;
|
705
|
+
}
|
706
|
+
cost += max-min;
|
707
|
+
}
|
708
|
+
}
|
709
|
+
}
|
710
|
+
if (old_andlist) old_andlist->cost = cost;
|
711
|
+
return old_andlist;
|
712
|
+
}
|
713
|
+
|
714
|
+
/**
 * This function defines the cost of a link as a function of its length:
 * a link of length 1 is free; each additional unit of length costs 1.
 */
static inline int cost_for_length(int length)
{
	return (length - 1);
}
|
721
|
+
|
722
|
+
/**
|
723
|
+
* Computes the cost of the current parse of the current sentence,
|
724
|
+
* due to the length of the links.
|
725
|
+
*/
|
726
|
+
static int link_cost(Parse_info pi)
|
727
|
+
{
|
728
|
+
int lcost, i;
|
729
|
+
lcost = 0;
|
730
|
+
for (i = 0; i < pi->N_links; i++)
|
731
|
+
{
|
732
|
+
lcost += cost_for_length(pi->link_array[i].r - pi->link_array[i].l);
|
733
|
+
}
|
734
|
+
return lcost;
|
735
|
+
}
|
736
|
+
|
737
|
+
/* Would compute the number of null links in the linkage.
 * No one seems to care about this -- ALB */
static int null_cost(Parse_info pi)
{
	(void) pi;  /* intentionally unused */
	return 0;
}
|
743
|
+
|
744
|
+
/* Counts the words left without a chosen disjunct, i.e. the words the
 * parse did not use. */
static int unused_word_cost(Parse_info pi)
{
	int idx;
	int n_unused = 0;

	for (idx = 0; idx < pi->N_words; idx++)
	{
		if (pi->chosen_disjuncts[idx] == NULL) n_unused++;
	}
	return n_unused;
}
|
752
|
+
|
753
|
+
/**
|
754
|
+
* Computes the cost of the current parse of the current sentence
|
755
|
+
* due to the cost of the chosen disjuncts.
|
756
|
+
*/
|
757
|
+
static float disjunct_cost(Parse_info pi)
|
758
|
+
{
|
759
|
+
int i;
|
760
|
+
float lcost;
|
761
|
+
lcost = 0.0;
|
762
|
+
for (i = 0; i < pi->N_words; i++)
|
763
|
+
{
|
764
|
+
if (pi->chosen_disjuncts[i] != NULL)
|
765
|
+
lcost += pi->chosen_disjuncts[i]->cost;
|
766
|
+
}
|
767
|
+
return lcost;
|
768
|
+
}
|
769
|
+
|
770
|
+
/**
|
771
|
+
* Returns TRUE if string s represents a strictly smaller match set
|
772
|
+
* than does t. An almost identical function appears in and.c.
|
773
|
+
* The difference is that here we don't require s and t to be the
|
774
|
+
* same length.
|
775
|
+
*/
|
776
|
+
static int strictly_smaller_name(const char * s, const char * t)
|
777
|
+
{
|
778
|
+
int strictness, ss, tt;
|
779
|
+
strictness = 0;
|
780
|
+
while ((*s!='\0') || (*t!='\0'))
|
781
|
+
{
|
782
|
+
if (*s == '\0') {
|
783
|
+
ss = '*';
|
784
|
+
} else {
|
785
|
+
ss = *s;
|
786
|
+
s++;
|
787
|
+
}
|
788
|
+
if (*t == '\0') {
|
789
|
+
tt = '*';
|
790
|
+
} else {
|
791
|
+
tt = *t;
|
792
|
+
t++;
|
793
|
+
}
|
794
|
+
if (ss == tt) continue;
|
795
|
+
if ((tt == '*') || (ss == '^')) {
|
796
|
+
strictness++;
|
797
|
+
} else {
|
798
|
+
return FALSE;
|
799
|
+
}
|
800
|
+
}
|
801
|
+
return (strictness > 0);
|
802
|
+
}
|
803
|
+
|
804
|
+
/**
|
805
|
+
* The name of the link is set to be the GCD of the names of
|
806
|
+
* its two endpoints. Must be called after each extract_links(),
|
807
|
+
* etc. since that call issues a brand-new set of links into
|
808
|
+
* parse_info.
|
809
|
+
*/
|
810
|
+
static void compute_link_names(Sentence sent)
|
811
|
+
{
|
812
|
+
int i;
|
813
|
+
Parse_info pi = sent->parse_info;
|
814
|
+
|
815
|
+
for (i = 0; i < pi->N_links; i++)
|
816
|
+
{
|
817
|
+
pi->link_array[i].name = intersect_strings(sent,
|
818
|
+
connector_get_string(pi->link_array[i].lc),
|
819
|
+
connector_get_string(pi->link_array[i].rc));
|
820
|
+
}
|
821
|
+
}
|
822
|
+
|
823
|
+
/**
 * This fills in the sublinkage->link[].name field. We assume that
 * link_array[].name have already been filled in. As above, in the
 * standard case, the name is just the GCD of the two end points.
 * If pluralization has occurred, then we want to use the name
 * already in link_array[].name. We detect this in two ways.
 * If the endpoints don't match, then we know pluralization
 * has occured. If they do, but the name in link_array[].name
 * is *less* restrictive, then pluralization must have occured.
 */
static void compute_pp_link_names(Sentence sent, Sublinkage *sublinkage)
{
	int i;
	const char * s;
	Parse_info pi = sent->parse_info;

	for (i = 0; i < pi->N_links; i++)
	{
		/* l == -1 marks a link suppressed in this sublinkage; skip it. */
		if (sublinkage->link[i]->l == -1) continue;
		/* NULL's here are quite unexpected -- I think there's a bug
		 * elsewhere in the code. But for now, punt. Here's a sentence
		 * that triggers a NULL -- "His convalescence was relatively brief
		 * and he was able to return and fight at The Wilderness,
		 * Spotsylvania and Cold Harbor."
		 */
		if (NULL == sublinkage->link[i]->lc) continue;
		if (NULL == sublinkage->link[i]->rc) continue;
		if (!x_match(sent, sublinkage->link[i]->lc, sublinkage->link[i]->rc))
		{
			/* Endpoints don't match: pluralization happened; reuse the
			 * precomputed name. */
			replace_link_name(sublinkage->link[i], pi->link_array[i].name);
		}
		else
		{
			s = intersect_strings(sent,
				connector_get_string(sublinkage->link[i]->lc),
				connector_get_string(sublinkage->link[i]->rc));

			/* A less-restrictive GCD also implies pluralization. */
			if (strictly_smaller_name(s, pi->link_array[i].name))
				replace_link_name(sublinkage->link[i], pi->link_array[i].name);
			else
				replace_link_name(sublinkage->link[i], s);
		}
	}
}
|
867
|
+
|
868
|
+
/********************** exported functions *****************************/
|
869
|
+
|
870
|
+
/* Lazily create and reset the per-sentence analysis context. */
void init_analyze(Sentence s)
{
	analyze_context_t *actx = s->analyze_ctxt;

	if (actx == NULL)
	{
		/* NOTE(review): malloc result is not checked here; the callers
		 * assume allocation succeeds -- TODO confirm project OOM policy. */
		actx = (analyze_context_t *) malloc (sizeof(analyze_context_t));
		s->analyze_ctxt = actx;
	}

	actx->structure_violation = FALSE;
}
|
882
|
+
|
883
|
+
/* Release the per-sentence analysis context (if any) and clear the
 * pointer so a later init_analyze() re-allocates it.
 * Fix: dropped the redundant NULL guard -- free(NULL) is a no-op. */
void free_analyze(Sentence s)
{
	free(s->analyze_ctxt);
	s->analyze_ctxt = NULL;
}
|
888
|
+
|
889
|
+
/**
 * This uses link_array. It enumerates and post-processes
 * all the linkages represented by this one. We know this contains
 * at least one fat link.
 *
 * Returns a Linkage_info by value.  On a structure violation the
 * returned info has improper_fat_linkage set and one violation counted;
 * otherwise every sublinkage is enumerated via advance_DIS() and either
 * scanned (PP_FIRST_PASS) or fully post-processed.
 */
Linkage_info analyze_fat_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	int i;
	Linkage_info li;
	DIS_node *d_root;
	PP_node *pp;
	Postprocessor *postprocessor;
	Sublinkage *sublinkage;
	Parse_info pi = sent->parse_info;
	PP_node accum; /* for domain ancestry check */
	D_type_list * dtl0, * dtl1; /* for domain ancestry check */

	analyze_context_t *actx = sent->analyze_ctxt;

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;
	build_digraph(actx, pi);
	actx->structure_violation = FALSE;
	d_root = build_DIS_CON_tree(actx, pi); /* may set structure_violation to TRUE */

	/* Initialize the returned info with the pass-independent costs. */
	memset(&li, 0, sizeof(li));
	li.N_violations = 0;
	li.improper_fat_linkage = actx->structure_violation;
	li.inconsistent_domains = FALSE;
	li.unused_word_cost = unused_word_cost(sent->parse_info);
	if (opts->use_sat_solver)
	{
		li.disjunct_cost = 0.0;
	}
	else
	{
		li.disjunct_cost = disjunct_cost(pi);
	}
	li.null_cost = null_cost(pi);
	li.link_cost = link_cost(pi);
	li.corpus_cost = -1.0f;
	li.and_cost = 0;
	li.andlist = NULL;

	if (actx->structure_violation)
	{
		/* Malformed fat structure: clean up everything and bail out. */
		li.N_violations++;
		free_sublinkage(sublinkage);
		free_digraph(actx, pi);
		free_DIS_tree(d_root);
		for (i = 0; i < pi->N_links; i++)
		{
			pi->link_array[i].name = "";
		}
		return li;
	}

	if (analyze_pass == PP_SECOND_PASS)
	{
		li.andlist = build_andlist(actx, sent);
		li.and_cost = 0;
		if (li.andlist) li.and_cost = li.andlist->cost;
	}
	else li.and_cost = 0;

	compute_link_names(sent);

	/* accum accumulates, per link, the domain ancestry seen in the first
	 * sublinkage containing that link, for the consistency check below. */
	for (i=0; i<pi->N_links; i++) accum.d_type_array[i] = NULL;

	/* loop through all the sub linkages */
	for (;;)
	{
		/* Reset the patch array and start from a fresh copy of each link. */
		for (i=0; i<pi->N_links; i++)
		{
			actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
			actx->patch_array[i].newl = pi->link_array[i].l;
			actx->patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(actx, d_root, NULL);

		for (i=0; i<pi->N_links; i++)
		{
			if (actx->patch_array[i].changed || actx->patch_array[i].used)
			{
				sublinkage->link[i]->l = actx->patch_array[i].newl;
				sublinkage->link[i]->r = actx->patch_array[i].newr;
			}
			else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
					 (actx->dfs_root_word[pi->link_array[i].r] != -1))
			{
				/* Neither patched nor used: suppress this link by
				 * marking its left end -1. */
				sublinkage->link[i]->l = -1;
			}
		}

		if (0 == opts->use_sat_solver)
		{
			compute_pp_link_array_connectors(sent, sublinkage);
			compute_pp_link_names(sent, sublinkage);
		}

		/* 'analyze_pass' logic added ALB 1/97 */
		if (analyze_pass==PP_FIRST_PASS) {
			post_process_scan_linkage(postprocessor,opts,sent,sublinkage);
			if (!advance_DIS(d_root)) break;
			else continue;
		}

		pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

		if (pp==NULL) {
			if (postprocessor != NULL) li.N_violations = 1;
		}
		else if (pp->violation == NULL) {
			/* the purpose of this stuff is to make sure the domain
			   ancestry for a link in each of its sentences is consistent. */

			for (i=0; i<pi->N_links; i++) {
				if (sublinkage->link[i]->l == -1) continue;
				if (accum.d_type_array[i] == NULL) {
					accum.d_type_array[i] = copy_d_type(pp->d_type_array[i]);
				} else {
					/* Walk both ancestry lists in lock-step; any
					 * divergence (type or length) is an inconsistency. */
					dtl0 = pp->d_type_array[i];
					dtl1 = accum.d_type_array[i];
					while((dtl0 != NULL) && (dtl1 != NULL) && (dtl0->type == dtl1->type)) {
						dtl0 = dtl0->next;
						dtl1 = dtl1->next;
					}
					if ((dtl0 != NULL) || (dtl1 != NULL)) break;
				}
			}
			if (i != pi->N_links) {
				li.N_violations++;
				li.inconsistent_domains = TRUE;
			}
		}
		else if (pp->violation!=NULL) {
			li.N_violations++;
		}

		if (!advance_DIS(d_root)) break;
	}

	for (i=0; i<pi->N_links; ++i) {
		free_d_type(accum.d_type_array[i]);
	}

	/* if (display_on && (li.N_violations != 0) &&
	   (verbosity > 3) && should_print_messages)
	   printf("P.P. violation in one part of conjunction.\n"); */
	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	free_DIS_tree(d_root);
	return li;
}
|
1044
|
+
|
1045
|
+
/**
 * This uses link_array. It post-processes
 * this linkage, and prints the appropriate thing. There are no fat
 * links in it.
 *
 * In PP_FIRST_PASS it only scans the linkage and returns a
 * zero-initialized Linkage_info; otherwise it runs full post-processing
 * and fills in the cost fields and violation counts.
 */
Linkage_info analyze_thin_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	int i;
	Linkage_info li;
	PP_node * pp;
	Postprocessor * postprocessor;
	Sublinkage *sublinkage;
	Parse_info pi = sent->parse_info;
	analyze_context_t *actx = sent->analyze_ctxt;

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;

	compute_link_names(sent);
	for (i=0; i<pi->N_links; i++)
	{
		copy_full_link(&(sublinkage->link[i]), &(pi->link_array[i]));
	}

	if (analyze_pass == PP_FIRST_PASS)
	{
		/* First pass: just feed the linkage to the scanner and return
		 * an empty info record. */
		post_process_scan_linkage(postprocessor, opts, sent, sublinkage);
		free_sublinkage(sublinkage);
		memset(&li, 0, sizeof(li));
		return li;
	}

	build_digraph(actx, pi);

	/* The code below can be used to generate the "islands" array.
	 * For this to work, however, you have to call "build_digraph"
	 * first (as in analyze_fat_linkage). and then "free_digraph".
	 */
	pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

	memset(&li, 0, sizeof(li));
	li.N_violations = 0;
	li.and_cost = 0;
	li.unused_word_cost = unused_word_cost(sent->parse_info);
	li.improper_fat_linkage = FALSE;
	li.inconsistent_domains = FALSE;
	if (opts->use_sat_solver)
	{
		li.disjunct_cost = 0.0;
	}
	else
	{
		li.disjunct_cost = disjunct_cost(pi);
	}
	li.null_cost = null_cost(pi);
	li.link_cost = link_cost(pi);
	li.corpus_cost = -1.0f;
	li.andlist = NULL;

	if (pp == NULL)
	{
		/* A NULL result with a live postprocessor counts as a violation. */
		if (postprocessor != NULL) li.N_violations = 1;
	}
	else if (pp->violation != NULL)
	{
		li.N_violations++;
	}

	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	return li;
}
|
1117
|
+
|
1118
|
+
/* Copy the (single) thin linkage out of parse_info into the Linkage
 * structure: one sublinkage, with every link duplicated via excopy_link. */
void extract_thin_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	Parse_info pi = sent->parse_info;
	int idx;

	linkage->num_sublinkages = 1;
	linkage->sublinkage = ex_create_sublinkage(pi);

	compute_link_names(sent);
	for (idx = 0; idx < pi->N_links; idx++)
	{
		linkage->sublinkage->link[idx] = excopy_link(&(pi->link_array[idx]));
	}
}
|
1132
|
+
|
1133
|
+
#ifdef DBG
/* Debug-only printers for the DIS_CON tree; compiled out unless DBG. */

/* Print the words on a list-o-links. */
static void prt_lol(Sentence sent , List_o_links *lol)
{
	/* It appears that the list of links is always even in length:
	 * The head word first, followed by a modifier.
	 */
	while (lol)
	{
		// printf ("%d ", lol->link);
		printf ("%s ", sent->word[lol->word].string);
		lol = lol->next;
	}
}

static void prt_con_list(Sentence, CON_list *);

/* Print one disjunct list of a CON node. */
static void prt_dis_list(Sentence sent, DIS_list *dis)
{
	while(dis)
	{
		/* There are three possibilities:
		 * Either there's another conjunction (and we should print it)
		 * Or there's a head word, with its modifiers in its list-o-links,
		 * Or there's just the bare, naked word by itself.
		 */
		if (dis->dn->cl)
		{
			prt_con_list(sent, dis->dn->cl);
		}
		else if (dis->dn->lol)
		{
			printf("[");
			prt_lol(sent, dis->dn->lol);
			printf("]");
		}
		else
		{
			int wd = dis->dn->word;
			printf("%s ", sent->word[wd].string);
		}
		dis = dis->next;
	}
}

/* Print a list of CON nodes, each as "(word <disjuncts>) ". */
static void prt_con_list(Sentence sent, CON_list *con)
{
	while(con)
	{
		int wd = con->cn->word;
		printf("(%s ", sent->word[wd].string);
		prt_dis_list(sent, con->cn->dl);
		printf(") ");
		con = con->next;
	}
}

/* Print the whole DIS_CON tree rooted at dis. */
static void prt_dis_con_tree(Sentence sent, DIS_node *dis)
{
	prt_con_list(sent, dis->cl);
	printf ("\n");
}
#else
/* No-op stub when debugging output is disabled. */
static inline void prt_dis_con_tree(Sentence sent, DIS_node *dis) {}
#endif
|
1195
|
+
|
1196
|
+
/**
 * This procedure mimics analyze_fat_linkage in order to
 * extract the sublinkages and copy them to the Linkage
 * data structure passed in.
 *
 * First enumerates the DIS_CON tree to count sublinkages, then re-walks
 * it filling one Sublinkage per state, dropping fat links (l == -1).
 * The DIS tree ends up owned by linkage->dis_con_tree.
 */
void extract_fat_linkage(Sentence sent, Parse_Options opts, Linkage linkage)
{
	int i, j, N_thin_links;
	DIS_node *d_root;
	int num_sublinkages;
	Sublinkage * sublinkage;
	Parse_info pi = sent->parse_info;

	analyze_context_t *actx = sent->analyze_ctxt;

	build_digraph(actx, pi);
	actx->structure_violation = FALSE;
	d_root = build_DIS_CON_tree(actx, pi);

	if (actx->structure_violation)
	{
		/* Malformed structure: fall back to copying the raw link array. */
		compute_link_names(sent);
		linkage->num_sublinkages=1;
		linkage->sublinkage = ex_create_sublinkage(pi);

		/* This will have fat links! */
		for (i=0; i<pi->N_links; ++i)
		{
			linkage->sublinkage->link[i] = excopy_link(&(pi->link_array[i]));
		}

		free_digraph(actx, pi);
		free_DIS_tree(d_root);
		return;
	}

	/* first get number of sublinkages and allocate space */
	num_sublinkages = 0;
	for (;;) {
		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	linkage->num_sublinkages = num_sublinkages;
	linkage->sublinkage =
		(Sublinkage *) exalloc(sizeof(Sublinkage)*num_sublinkages);
	for (i=0; i<num_sublinkages; ++i) {
		linkage->sublinkage[i].link = NULL;
		linkage->sublinkage[i].pp_info = NULL;
		linkage->sublinkage[i].violation = NULL;
	}

	/* now fill out the sublinkage arrays */
	compute_link_names(sent);

	sublinkage = x_create_sublinkage(pi);
	num_sublinkages = 0;
	for (;;)
	{
		/* Reset the patch array and start from a fresh copy of each link;
		 * this mirrors the loop in analyze_fat_linkage. */
		for (i = 0; i < pi->N_links; i++)
		{
			actx->patch_array[i].used = actx->patch_array[i].changed = FALSE;
			actx->patch_array[i].newl = pi->link_array[i].l;
			actx->patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(actx, d_root, NULL);

		for (i = 0; i < pi->N_links; i++)
		{
			if (actx->patch_array[i].changed || actx->patch_array[i].used)
			{
				sublinkage->link[i]->l = actx->patch_array[i].newl;
				sublinkage->link[i]->r = actx->patch_array[i].newr;
			}
			else if ((actx->dfs_root_word[pi->link_array[i].l] != -1) &&
					 (actx->dfs_root_word[pi->link_array[i].r] != -1))
			{
				/* Suppress links not in this sublinkage. */
				sublinkage->link[i]->l = -1;
			}
		}

		if (0 == opts->use_sat_solver)
		{
			compute_pp_link_array_connectors(sent, sublinkage);
			compute_pp_link_names(sent, sublinkage);
		}

		/* Don't copy the fat links into the linkage */
		N_thin_links = 0;
		for (i = 0; i < pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			N_thin_links++;
		}

		linkage->sublinkage[num_sublinkages].num_links = N_thin_links;
		linkage->sublinkage[num_sublinkages].link =
			(Link **) exalloc(sizeof(Link *)*N_thin_links);
		linkage->sublinkage[num_sublinkages].pp_info = NULL;
		linkage->sublinkage[num_sublinkages].violation = NULL;

		for (i = 0, j = 0; i < pi->N_links; ++i)
		{
			if (sublinkage->link[i]->l == -1) continue;
			linkage->sublinkage[num_sublinkages].link[j++] =
				excopy_link(sublinkage->link[i]);
		}

		num_sublinkages++;
		if (!advance_DIS(d_root)) break;
	}

	free_sublinkage(sublinkage);
	free_digraph(actx, pi);
	/* Transfer ownership of the DIS tree to the linkage. */
	if (linkage->dis_con_tree)
		free_DIS_tree(linkage->dis_con_tree);
	linkage->dis_con_tree = d_root;

	prt_dis_con_tree(sent, d_root);
}
|
1317
|
+
|