grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
void prune(Sentence sent);
|
14
|
+
int power_prune(Sentence sent, int mode, Parse_Options opts);
|
15
|
+
void pp_and_power_prune(Sentence sent, int mode, Parse_Options opts);
|
16
|
+
int prune_match(int dist, Connector * left, Connector * right);
|
17
|
+
void expression_prune(Sentence sent);
|
@@ -0,0 +1,1785 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include <limits.h>
|
15
|
+
#include <string.h>
|
16
|
+
#include <wchar.h>
|
17
|
+
#include <wctype.h>
|
18
|
+
#include "api.h"
|
19
|
+
#include "disjunct-utils.h"
|
20
|
+
#include "error.h"
|
21
|
+
|
22
|
+
const char * linkgrammar_get_version(void)
|
23
|
+
{
|
24
|
+
const char *s = "link-grammar-" LINK_VERSION_STRING;
|
25
|
+
return s;
|
26
|
+
}
|
27
|
+
|
28
|
+
/**
 * Return the version of a dictionary as a dotted string, e.g. "4.6.6".
 *
 * The newer dictionaries should contain a macro of the form:
 *    <dictionary-version-number>: V4v6v6+;
 * which would indicate dictionary version 4.6.6.
 * Older dictionaries contain no version info; "[unknown]" is returned
 * for them.
 *
 * The answer is computed once and cached in a function-local static,
 * so every later call returns the first successfully computed version,
 * whichever dictionary is passed in.  Callers must not free the result.
 */
const char * linkgrammar_get_dict_version(Dictionary dict)
{
	static char * ver = NULL;
	char * copy;
	char * p;
	Dict_node *dn;

	if (ver) return ver;

	dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
	if (NULL == dn) return "[unknown]";

	/* Skip the first character of the connector name (the 'V' in the
	 * example above) and take a private copy, so that the lookup list
	 * can be released right away. */
	copy = strdup(&dn->exp->u.string[1]);
	free_lookup_list(dn);

	/* Out of memory: report an unknown version rather than handing a
	 * NULL pointer to strchr(). Nothing is cached, so a later call
	 * may still succeed. */
	if (NULL == copy) return "[unknown]";

	/* Turn each 'v' separator into a dot: "4v6v6" -> "4.6.6" */
	for (p = strchr(copy, 'v'); p != NULL; p = strchr(p+1, 'v'))
	{
		*p = '.';
	}

	ver = copy;
	return ver;
}
|
57
|
+
|
58
|
+
|
59
|
+
/*
|
60
|
+
The dictionary format:
|
61
|
+
|
62
|
+
In what follows:
|
63
|
+
Every "%" symbol and everything after it is ignored on every line.
|
64
|
+
Every newline or tab is replaced by a space.
|
65
|
+
|
66
|
+
The dictionary file is a sequence of ENTRIES. Each ENTRY is one or
|
67
|
+
more WORDS (a sequence of upper or lower case letters) separated by
|
68
|
+
spaces, followed by a ":", followed by an EXPRESSION followed by a
|
69
|
+
";". An EXPRESSION is an expression where the operators are "&"
|
70
|
+
or "and" or "|" or "or", and there are three types of parentheses:
|
71
|
+
"()", "{}", and "[]". The terminal symbols of this grammar are the
|
72
|
+
connectors, which are strings of letters or numbers or *s.
|
73
|
+
Expressions may be written in prefix or infix form. In prefix-form,
|
74
|
+
the expressions are lisp-like, with the operators &, | preceding
|
75
|
+
the operands. In infix-form, the operators are in the middle. The
|
76
|
+
current dictionaries are in infix form. If the C preprocessor
|
77
|
+
constant INFIX_NOTATION is defined, then the dictionary is assumed
|
78
|
+
to be in infix form.
|
79
|
+
|
80
|
+
The connector begins with an optional @, which is followed by an upper
|
81
|
+
case sequence of letters. Each subsequent *, lower case letter or
|
82
|
+
number is a subscript. At the end is a + or - sign. The "@" allows
|
83
|
+
this connector to attach to one or more other connectors.
|
84
|
+
|
85
|
+
Here is a sample dictionary entry (in infix form):
|
86
|
+
|
87
|
+
gone: T- & {@EV+};
|
88
|
+
|
89
|
+
(See our paper for more about how to interpret the meaning of the
|
90
|
+
dictionary expressions.)
|
91
|
+
|
92
|
+
A previously defined word (such as "gone" above) may be used instead
|
93
|
+
of a connector to specify the expression it was defined to be. Of
|
94
|
+
course, in this case, it must uniquely specify a word in the
|
95
|
+
dictionary, and have been previously defined.
|
96
|
+
|
97
|
+
If a word is of the form "/foo", then the file current-dir/foo
|
98
|
+
is a so-called word file, and is read in as a list of words.
|
99
|
+
A word file is just a list of words separated by blanks or newlines.
|
100
|
+
|
101
|
+
A word that contains the character "_" defines an idiomatic use of
|
102
|
+
the words separated by the "_". For example "kind of" is an idiomatic
|
103
|
+
expression, so a word "kind_of" is defined in the dictionary.
|
104
|
+
Idiomatic expressions of any number of words can be defined in this way.
|
105
|
+
When the word "kind" is encountered, all the idiomatic uses of the word
|
106
|
+
are considered.
|
107
|
+
|
108
|
+
An expression enclosed in "[..]" is given a cost of 1. This means
|
109
|
+
that if any of the connectors inside the square braces are used,
|
110
|
+
a cost of 1 is incurred. (This cost is the first element of the cost
|
111
|
+
vector printed when a sentence is parsed.) Of course if something is
|
112
|
+
inside of 10 levels of "[..]" then using it incurs a cost of 10.
|
113
|
+
These costs are called "disjunct costs". The linkages are printed out
|
114
|
+
in order of non-increasing disjunct cost.
|
115
|
+
|
116
|
+
The expression "(A+ or ())" means that you can choose either "A+" or
|
117
|
+
the empty expression "()", that is, that the connector "A+" is
|
118
|
+
optional. This is more compactly expressed as "{A+}". In other words,
|
119
|
+
curly braces indicate an optional expression.
|
120
|
+
|
121
|
+
The expression "(A+ or [])" is the same as that above, but there is a
|
122
|
+
cost of 1 incurred for choosing not to use "A+". The expression
|
123
|
+
"(EXP1 & [EXP2])" is exactly the same as "[EXP1 & EXP2]". The difference
|
124
|
+
between "({[A+]} & B+)" and "([{A+}] & B+)" is that the latter always
|
125
|
+
incurs a cost of 1, while the former only gets a cost of 1 if "A+" is
|
126
|
+
used.
|
127
|
+
|
128
|
+
The dictionary writer is not allowed to use connectors that begin in
|
129
|
+
"ID". This is reserved for the connectors automatically
|
130
|
+
generated for idioms.
|
131
|
+
|
132
|
+
Dictionary words may be followed by a dot (period, "."), and a "subscript"
|
133
|
+
identifying the word type. The subscript may be one or more letters or
|
134
|
+
numbers, but must begin with a letter. Currently, the dictionary contains
|
135
|
+
(mostly?) subscripts consisting of a single letter, and these serve mostly
|
136
|
+
to identify the part-of-speech. In general, subscripts can also be used
|
137
|
+
to distinguish different word senses.
|
138
|
+
*/
|
139
|
+
|
140
|
+
static int link_advance(Dictionary dict);
|
141
|
+
|
142
|
+
/**
 * Report an error found while parsing the dictionary.
 *
 * The message names the dictionary, the current line number and up to
 * five tokens starting at the current one.  Collecting those tokens
 * calls link_advance(), so they are consumed from the input.
 *
 * dict->recursive_error is held TRUE while this runs, so that an error
 * raised by link_advance() during token collection returns at once
 * instead of re-entering this function.
 *
 * @param dict  dictionary being read
 * @param s     primary error message
 * @param s2    optional extra detail printed after s; may be NULL
 */
static void dict_error2(Dictionary dict, const char * s, const char *s2)
{
	int i, n;
	size_t used = 0;      /* bytes of tokens[] filled, excluding the NUL */
	char tokens[1024];
	err_ctxt ec;

	if (dict->recursive_error) return;
	dict->recursive_error = TRUE;

	tokens[0] = '\0';
	for (i=0; i<5 && dict->token[0] != '\0' ; i++)
	{
		/* Bounded append: a long token is truncated instead of
		 * overrunning the buffer. */
		n = snprintf(tokens + used, sizeof(tokens) - used,
		             "\"%s\" ", dict->token);
		if (n < 0)
		{
			tokens[used] = '\0';   /* formatting failed; keep what we have */
		}
		else if ((size_t) n >= sizeof(tokens) - used)
		{
			used = sizeof(tokens) - 1;   /* truncated: buffer is now full */
		}
		else
		{
			used += (size_t) n;
		}

		/* Always advance, even when the buffer is full, so that the
		 * same number of tokens is consumed as before. */
		link_advance(dict);
	}

	ec.sent = NULL;
	if (s2)
	{
		err_msg(&ec, Error, "Error parsing dictionary %s.\n"
		          "%s %s\n\t line %d, tokens = %s\n",
		        dict->name,
		        s, s2, dict->line_number, tokens);
	}
	else
	{
		err_msg(&ec, Error, "Error parsing dictionary %s.\n"
		          "%s\n\t line %d, tokens = %s\n",
		        dict->name,
		        s, dict->line_number, tokens);
	}
	dict->recursive_error = FALSE;
}
|
177
|
+
|
178
|
+
/**
 * Report a dictionary parse error that has no extra detail string.
 * Simply forwards to dict_error2() with a NULL second message.
 */
static void dict_error(Dictionary dict, const char * s)
{
	const char * no_detail = NULL;

	dict_error2(dict, s, no_detail);
}
|
182
|
+
|
183
|
+
/**
 * Emit a warning about the dictionary being read, together with the
 * current line number and the current token.
 */
static void warning(Dictionary dict, const char * s)
{
	err_ctxt ctxt;

	ctxt.sent = NULL;
	err_msg(&ctxt, Warn,
	        "Warning: %s\n\tline %d, current token = \"%s\"\n",
	        s, dict->line_number, dict->token);
}
|
191
|
+
|
192
|
+
/**
|
193
|
+
* This gets the next character from the input, eliminating comments.
|
194
|
+
* If we're in quote mode, it does not consider the % character for
|
195
|
+
* comments.
|
196
|
+
*/
|
197
|
+
static wint_t get_character(Dictionary dict, int quote_mode)
|
198
|
+
{
|
199
|
+
wint_t c;
|
200
|
+
|
201
|
+
c = fgetwc(dict->fp);
|
202
|
+
if ((c == '%') && (!quote_mode)) {
|
203
|
+
while((c != WEOF) && (c != '\n')) c = fgetwc(dict->fp);
|
204
|
+
}
|
205
|
+
if (c == '\n') dict->line_number++;
|
206
|
+
return c;
|
207
|
+
}
|
208
|
+
|
209
|
+
|
210
|
+
/*
|
211
|
+
* This set of 10 characters are the ones defining the syntax of the
|
212
|
+
* dictionary.
|
213
|
+
*/
|
214
|
+
#define SPECIAL "(){};[]&|:"

/**
 * Return true if the input wide-character is one of the special
 * characters used to define the syntax of the dictionary.
 *
 * The character is converted to its multi-byte form first; only
 * characters that convert to exactly one byte can be special.
 * Conversion failures (e.g. for WEOF) and multi-byte characters
 * yield false.
 *
 * @param wc  wide character to test
 * @param ps  conversion state handed to wcrtomb()
 * @return    non-zero if wc is in SPECIAL, else 0
 */
static int is_special(wint_t wc, mbstate_t *ps)
{
	char buff[MB_LEN_MAX];
	size_t nr = wcrtomb(buff, wc, ps);

	if (1 != nr) return 0;

	/* strchr() treats the terminating NUL as part of the string, so a
	 * NUL character would otherwise be reported as special. */
	if ('\0' == buff[0]) return 0;

	return (NULL != strchr(SPECIAL, buff[0]));
}
|
227
|
+
|
228
|
+
/**
|
229
|
+
* This reads the next token from the input into token.
|
230
|
+
* Return 1 if a character was read, else return 0 (and print a warning).
|
231
|
+
*/
|
232
|
+
static int link_advance(Dictionary dict)
|
233
|
+
{
|
234
|
+
wint_t c;
|
235
|
+
int nr, i;
|
236
|
+
int quote_mode;
|
237
|
+
|
238
|
+
dict->is_special = FALSE;
|
239
|
+
|
240
|
+
if (dict->already_got_it != '\0')
|
241
|
+
{
|
242
|
+
dict->is_special = is_special(dict->already_got_it, &dict->mbss);
|
243
|
+
if (dict->already_got_it == WEOF) {
|
244
|
+
dict->token[0] = '\0';
|
245
|
+
} else {
|
246
|
+
dict->token[0] = dict->already_got_it; /* specials are one byte */
|
247
|
+
dict->token[1] = '\0';
|
248
|
+
}
|
249
|
+
dict->already_got_it = '\0';
|
250
|
+
return 1;
|
251
|
+
}
|
252
|
+
|
253
|
+
do { c = get_character(dict, FALSE); } while (iswspace(c));
|
254
|
+
|
255
|
+
quote_mode = FALSE;
|
256
|
+
|
257
|
+
i = 0;
|
258
|
+
for (;;)
|
259
|
+
{
|
260
|
+
if (i > MAX_TOKEN_LENGTH-3) { /* 3 for multi-byte tokens */
|
261
|
+
dict_error(dict, "Token too long");
|
262
|
+
return 0;
|
263
|
+
}
|
264
|
+
if (quote_mode) {
|
265
|
+
if (c == '\"') {
|
266
|
+
quote_mode = FALSE;
|
267
|
+
dict->token[i] = '\0';
|
268
|
+
return 1;
|
269
|
+
}
|
270
|
+
if (iswspace(c)) {
|
271
|
+
dict_error(dict, "White space inside of token");
|
272
|
+
return 0;
|
273
|
+
}
|
274
|
+
|
275
|
+
/* Although we read wide chars, we store UTF8 internally, always. */
|
276
|
+
nr = wcrtomb(&dict->token[i], c, &dict->mbss);
|
277
|
+
if (nr < 0) {
|
278
|
+
#ifndef _WIN32
|
279
|
+
dict_error2(dict, "Unable to read UTF8 string in current locale",
|
280
|
+
nl_langinfo(CODESET));
|
281
|
+
fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
|
282
|
+
#else
|
283
|
+
dict_error(dict, "Unable to read UTF8 string in current locale");
|
284
|
+
#endif
|
285
|
+
return 0;
|
286
|
+
}
|
287
|
+
i += nr;
|
288
|
+
} else {
|
289
|
+
if (is_special(c, &dict->mbss))
|
290
|
+
{
|
291
|
+
if (i == 0)
|
292
|
+
{
|
293
|
+
dict->token[0] = c; /* special toks are one char always */
|
294
|
+
dict->token[1] = '\0';
|
295
|
+
dict->is_special = TRUE;
|
296
|
+
return 1;
|
297
|
+
}
|
298
|
+
dict->token[i] = '\0';
|
299
|
+
dict->already_got_it = c;
|
300
|
+
return 1;
|
301
|
+
}
|
302
|
+
if (c == WEOF) {
|
303
|
+
if (i == 0) {
|
304
|
+
dict->token[0] = '\0';
|
305
|
+
return 1;
|
306
|
+
}
|
307
|
+
dict->token[i] = '\0';
|
308
|
+
dict->already_got_it = c;
|
309
|
+
return 1;
|
310
|
+
}
|
311
|
+
if (iswspace(c)) {
|
312
|
+
dict->token[i] = '\0';
|
313
|
+
return 1;
|
314
|
+
}
|
315
|
+
if (c == '\"') {
|
316
|
+
quote_mode = TRUE;
|
317
|
+
} else {
|
318
|
+
/* store UTF8 internally, always. */
|
319
|
+
nr = wctomb_check(&dict->token[i], c, &dict->mbss);
|
320
|
+
if (nr < 0) {
|
321
|
+
#ifndef _WIN32
|
322
|
+
dict_error2(dict, "Unable to read UTF8 string in current locale",
|
323
|
+
nl_langinfo(CODESET));
|
324
|
+
fprintf (stderr, "\tTry setting the locale with \"export LANG=en_US.UTF-8\"\n");
|
325
|
+
#else
|
326
|
+
dict_error(dict, "Unable to read UTF8 string in current locale");
|
327
|
+
#endif
|
328
|
+
return 0;
|
329
|
+
}
|
330
|
+
i += nr;
|
331
|
+
}
|
332
|
+
}
|
333
|
+
c = get_character(dict, quote_mode);
|
334
|
+
}
|
335
|
+
return 1;
|
336
|
+
}
|
337
|
+
|
338
|
+
/**
|
339
|
+
* Returns TRUE if this token is a special token and it is equal to c
|
340
|
+
*/
|
341
|
+
static int is_equal(Dictionary dict, wint_t c)
|
342
|
+
{
|
343
|
+
return (dict->is_special &&
|
344
|
+
wctob(c) == dict->token[0] &&
|
345
|
+
dict->token[1] == '\0');
|
346
|
+
}
|
347
|
+
|
348
|
+
/**
|
349
|
+
* Make sure the string s is a valid connector.
|
350
|
+
* Return 1 if the connector is valid, else return 0,
|
351
|
+
* and print an appropriate warning message.
|
352
|
+
*/
|
353
|
+
static int check_connector(Dictionary dict, const char * s)
|
354
|
+
{
|
355
|
+
int i;
|
356
|
+
i = strlen(s);
|
357
|
+
if (i < 1) {
|
358
|
+
dict_error(dict, "Expecting a connector.");
|
359
|
+
return 0;
|
360
|
+
}
|
361
|
+
i = s[i-1]; /* the last character of the token */
|
362
|
+
if ((i!='+') && (i!='-')) {
|
363
|
+
dict_error(dict, "A connector must end in a \"+\" or \"-\".");
|
364
|
+
return 0;
|
365
|
+
}
|
366
|
+
if (*s == '@') s++;
|
367
|
+
if (!isupper((int)*s)) {
|
368
|
+
dict_error(dict, "The first letter of a connector must be in [A--Z].");
|
369
|
+
return 0;
|
370
|
+
}
|
371
|
+
if ((*s == 'I') && (*(s+1) == 'D')) {
|
372
|
+
dict_error(dict, "Connectors beginning with \"ID\" are forbidden");
|
373
|
+
return 0;
|
374
|
+
}
|
375
|
+
while (*(s+1)) {
|
376
|
+
if ((!isalnum((int)*s)) && (*s != '*') && (*s != '^')) {
|
377
|
+
dict_error(dict, "All letters of a connector must be ASCII alpha-numeric.");
|
378
|
+
return 0;
|
379
|
+
}
|
380
|
+
s++;
|
381
|
+
}
|
382
|
+
return 1;
|
383
|
+
}
|
384
|
+
|
385
|
+
/* ======================================================================== */
|
386
|
+
/**
|
387
|
+
* Dictionary entry comparison and ordering functions.
|
388
|
+
*
|
389
|
+
* The data structure storing the dictionary is simply a binary tree.
|
390
|
+
* The entries in the binary tree are sorted by alphabetical order.
|
391
|
+
* There is one catch, however: words may have suffixes (a dot, followed
|
392
|
+
* by the suffix), and these suffixes are to be handled appropriately
|
393
|
+
* during sorting and comparison.
|
394
|
+
*
|
395
|
+
* The use of suffixes means that the ordering of the words is not
|
396
|
+
* exactly the order given by strcmp. The order must be such that, for
|
397
|
+
* example, "make" < "make.n" < "make-up" -- suffixed words come after
|
398
|
+
* the bare words, but before any other words with non-ascii-alpha
|
399
|
+
* characters (such as the hyphen in "make-up", or possibly UTF8
|
400
|
+
* characters). Thus, straight "strcmp" can't be used to determine
|
401
|
+
* dictionary order.
|
402
|
+
*
|
403
|
+
* Thus, a set of specialized string comparison and ordering functions
|
404
|
+
* are provided. These "do the right thing" when matching strings with
|
405
|
+
* and without suffixes.
|
406
|
+
*/
|
407
|
+
/**
 * dict_order - order two dictionary words in proper sort order.
 *
 * Skips the common leading part of s and t, then compares the first
 * differing characters by weight: the terminating NUL weighs 0, the
 * suffix separator '.' weighs 1, and any other character weighs twice
 * its char value. A suffixed word therefore sorts directly after the
 * bare word, e.g. "make" < "make.n" < "make-up".
 *
 * @param s  first word (NUL-terminated)
 * @param t  second word (NUL-terminated)
 * @return   zero if the strings are identical, a negative value if s
 *           sorts before t, a positive value if s sorts after t.
 */
static inline int dict_order(const char *s, const char *t)
{
	int ws, wt;

	while (*s != '\0' && *s == *t) {
		s++;
		t++;
	}

	/* Double with a multiply rather than "<< 1": bytes >= 0x80 are
	 * negative where char is signed, and left-shifting a negative
	 * value is undefined behaviour. The multiply yields the same
	 * weights without the undefined shift. */
	ws = (*s == '.') ? 1 : (*s) * 2;
	wt = (*t == '.') ? 1 : (*t) * 2;
	return ws - wt;
}
|
441
|
+
|
442
|
+
/**
 * dict_order_wild() -- order dictionary strings, with wildcard.
 *
 * Walks both strings in step while their characters are equal and s
 * has not ended. At the first position where they differ (or where
 * both end):
 *   - if either string has a "*" there, the strings are considered
 *     a match and 0 is returned;
 *   - otherwise a "." on either side is treated as the end of string
 *     ('\0') and the difference of the two characters is returned.
 *
 * @return 0 on a match, >0 if s orders after t, <0 if s orders before t.
 */
static inline int dict_order_wild(const char * s, const char * t)
{
	int cs, ct;

	/* advance past the common prefix */
	for (; (*s != '\0') && (*s == *t); s++, t++)
		;

	if ((*s == '*') || (*t == '*')) return 0;

	cs = (*s == '.') ? '\0' : *s;
	ct = (*t == '.') ? '\0' : *t;
	return cs - ct;
}
|
460
|
+
|
461
|
+
/**
 * dict_match -- return true if strings match, else false.
 *
 * A "bare" string (one without a subscript) matches any corresponding
 * string with a subscript; so, for example, "make" and "make.n" are
 * a match. If both strings have subscripts, or neither has one, the
 * strings must be identical.
 *
 * A subscript is the part that follows the last "." in the word,
 * provided it is non-empty and does not begin with a digit.
 *
 * @param s  first word (NUL-terminated)
 * @param t  second word (NUL-terminated)
 * @return   non-zero if the words match, 0 otherwise.
 */
static int dict_match(const char * s, const char * t)
{
	const char *ds = strrchr(s, '.');
	const char *dt = strrchr(t, '.');

	/* A dot at the end, or a dot followed by a digit, is NOT considered
	 * a subscript. The cast to unsigned char keeps isdigit() defined
	 * for bytes >= 0x80 (e.g. UTF-8) on platforms where char is signed. */
	if ((dt != NULL) &&
	    ((dt[1] == '\0') || isdigit((unsigned char) dt[1]))) dt = NULL;
	if ((ds != NULL) &&
	    ((ds[1] == '\0') || isdigit((unsigned char) ds[1]))) ds = NULL;

	/* dt (ds) is NULL when t (s) has no suffix ... */
	if (dt == NULL && ds != NULL) {
		size_t stem = (size_t) (ds - s);
		/* The length check ensures that a longer bare word is not
		 * matched by a mere prefix, e.g. "i.e." does not match "i.e" */
		if (strlen(t) > stem) return 0;
		return (strncmp(s, t, stem) == 0);
	}
	if (dt != NULL && ds == NULL) {
		size_t stem = (size_t) (dt - t);
		if (strlen(s) > stem) return 0;
		return (strncmp(s, t, stem) == 0);
	}

	/* both suffixed, or both bare: exact comparison */
	return (strcmp(s, t) == 0);
}
|
494
|
+
|
495
|
+
/* ======================================================================== */
|
496
|
+
|
497
|
+
static inline Dict_node * dict_node_new(void)
|
498
|
+
{
|
499
|
+
return (Dict_node*) xalloc(sizeof(Dict_node));
|
500
|
+
}
|
501
|
+
|
502
|
+
/* Hand a single Dict_node back to xfree(); nothing it points to is touched. */
static inline void free_dict_node(Dict_node *dn)
{
	char *raw = (char *) dn;
	xfree(raw, sizeof(Dict_node));
}
|
506
|
+
|
507
|
+
/**
|
508
|
+
* prune_lookup_list -- discard all list entries that don't match string
|
509
|
+
* Walk the lookup list (of right links), discarding all nodes that do
|
510
|
+
* not match the dictionary string s. The matching is dictionary matching:
|
511
|
+
* suffixed entries will match "bare" entries.
|
512
|
+
*/
|
513
|
+
static Dict_node * prune_lookup_list(Dict_node *llist, const char * s)
|
514
|
+
{
|
515
|
+
Dict_node *dn, *dnx, *list_new;
|
516
|
+
|
517
|
+
list_new = NULL;
|
518
|
+
for (dn = llist; dn != NULL; dn = dnx)
|
519
|
+
{
|
520
|
+
dnx = dn->right;
|
521
|
+
/* now put dn onto the answer list, or free it */
|
522
|
+
if (dict_match(dn->string, s))
|
523
|
+
{
|
524
|
+
dn->right = list_new;
|
525
|
+
list_new = dn;
|
526
|
+
}
|
527
|
+
else
|
528
|
+
{
|
529
|
+
free_dict_node(dn);
|
530
|
+
}
|
531
|
+
}
|
532
|
+
|
533
|
+
/* now reverse the list back */
|
534
|
+
llist = NULL;
|
535
|
+
for (dn = list_new; dn != NULL; dn = dnx)
|
536
|
+
{
|
537
|
+
dnx = dn->right;
|
538
|
+
dn->right = llist;
|
539
|
+
llist = dn;
|
540
|
+
}
|
541
|
+
return llist;
|
542
|
+
}
|
543
|
+
|
544
|
+
/* Free every node of a lookup list, following the right pointers. */
void free_lookup_list(Dict_node *llist)
{
	Dict_node *following;

	for (; llist != NULL; llist = following)
	{
		following = llist->right;
		free_dict_node(llist);
	}
}
|
554
|
+
|
555
|
+
/* Free a whole binary tree of Dict_nodes: both subtrees first, then dn. */
static void free_dict_node_recursive(Dict_node * dn)
{
	if (dn != NULL)
	{
		free_dict_node_recursive(dn->left);
		free_dict_node_recursive(dn->right);
		free_dict_node(dn);
	}
}
|
562
|
+
|
563
|
+
/* ======================================================================== */
|
564
|
+
/**
|
565
|
+
* rdictionary_lookup() -- recursive dictionary lookup
|
566
|
+
* Walk binary tree, given by 'dn', looking for the string 's'.
|
567
|
+
* For every node in the tree where 's' matches (including wildcards)
|
568
|
+
* make a copy of that node, and append it to llist.
|
569
|
+
*/
|
570
|
+
static Dict_node * rdictionary_lookup(Dict_node *llist,
|
571
|
+
Dict_node * dn, const char * s, int match_idiom)
|
572
|
+
{
|
573
|
+
/* see comment in dictionary_lookup below */
|
574
|
+
int m;
|
575
|
+
Dict_node * dn_new;
|
576
|
+
if (dn == NULL) return llist;
|
577
|
+
m = dict_order_wild(s, dn->string);
|
578
|
+
if (m >= 0)
|
579
|
+
{
|
580
|
+
llist = rdictionary_lookup(llist, dn->right, s, match_idiom);
|
581
|
+
}
|
582
|
+
if ((m == 0) && (match_idiom || !is_idiom_word(dn->string)))
|
583
|
+
{
|
584
|
+
dn_new = dict_node_new();
|
585
|
+
*dn_new = *dn;
|
586
|
+
dn_new->right = llist;
|
587
|
+
llist = dn_new;
|
588
|
+
}
|
589
|
+
if (m <= 0)
|
590
|
+
{
|
591
|
+
llist = rdictionary_lookup(llist, dn->left, s, match_idiom);
|
592
|
+
}
|
593
|
+
return llist;
|
594
|
+
}
|
595
|
+
|
596
|
+
/**
|
597
|
+
* dictionary_lookup_list() - return lookup list of words in the dictionary
|
598
|
+
*
|
599
|
+
* Returns a pointer to a lookup list of the words in the dictionary.
|
600
|
+
* Matches include word that appear in idioms. Use
|
601
|
+
* abridged_lookup_list() to obtain matches, excluding idioms.
|
602
|
+
*
|
603
|
+
* This list is made up of Dict_nodes, linked by their right pointers.
|
604
|
+
* The node, file and string fields are copied from the dictionary.
|
605
|
+
*
|
606
|
+
* The returned list must be freed with free_lookup_list().
|
607
|
+
*/
|
608
|
+
Dict_node * dictionary_lookup_list(Dictionary dict, const char *s)
|
609
|
+
{
|
610
|
+
Dict_node * llist = rdictionary_lookup(NULL, dict->root, s, TRUE);
|
611
|
+
llist = prune_lookup_list(llist, s);
|
612
|
+
return llist;
|
613
|
+
}
|
614
|
+
|
615
|
+
/**
|
616
|
+
* abridged_lookup_list() - return lookup list of words in the dictionary
|
617
|
+
*
|
618
|
+
* Returns a pointer to a lookup list of the words in the dictionary.
|
619
|
+
* Excludes any idioms that contain the word; use
|
620
|
+
* dictionary_lookup_list() to obtain the complete list.
|
621
|
+
*
|
622
|
+
* This list is made up of Dict_nodes, linked by their right pointers.
|
623
|
+
* The node, file and string fields are copied from the dictionary.
|
624
|
+
*
|
625
|
+
* The returned list must be freed with free_lookup_list().
|
626
|
+
*/
|
627
|
+
Dict_node * abridged_lookup_list(Dictionary dict, const char *s)
|
628
|
+
{
|
629
|
+
Dict_node *llist;
|
630
|
+
llist = rdictionary_lookup(NULL, dict->root, s, FALSE);
|
631
|
+
llist = prune_lookup_list(llist, s);
|
632
|
+
return llist;
|
633
|
+
}
|
634
|
+
|
635
|
+
/* Return 1 if dictionary_lookup_list() finds at least one entry for s,
 * else 0. The temporary lookup list is freed before returning. */
int boolean_dictionary_lookup(Dictionary dict, const char *s)
{
	Dict_node *matches = dictionary_lookup_list(dict, s);
	int found = (matches != NULL) ? 1 : 0;

	free_lookup_list(matches);
	return found;
}
|
642
|
+
|
643
|
+
/* ======================================================================== */
|
644
|
+
/**
|
645
|
+
* Allocate a new Exp node and link it into the exp_list for freeing later.
|
646
|
+
*/
|
647
|
+
Exp * Exp_create(Dictionary dict)
|
648
|
+
{
|
649
|
+
Exp * e;
|
650
|
+
e = (Exp *) xalloc(sizeof(Exp));
|
651
|
+
e->next = dict->exp_list;
|
652
|
+
dict->exp_list = e;
|
653
|
+
return e;
|
654
|
+
}
|
655
|
+
|
656
|
+
/* Hand a single Exp node back to xfree(). The node is NOT unlinked
 * from any list it may be on. */
static inline void exp_free(Exp * e)
{
	char *raw = (char *) e;
	xfree(raw, sizeof(Exp));
}
|
660
|
+
|
661
|
+
/* ======================================================================== */
|
662
|
+
/**
|
663
|
+
* This creates a node with one child (namely e). Initializes
|
664
|
+
* the cost to zero.
|
665
|
+
*/
|
666
|
+
static Exp * make_unary_node(Dictionary dict, Exp * e)
|
667
|
+
{
|
668
|
+
Exp * n;
|
669
|
+
n = Exp_create(dict);
|
670
|
+
n->type = AND_type; /* these must be AND types */
|
671
|
+
n->cost = 0.0f;
|
672
|
+
n->u.l = (E_list *) xalloc(sizeof(E_list));
|
673
|
+
n->u.l->next = NULL;
|
674
|
+
n->u.l->e = e;
|
675
|
+
return n;
|
676
|
+
}
|
677
|
+
|
678
|
+
/**
|
679
|
+
* connector() -- make a node for a connector or dictionary word.
|
680
|
+
*
|
681
|
+
* Assumes the current token is a connector or dictionary word.
|
682
|
+
*/
|
683
|
+
static Exp * connector(Dictionary dict)
|
684
|
+
{
|
685
|
+
Exp * n;
|
686
|
+
Dict_node *dn, *dn_head;
|
687
|
+
int i;
|
688
|
+
|
689
|
+
i = strlen(dict->token) - 1; /* this must be + or - if a connector */
|
690
|
+
if ((dict->token[i] != '+') && (dict->token[i] != '-'))
|
691
|
+
{
|
692
|
+
/* If we are here, token is a word */
|
693
|
+
dn_head = abridged_lookup_list(dict, dict->token);
|
694
|
+
dn = dn_head;
|
695
|
+
while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
|
696
|
+
{
|
697
|
+
dn = dn->right;
|
698
|
+
}
|
699
|
+
if (dn == NULL)
|
700
|
+
{
|
701
|
+
free_lookup_list(dn_head);
|
702
|
+
dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
|
703
|
+
"Or perhaps you forgot the suffix on a word.\n"
|
704
|
+
"Or perhaps a word is used before it is defined.\n");
|
705
|
+
return NULL;
|
706
|
+
}
|
707
|
+
n = make_unary_node(dict, dn->exp);
|
708
|
+
free_lookup_list(dn_head);
|
709
|
+
}
|
710
|
+
else
|
711
|
+
{
|
712
|
+
/* If we are here, token is a connector */
|
713
|
+
if (!check_connector(dict, dict->token))
|
714
|
+
{
|
715
|
+
return NULL;
|
716
|
+
}
|
717
|
+
n = Exp_create(dict);
|
718
|
+
n->dir = dict->token[i];
|
719
|
+
dict->token[i] = '\0'; /* get rid of the + or - */
|
720
|
+
if (dict->token[0] == '@')
|
721
|
+
{
|
722
|
+
n->u.string = string_set_add(dict->token+1, dict->string_set);
|
723
|
+
n->multi = TRUE;
|
724
|
+
}
|
725
|
+
else
|
726
|
+
{
|
727
|
+
n->u.string = string_set_add(dict->token, dict->string_set);
|
728
|
+
n->multi = FALSE;
|
729
|
+
}
|
730
|
+
n->type = CONNECTOR_type;
|
731
|
+
n->cost = 0.0f;
|
732
|
+
}
|
733
|
+
|
734
|
+
if (!link_advance(dict))
|
735
|
+
{
|
736
|
+
exp_free(n);
|
737
|
+
return NULL;
|
738
|
+
}
|
739
|
+
return n;
|
740
|
+
}
|
741
|
+
|
742
|
+
/**
|
743
|
+
* This creates a node with zero children. Initializes
|
744
|
+
* the cost to zero.
|
745
|
+
*/
|
746
|
+
static Exp * make_zeroary_node(Dictionary dict)
|
747
|
+
{
|
748
|
+
Exp * n;
|
749
|
+
n = Exp_create(dict);
|
750
|
+
n->type = AND_type; /* these must be AND types */
|
751
|
+
n->cost = 0.0f;
|
752
|
+
n->u.l = NULL;
|
753
|
+
return n;
|
754
|
+
}
|
755
|
+
|
756
|
+
/**
|
757
|
+
* This creates an OR node with two children, one the given node,
|
758
|
+
* and the other as zeroary node. This has the effect of creating
|
759
|
+
* what used to be called an optional node.
|
760
|
+
*/
|
761
|
+
static Exp * make_optional_node(Dictionary dict, Exp * e)
|
762
|
+
{
|
763
|
+
Exp * n;
|
764
|
+
E_list *el, *elx;
|
765
|
+
n = Exp_create(dict);
|
766
|
+
n->type = OR_type;
|
767
|
+
n->cost = 0.0f;
|
768
|
+
n->u.l = el = (E_list *) xalloc(sizeof(E_list));
|
769
|
+
el->e = make_zeroary_node(dict);
|
770
|
+
el->next = elx = (E_list *) xalloc(sizeof(E_list));
|
771
|
+
elx->next = NULL;
|
772
|
+
elx->e = e;
|
773
|
+
return n;
|
774
|
+
}
|
775
|
+
|
776
|
+
/* ======================================================================== */
|
777
|
+
|
778
|
+
#if ! defined INFIX_NOTATION
|
779
|
+
|
780
|
+
Exp * expression(Dictionary dict);
|
781
|
+
/**
|
782
|
+
* We're looking at the first of the stuff after an "and" or "or".
|
783
|
+
* Build a Exp node for this expression. Set the cost and optional
|
784
|
+
* fields to the default values. Set the type field according to type
|
785
|
+
*/
|
786
|
+
Exp * operator_exp(Dictionary dict, int type)
|
787
|
+
{
|
788
|
+
Exp * n;
|
789
|
+
E_list first;
|
790
|
+
E_list * elist;
|
791
|
+
n = Exp_create(dict);
|
792
|
+
n->type = type;
|
793
|
+
n->cost = 0.0f;
|
794
|
+
elist = &first;
|
795
|
+
while((!is_equal(dict, ')')) && (!is_equal(dict, ']')) && (!is_equal(dict, '}'))) {
|
796
|
+
elist->next = (E_list *) xalloc(sizeof(E_list));
|
797
|
+
elist = elist->next;
|
798
|
+
elist->next = NULL;
|
799
|
+
elist->e = expression(dict);
|
800
|
+
if (elist->e == NULL) {
|
801
|
+
return NULL;
|
802
|
+
}
|
803
|
+
}
|
804
|
+
if (elist == &first) {
|
805
|
+
dict_error(dict, "An \"or\" or \"and\" of nothing");
|
806
|
+
return NULL;
|
807
|
+
}
|
808
|
+
n->u.l = first.next;
|
809
|
+
return n;
|
810
|
+
}
|
811
|
+
|
812
|
+
/**
|
813
|
+
* Looks for the stuff that is allowed to be inside of parentheses
|
814
|
+
* either & or | followed by a list, or a terminal symbol.
|
815
|
+
*/
|
816
|
+
Exp * in_parens(Dictionary dict)
|
817
|
+
{
|
818
|
+
Exp * e;
|
819
|
+
|
820
|
+
if (is_equal(dict, '&') || (strcmp(token, "and")==0)) {
|
821
|
+
if (!link_advance(dict)) {
|
822
|
+
return NULL;
|
823
|
+
}
|
824
|
+
return operator_exp(dict, AND_type);
|
825
|
+
} else if (is_equal(dict, '|') || (strcmp(dict->token, "or")==0)) {
|
826
|
+
if (!link_advance(dict)) {
|
827
|
+
return NULL;
|
828
|
+
}
|
829
|
+
return operator_exp(dict, OR_type);
|
830
|
+
} else {
|
831
|
+
return expression(dict);
|
832
|
+
}
|
833
|
+
}
|
834
|
+
|
835
|
+
/**
|
836
|
+
* Build (and return the root of) the tree for the expression beginning
|
837
|
+
* with the current token. At the end, the token is the first one not
|
838
|
+
* part of this expression.
|
839
|
+
*/
|
840
|
+
Exp * expression(Dictionary dict)
|
841
|
+
{
|
842
|
+
Exp * n;
|
843
|
+
if (is_equal(dict, '(')) {
|
844
|
+
if (!link_advance(dict)) {
|
845
|
+
return NULL;
|
846
|
+
}
|
847
|
+
n = in_parens(dict);
|
848
|
+
if (!is_equal(dict, ')')) {
|
849
|
+
dict_error(dict, "Expecting a \")\".");
|
850
|
+
return NULL;
|
851
|
+
}
|
852
|
+
if (!link_advance(dict)) {
|
853
|
+
return NULL;
|
854
|
+
}
|
855
|
+
} else if (is_equal(dict, '{')) {
|
856
|
+
if (!link_advance(dict)) {
|
857
|
+
return NULL;
|
858
|
+
}
|
859
|
+
n = in_parens(dict);
|
860
|
+
if (!is_equal(dict, '}')) {
|
861
|
+
dict_error(dict, "Expecting a \"}\".");
|
862
|
+
return NULL;
|
863
|
+
}
|
864
|
+
if (!link_advance(dict)) {
|
865
|
+
return NULL;
|
866
|
+
}
|
867
|
+
n = make_optional_node(dict, n);
|
868
|
+
} else if (is_equal(dict, '[')) {
|
869
|
+
if (!link_advance(dict)) {
|
870
|
+
return NULL;
|
871
|
+
}
|
872
|
+
n = in_parens(dict);
|
873
|
+
if (!is_equal(dict, ']')) {
|
874
|
+
dict_error(dict, "Expecting a \"]\".");
|
875
|
+
return NULL;
|
876
|
+
}
|
877
|
+
if (!link_advance(dict)) {
|
878
|
+
return NULL;
|
879
|
+
}
|
880
|
+
n->cost += 1.0f;
|
881
|
+
} else if (!dict->is_special) {
|
882
|
+
n = connector(dict);
|
883
|
+
if (n == NULL) {
|
884
|
+
return NULL;
|
885
|
+
}
|
886
|
+
} else if (is_equal(dict, ')') || is_equal(dict, ']')) {
|
887
|
+
/* allows "()" or "[]" */
|
888
|
+
n = make_zeroary_node(dict);
|
889
|
+
} else {
|
890
|
+
dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
|
891
|
+
return NULL;
|
892
|
+
}
|
893
|
+
return n;
|
894
|
+
}
|
895
|
+
|
896
|
+
/* ======================================================================== */
|
897
|
+
#else /* This is for infix notation */
|
898
|
+
|
899
|
+
static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok);
|
900
|
+
|
901
|
+
/**
|
902
|
+
* Build (and return the root of) the tree for the expression beginning
|
903
|
+
* with the current token. At the end, the token is the first one not
|
904
|
+
* part of this expression.
|
905
|
+
*/
|
906
|
+
static Exp * expression(Dictionary dict)
|
907
|
+
{
|
908
|
+
return restricted_expression(dict, TRUE, TRUE);
|
909
|
+
}
|
910
|
+
|
911
|
+
/**
 * Parse one infix expression starting at the current token.
 *
 * First a single operand is parsed: a bracketed sub-expression
 * ("(...)" plain, "{...}" optional, "[...]" with its cost raised by 1),
 * a connector or word, or an empty node when the current token is
 * ")" or "]". If the operand is followed by "&"/"and" or "|"/"or",
 * the rest is parsed recursively and both halves are joined under a
 * new AND or OR node with zero cost.
 *
 * @param and_ok  if zero, a warning is issued when "and" is met here
 * @param or_ok   if zero, a warning is issued when "or" is met here
 * @return        the parsed Exp, or NULL on error.
 */
static Exp * restricted_expression(Dictionary dict, int and_ok, int or_ok)
{
	Exp *left = NULL, *right, *joined;
	E_list *lcell, *rcell;
	int join_type;

	/* ---- leading operand ---- */
	if (is_equal(dict, '('))
	{
		if (!link_advance(dict)) return NULL;
		left = expression(dict);
		if (left == NULL) return NULL;
		if (!is_equal(dict, ')'))
		{
			dict_error(dict, "Expecting a \")\".");
			return NULL;
		}
		if (!link_advance(dict)) return NULL;
	}
	else if (is_equal(dict, '{'))
	{
		if (!link_advance(dict)) return NULL;
		left = expression(dict);
		if (left == NULL) return NULL;
		if (!is_equal(dict, '}'))
		{
			dict_error(dict, "Expecting a \"}\".");
			return NULL;
		}
		if (!link_advance(dict)) return NULL;
		left = make_optional_node(dict, left);
	}
	else if (is_equal(dict, '['))
	{
		if (!link_advance(dict)) return NULL;
		left = expression(dict);
		if (left == NULL) return NULL;
		if (!is_equal(dict, ']'))
		{
			dict_error(dict, "Expecting a \"]\".");
			return NULL;
		}
		if (!link_advance(dict)) return NULL;
		left->cost += 1.0f;
	}
	else if (!dict->is_special)
	{
		left = connector(dict);
		if (left == NULL) return NULL;
	}
	else if (is_equal(dict, ')') || is_equal(dict, ']'))
	{
		/* allows "()" or "[]" */
		left = make_zeroary_node(dict);
	}
	else
	{
		dict_error(dict, "Connector, \"(\", \"[\", or \"{\" expected.");
		return NULL;
	}

	/* ---- optional binary operator and right-hand side ---- */
	if (is_equal(dict, '&') || (strcmp(dict->token, "and") == 0))
	{
		if (!and_ok) {
			warning(dict, "\"and\" and \"or\" at the same level in an expression");
		}
		if (!link_advance(dict)) return NULL;
		right = restricted_expression(dict, TRUE, FALSE);
		join_type = AND_type;
	}
	else if (is_equal(dict, '|') || (strcmp(dict->token, "or") == 0))
	{
		if (!or_ok) {
			warning(dict, "\"and\" and \"or\" at the same level in an expression");
		}
		if (!link_advance(dict)) return NULL;
		right = restricted_expression(dict, FALSE, TRUE);
		join_type = OR_type;
	}
	else
	{
		/* no operator: the operand is the whole expression */
		return left;
	}

	if (right == NULL) return NULL;

	/* ---- join both halves under one node ---- */
	joined = Exp_create(dict);
	lcell = (E_list *) xalloc(sizeof(E_list));
	rcell = (E_list *) xalloc(sizeof(E_list));

	lcell->e = left;
	lcell->next = rcell;
	rcell->e = right;
	rcell->next = NULL;

	joined->u.l = lcell;
	joined->type = join_type;
	joined->cost = 0.0f;
	return joined;
}
|
1040
|
+
|
1041
|
+
#endif
|
1042
|
+
|
1043
|
+
/* ======================================================================== */
|
1044
|
+
/* Tree balancing utilities, used to implement an AVL tree.
|
1045
|
+
* Unfortunately, AVL tree insertion is very slowww, unusably
|
1046
|
+
* slow for creating the dictionary. The code is thus ifdef'ed out
|
1047
|
+
* but is left here for debugging and other sundry purposes.
|
1048
|
+
* A better way to rebalance the tree is the DSW algo, implemented
|
1049
|
+
* further below.
|
1050
|
+
*/
|
1051
|
+
|
1052
|
+
/* Rotate the subtree at root to the right: root's left child becomes
 * the new top, and root becomes that node's right child. root->left
 * must not be NULL. Returns the new top of the subtree. */
static Dict_node *rotate_right(Dict_node *root)
{
	Dict_node *new_top = root->left;
	Dict_node *handed_over = new_top->right;

	new_top->right = root;
	root->left = handed_over;
	return new_top;
}
|
1059
|
+
|
1060
|
+
#ifdef USE_AVL_TREE_FOR_INSERTION
|
1061
|
+
|
1062
|
+
/* Rotate the subtree at root to the left: root's right child becomes
 * the new top, and root becomes that node's left child. root->right
 * must not be NULL. Returns the new top of the subtree. */
static Dict_node *rotate_left(Dict_node *root)
{
	Dict_node *new_top = root->right;
	Dict_node *handed_over = new_top->left;

	new_top->left = root;
	root->right = handed_over;
	return new_top;
}
|
1069
|
+
|
1070
|
+
/* Return tree height. XXX this is not tail-recursive! */
|
1071
|
+
static int tree_depth (Dict_node *n)
|
1072
|
+
{
|
1073
|
+
int l, r;
|
1074
|
+
if (NULL == n) return 0;
|
1075
|
+
if (NULL == n->left) return 1+tree_depth(n->right);
|
1076
|
+
if (NULL == n->right) return 1+tree_depth(n->left);
|
1077
|
+
l = tree_depth(n->left);
|
1078
|
+
r = tree_depth(n->right);
|
1079
|
+
if (l < r) return r+1;
|
1080
|
+
return l+1;
|
1081
|
+
}
|
1082
|
+
|
1083
|
+
/* Balance factor of node n: height of the right subtree minus height
 * of the left subtree. n must not be NULL. */
static int tree_balance(Dict_node *n)
{
	int left_depth = tree_depth(n->left);
	int right_depth = tree_depth(n->right);

	return right_depth - left_depth;
}
|
1089
|
+
|
1090
|
+
/**
|
1091
|
+
* Rebalance the dictionary tree.
|
1092
|
+
* This recomputes the tree depth wayy too often, but so what.. this
|
1093
|
+
* only wastes cpu time during the initial dictinary read.
|
1094
|
+
*/
|
1095
|
+
static Dict_node *rebalance(Dict_node *root)
|
1096
|
+
{
|
1097
|
+
int bal = tree_balance(root);
|
1098
|
+
if (2 == bal)
|
1099
|
+
{
|
1100
|
+
bal = tree_balance(root->right);
|
1101
|
+
if (-1 == bal)
|
1102
|
+
{
|
1103
|
+
root->right = rotate_right (root->right);
|
1104
|
+
}
|
1105
|
+
return rotate_left(root);
|
1106
|
+
}
|
1107
|
+
else if (-2 == bal)
|
1108
|
+
{
|
1109
|
+
bal = tree_balance(root->left);
|
1110
|
+
if (1 == bal)
|
1111
|
+
{
|
1112
|
+
root->left = rotate_left (root->left);
|
1113
|
+
}
|
1114
|
+
return rotate_right(root);
|
1115
|
+
}
|
1116
|
+
return root;
|
1117
|
+
}
|
1118
|
+
|
1119
|
+
#endif /* USE_AVL_TREE_FOR_INSERTION */
|
1120
|
+
|
1121
|
+
/* ======================================================================== */
|
1122
|
+
/* Implementation of the DSW algo for rebalancing a binary tree.
|
1123
|
+
* The point is -- after building the dictionary tree, we rebalance it
|
1124
|
+
 * once at the end. This is a **LOT LOT** quicker than maintaining an
|
1125
|
+
* AVL tree along the way (less than quarter-of-a-second vs. about
|
1126
|
+
* a minute or more!) FWIW, the DSW tree is even more balanced than
|
1127
|
+
 * the AVL tree is (it's less deep, more full).
|
1128
|
+
*
|
1129
|
+
* The DSW algo, with C++ code, is described in
|
1130
|
+
*
|
1131
|
+
* Timothy J. Rolfe, "One-Time Binary Search Tree Balancing:
|
1132
|
+
* The Day/Stout/Warren (DSW) Algorithm", inroads, Vol. 34, No. 4
|
1133
|
+
* (December 2002), pp. 85-88
|
1134
|
+
* http://penguin.ewu.edu/~trolfe/DSWpaper/
|
1135
|
+
*/
|
1136
|
+
|
1137
|
+
/* First phase of DSW: flatten the tree into a "vine", i.e. a list
 * linked only through right pointers, by rotating right wherever a
 * node still has a left child. Returns the head of the vine. */
static Dict_node * dsw_tree_to_vine (Dict_node *root)
{
	Dict_node pseudo_root;            /* stack dummy sitting above root */
	Dict_node *tail = &pseudo_root;   /* last node known to be in the vine */
	Dict_node *cur = root;

	pseudo_root.left = NULL;
	pseudo_root.right = root;

	while (cur != NULL)
	{
		if (cur->left != NULL)
		{
			/* eliminate the left subtree */
			cur = rotate_right(cur);
			tail->right = cur;
			continue;
		}

		/* no left child: this node is in place, move down the right */
		tail = cur;
		cur = cur->right;
	}

	return pseudo_root.right;
}
|
1166
|
+
|
1167
|
+
/* Perform 'count' compound left rotations down the right spine that
 * hangs off root->right. Each step lifts a grandchild above its
 * parent and then continues from that grandchild. The caller must
 * ensure the spine is long enough for 'count' steps. */
static void dsw_compression (Dict_node *root, unsigned int count)
{
	Dict_node *anchor = root;

	for (; count > 0; count--)
	{
		Dict_node *child = anchor->right;
		Dict_node *grandchild = child->right;

		anchor->right = grandchild;
		child->right = grandchild->left;
		grandchild->left = child;

		anchor = grandchild;
	}
}
|
1180
|
+
|
1181
|
+
/* Return size of the full portion of the tree.
 * Finds the smallest pk of the form pow(2,k)-1 (k >= 1) that is not
 * less than size, and returns pk/2, i.e. pow(2,k-1)-1.
 */
static inline unsigned int full_tree_size (unsigned int size)
{
	unsigned int pk;

	for (pk = 1; pk < size; pk = 2*pk + 1)
		;
	return pk >> 1;
}
|
1190
|
+
|
1191
|
+
/* Second phase of DSW: turn a vine of 'size' nodes (linked through
 * right pointers) into a tree by repeated compression passes.
 * Returns the new root. */
static Dict_node * dsw_vine_to_tree (Dict_node *root, int size)
{
	Dict_node pseudo_root;   /* stack dummy sitting above the vine */
	unsigned int full_count = full_tree_size(size +1);

	pseudo_root.left = NULL;
	pseudo_root.right = root;

	/* first pass handles the nodes beyond the full portion */
	dsw_compression(&pseudo_root, size - full_count);

	/* remaining passes halve the spine each time */
	size = full_count;
	while (size > 1)
	{
		dsw_compression(&pseudo_root, size / 2);
		size /= 2;
	}

	return pseudo_root.right;
}
|
1206
|
+
|
1207
|
+
/* ======================================================================== */
|
1208
|
+
/**
|
1209
|
+
* Insert the new node into the dictionary below node n.
|
1210
|
+
* Give error message if the new element's string is already there.
|
1211
|
+
* Assumes that the "n" field of new is already set, and the left
|
1212
|
+
* and right fields of it are NULL.
|
1213
|
+
*
|
1214
|
+
* The resulting tree is highly unbalanced. It needs to be rebalanced
|
1215
|
+
* before used.
|
1216
|
+
*/
|
1217
|
+
Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode)
|
1218
|
+
{
|
1219
|
+
int comp;
|
1220
|
+
|
1221
|
+
if (NULL == n) return newnode;
|
1222
|
+
|
1223
|
+
comp = dict_order(newnode->string, n->string);
|
1224
|
+
if (comp < 0)
|
1225
|
+
{
|
1226
|
+
if (NULL == n->left)
|
1227
|
+
{
|
1228
|
+
n->left = newnode;
|
1229
|
+
return n;
|
1230
|
+
}
|
1231
|
+
n->left = insert_dict(dict, n->left, newnode);
|
1232
|
+
return n;
|
1233
|
+
/* return rebalance(n); Uncomment to get an AVL tree */
|
1234
|
+
}
|
1235
|
+
else if (comp > 0)
|
1236
|
+
{
|
1237
|
+
if (NULL == n->right)
|
1238
|
+
{
|
1239
|
+
n->right = newnode;
|
1240
|
+
return n;
|
1241
|
+
}
|
1242
|
+
n->right = insert_dict(dict, n->right, newnode);
|
1243
|
+
return n;
|
1244
|
+
/* return rebalance(n); Uncomment to get an AVL tree */
|
1245
|
+
}
|
1246
|
+
else
|
1247
|
+
{
|
1248
|
+
char t[256];
|
1249
|
+
snprintf(t, 256, "The word \"%s\" has been multiply defined\n", newnode->string);
|
1250
|
+
dict_error(dict, t);
|
1251
|
+
return NULL;
|
1252
|
+
}
|
1253
|
+
}
|
1254
|
+
|
1255
|
+
/**
|
1256
|
+
* insert_list() -
|
1257
|
+
* p points to a list of dict_nodes connected by their left pointers.
|
1258
|
+
* l is the length of this list (the last ptr may not be NULL).
|
1259
|
+
* It inserts the list into the dictionary.
|
1260
|
+
* It does the middle one first, then the left half, then the right.
|
1261
|
+
*
|
1262
|
+
* Note: I think this insert middle, then left, then right, has
|
1263
|
+
* its origins as a lame attempt to hack around the fact that the
|
1264
|
+
* resulting binary tree is rather badly unbalanced. This has been
|
1265
|
+
* fixed by using the DSW rebalancing algo. Now, that would seem
|
1266
|
+
* to render this crazy bisected-insertion algo obsoloete, but ..
|
1267
|
+
* oddly enough, it seems to make the DSW balancing go really fast!
|
1268
|
+
* Faster than a simple insertion. Go figure. I think this has
|
1269
|
+
* something to do with the fact that the dictionaries are in
|
1270
|
+
* alphabetical order! This subdivision helps randomize a bit.
|
1271
|
+
*/
|
1272
|
+
static void insert_list(Dictionary dict, Dict_node * p, int l)
|
1273
|
+
{
|
1274
|
+
Dict_node * dn, *dn_head, *dn_second_half;
|
1275
|
+
int k, i; /* length of first half */
|
1276
|
+
|
1277
|
+
if (l == 0) return;
|
1278
|
+
|
1279
|
+
k = (l-1)/2;
|
1280
|
+
dn = p;
|
1281
|
+
for (i = 0; i < k; i++)
|
1282
|
+
{
|
1283
|
+
dn = dn->left;
|
1284
|
+
}
|
1285
|
+
|
1286
|
+
/* dn now points to the middle element */
|
1287
|
+
dn_second_half = dn->left;
|
1288
|
+
dn->left = dn->right = NULL;
|
1289
|
+
|
1290
|
+
if (contains_underbar(dn->string))
|
1291
|
+
{
|
1292
|
+
insert_idiom(dict, dn);
|
1293
|
+
}
|
1294
|
+
else if (is_idiom_word(dn->string))
|
1295
|
+
{
|
1296
|
+
err_ctxt ec;
|
1297
|
+
ec.sent = NULL;
|
1298
|
+
err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
|
1299
|
+
"\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
|
1300
|
+
"\tThis word will be ignored.\n",
|
1301
|
+
dn->string, dict->line_number);
|
1302
|
+
free_dict_node(dn);
|
1303
|
+
}
|
1304
|
+
else if ((dn_head = abridged_lookup_list(dict, dn->string)) != NULL)
|
1305
|
+
{
|
1306
|
+
Dict_node *dnx;
|
1307
|
+
err_ctxt ec;
|
1308
|
+
ec.sent = NULL;
|
1309
|
+
err_msg(&ec, Warn, "Warning: The word \"%s\" "
|
1310
|
+
"found near line %d of %s matches the following words:\n",
|
1311
|
+
dn->string, dict->line_number, dict->name);
|
1312
|
+
for (dnx = dn_head; dnx != NULL; dnx = dnx->right) {
|
1313
|
+
fprintf(stderr, "\t%s", dnx->string);
|
1314
|
+
}
|
1315
|
+
fprintf(stderr, "\n\tThis word will be ignored.\n");
|
1316
|
+
free_lookup_list(dn_head);
|
1317
|
+
free_dict_node(dn);
|
1318
|
+
}
|
1319
|
+
else
|
1320
|
+
{
|
1321
|
+
dict->root = insert_dict(dict, dict->root, dn);
|
1322
|
+
dict->num_entries++;
|
1323
|
+
}
|
1324
|
+
|
1325
|
+
insert_list(dict, p, k);
|
1326
|
+
insert_list(dict, dn_second_half, l-k-1);
|
1327
|
+
}
|
1328
|
+
|
1329
|
+
/**
|
1330
|
+
* read_entry() -- read one dictionary entry
|
1331
|
+
* Starting with the current token parse one dictionary entry.
|
1332
|
+
* Add these words to the dictionary.
|
1333
|
+
*/
|
1334
|
+
static int read_entry(Dictionary dict)
|
1335
|
+
{
|
1336
|
+
Exp *n;
|
1337
|
+
int i;
|
1338
|
+
|
1339
|
+
Dict_node *dn_new, *dnx, *dn = NULL;
|
1340
|
+
|
1341
|
+
/* Reset multi-byte shift state every line. */
|
1342
|
+
memset(&dict->mbss, 0, sizeof(dict->mbss));
|
1343
|
+
|
1344
|
+
while (!is_equal(dict, ':'))
|
1345
|
+
{
|
1346
|
+
if (dict->is_special)
|
1347
|
+
{
|
1348
|
+
dict_error(dict, "I expected a word but didn\'t get it.");
|
1349
|
+
return 0;
|
1350
|
+
}
|
1351
|
+
|
1352
|
+
/* if it's a word-file name */
|
1353
|
+
/* However, be careful to reject "/.v" which is the division symbol
|
1354
|
+
* used in equations (.v means verb-like) */
|
1355
|
+
if ((dict->token[0] == '/') && (dict->token[1] != '.'))
|
1356
|
+
{
|
1357
|
+
dn = read_word_file(dict, dn, dict->token);
|
1358
|
+
if (dn == NULL)
|
1359
|
+
{
|
1360
|
+
err_ctxt ec;
|
1361
|
+
ec.sent = NULL;
|
1362
|
+
err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
|
1363
|
+
return 0;
|
1364
|
+
}
|
1365
|
+
}
|
1366
|
+
else
|
1367
|
+
{
|
1368
|
+
dn_new = dict_node_new();
|
1369
|
+
dn_new->left = dn;
|
1370
|
+
dn = dn_new;
|
1371
|
+
dn->file = NULL;
|
1372
|
+
dn->string = string_set_add(dict->token, dict->string_set);
|
1373
|
+
}
|
1374
|
+
|
1375
|
+
/* Advance to next entry, unless error */
|
1376
|
+
if (0 == link_advance(dict)) goto syntax_error;
|
1377
|
+
}
|
1378
|
+
|
1379
|
+
/* pass the : */
|
1380
|
+
if (!link_advance(dict))
|
1381
|
+
{
|
1382
|
+
goto syntax_error;
|
1383
|
+
}
|
1384
|
+
|
1385
|
+
n = expression(dict);
|
1386
|
+
if (n == NULL)
|
1387
|
+
{
|
1388
|
+
goto syntax_error;
|
1389
|
+
}
|
1390
|
+
|
1391
|
+
if (!is_equal(dict, ';'))
|
1392
|
+
{
|
1393
|
+
dict_error(dict, "Expecting \";\" at the end of an entry.");
|
1394
|
+
goto syntax_error;
|
1395
|
+
}
|
1396
|
+
|
1397
|
+
/* pass the ; */
|
1398
|
+
if (!link_advance(dict))
|
1399
|
+
{
|
1400
|
+
goto syntax_error;
|
1401
|
+
}
|
1402
|
+
|
1403
|
+
/* At this point, dn points to a list of Dict_nodes connected by
|
1404
|
+
* their left pointers. These are to be inserted into the dictionary */
|
1405
|
+
i = 0;
|
1406
|
+
for (dnx = dn; dnx != NULL; dnx = dnx->left)
|
1407
|
+
{
|
1408
|
+
dnx->exp = n;
|
1409
|
+
i++;
|
1410
|
+
}
|
1411
|
+
insert_list(dict, dn, i);
|
1412
|
+
return 1;
|
1413
|
+
|
1414
|
+
syntax_error:
|
1415
|
+
free_lookup_list(dn);
|
1416
|
+
return 0;
|
1417
|
+
}
|
1418
|
+
|
1419
|
+
#if ! defined INFIX_NOTATION
|
1420
|
+
/**
|
1421
|
+
* print the expression, in prefix-style
|
1422
|
+
*/
|
1423
|
+
void print_expression(Exp * n)
|
1424
|
+
{
|
1425
|
+
E_list * el;
|
1426
|
+
int i, icost;
|
1427
|
+
|
1428
|
+
if (n == NULL)
|
1429
|
+
{
|
1430
|
+
printf("NULL expression");
|
1431
|
+
return;
|
1432
|
+
}
|
1433
|
+
|
1434
|
+
icost = (int) (n->cost);
|
1435
|
+
if (n->type == CONNECTOR_type)
|
1436
|
+
{
|
1437
|
+
for (i=0; i<icost; i++) printf("[");
|
1438
|
+
if (n->multi) printf("@");
|
1439
|
+
printf("%s%c",n->u.string, n->dir);
|
1440
|
+
for (i=0; i<icost; i++) printf("]");
|
1441
|
+
if (icost > 0) printf(" ");
|
1442
|
+
}
|
1443
|
+
else
|
1444
|
+
{
|
1445
|
+
for (i=0; i<icost; i++) printf("[");
|
1446
|
+
if (icost == 0) printf("(");
|
1447
|
+
if (n->type == AND_type) printf("& ");
|
1448
|
+
if (n->type == OR_type) printf("or ");
|
1449
|
+
for (el = n->u.l; el != NULL; el = el->next)
|
1450
|
+
{
|
1451
|
+
print_expression(el->e);
|
1452
|
+
}
|
1453
|
+
for (i=0; i<icost; i++) printf("]");
|
1454
|
+
if (icost > 0) printf(" ");
|
1455
|
+
if (icost == 0) printf(") ");
|
1456
|
+
}
|
1457
|
+
}
|
1458
|
+
|
1459
|
+
#else /* INFIX_NOTATION */
|
1460
|
+
|
1461
|
+
/**
|
1462
|
+
* print the expression, in infix-style
|
1463
|
+
*/
|
1464
|
+
static void print_expression_parens(Exp * n, int need_parens)
|
1465
|
+
{
|
1466
|
+
E_list * el;
|
1467
|
+
int i, icost;
|
1468
|
+
|
1469
|
+
if (n == NULL)
|
1470
|
+
{
|
1471
|
+
printf("NULL expression");
|
1472
|
+
return;
|
1473
|
+
}
|
1474
|
+
|
1475
|
+
icost = (int) (n->cost);
|
1476
|
+
/* print the connector only */
|
1477
|
+
if (n->type == CONNECTOR_type)
|
1478
|
+
{
|
1479
|
+
for (i=0; i<icost; i++) printf("[");
|
1480
|
+
if (n->multi) printf("@");
|
1481
|
+
printf("%s%c",n->u.string, n->dir);
|
1482
|
+
for (i=0; i<icost; i++) printf("]");
|
1483
|
+
return;
|
1484
|
+
}
|
1485
|
+
|
1486
|
+
/* Look for optional, and print only that */
|
1487
|
+
el = n->u.l;
|
1488
|
+
if (el == NULL)
|
1489
|
+
{
|
1490
|
+
for (i=0; i<icost; i++) printf("[");
|
1491
|
+
printf ("()");
|
1492
|
+
for (i=0; i<icost; i++) printf("]");
|
1493
|
+
return;
|
1494
|
+
}
|
1495
|
+
|
1496
|
+
for (i=0; i<icost; i++) printf("[");
|
1497
|
+
if ((n->type == OR_type) &&
|
1498
|
+
el && el->e && (NULL == el->e->u.l))
|
1499
|
+
{
|
1500
|
+
printf ("{");
|
1501
|
+
print_expression_parens(el->next->e, FALSE);
|
1502
|
+
printf ("}");
|
1503
|
+
return;
|
1504
|
+
}
|
1505
|
+
|
1506
|
+
if ((icost == 0) && need_parens) printf("(");
|
1507
|
+
|
1508
|
+
/* print left side of binary expr */
|
1509
|
+
print_expression_parens(el->e, TRUE);
|
1510
|
+
|
1511
|
+
/* get a funny "and optional" when its a named expression thing. */
|
1512
|
+
if ((n->type == AND_type) && (el->next == NULL))
|
1513
|
+
{
|
1514
|
+
return;
|
1515
|
+
}
|
1516
|
+
|
1517
|
+
if (n->type == AND_type) printf(" & ");
|
1518
|
+
if (n->type == OR_type) printf(" or ");
|
1519
|
+
|
1520
|
+
/* print right side of binary expr */
|
1521
|
+
el = el->next;
|
1522
|
+
if (el == NULL)
|
1523
|
+
{
|
1524
|
+
printf ("()");
|
1525
|
+
}
|
1526
|
+
else
|
1527
|
+
{
|
1528
|
+
if (el->e->type == n->type)
|
1529
|
+
{
|
1530
|
+
print_expression_parens(el->e, FALSE);
|
1531
|
+
}
|
1532
|
+
else
|
1533
|
+
{
|
1534
|
+
print_expression_parens(el->e, TRUE);
|
1535
|
+
}
|
1536
|
+
if (el->next != NULL)
|
1537
|
+
printf ("\nERROR! Unexpected list!\n");
|
1538
|
+
}
|
1539
|
+
|
1540
|
+
for (i=0; i<icost; i++) printf("]");
|
1541
|
+
if ((icost == 0) && need_parens) printf(")");
|
1542
|
+
}
|
1543
|
+
|
1544
|
+
/* Print the expression in infix notation, followed by a newline. */
void print_expression(Exp * n)
{
	/* The outermost expression is never wrapped in parentheses. */
	const int outer_parens = FALSE;

	print_expression_parens(n, outer_parens);
	printf("\n");
}
|
1549
|
+
#endif /* INFIX_NOTATION */
|
1550
|
+
|
1551
|
+
/**
 * Print every node of the tree rooted at n, in order (left subtree,
 * node, right subtree), as "word: expression".
 */
static void rprint_dictionary_data(Dict_node * n)
{
	/* Recurse only into left subtrees; walk the right spine in a loop. */
	while (n != NULL)
	{
		rprint_dictionary_data(n->left);

		printf("%s: ", n->string);
		print_expression(n->exp);
		printf("\n");

		n = n->right;
	}
}
|
1560
|
+
|
1561
|
+
/**
|
1562
|
+
* Dump the entire contents of the dictionary
|
1563
|
+
* XXX This is not currently called by anything, but is a "good thing
|
1564
|
+
* to keep around".
|
1565
|
+
*/
|
1566
|
+
void print_dictionary_data(Dictionary dict)
|
1567
|
+
{
|
1568
|
+
rprint_dictionary_data(dict->root);
|
1569
|
+
}
|
1570
|
+
|
1571
|
+
/**
 * Read entries until the token becomes the empty string, then pass the
 * tree through dsw_tree_to_vine() and dsw_vine_to_tree().
 *
 * Returns 1 on success, 0 as soon as advancing or reading an entry
 * fails.
 */
int read_dictionary(Dictionary dict)
{
	/* Prime the tokenizer with the first token. */
	if (0 == link_advance(dict)) return 0;

	for (;;)
	{
		if ('\0' == dict->token[0]) break;
		if (0 == read_entry(dict)) return 0;
	}

	/* Tree -> vine -> tree, sized by the number of entries inserted. */
	dict->root = dsw_tree_to_vine(dict->root);
	dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);

	return 1;
}
|
1588
|
+
|
1589
|
+
/* ======================================================================= */
|
1590
|
+
/* the following functions are for handling deletion */
|
1591
|
+
/**
|
1592
|
+
* Returns true if it finds a non-idiom dict_node in a file that matches
|
1593
|
+
* the string s.
|
1594
|
+
*
|
1595
|
+
** note: this now DOES include non-file words in its search.
|
1596
|
+
*
|
1597
|
+
* Also sets parent and to_be_deleted appropriately.
|
1598
|
+
*/
|
1599
|
+
static int find_one_non_idiom_node(Dict_node * p, Dict_node * dn,
|
1600
|
+
const char * s,
|
1601
|
+
Dict_node **parent, Dict_node **to_be_deleted)
|
1602
|
+
{
|
1603
|
+
int m;
|
1604
|
+
if (dn == NULL) return FALSE;
|
1605
|
+
m = dict_order_wild(s, dn->string);
|
1606
|
+
if (m <= 0) {
|
1607
|
+
if (find_one_non_idiom_node(dn,dn->left, s, parent, to_be_deleted)) return TRUE;
|
1608
|
+
}
|
1609
|
+
/* if ((m == 0) && (!is_idiom_word(dn->string)) && (dn->file != NULL)) { */
|
1610
|
+
if ((m == 0) && (!is_idiom_word(dn->string))) {
|
1611
|
+
*to_be_deleted = dn;
|
1612
|
+
*parent = p;
|
1613
|
+
return TRUE;
|
1614
|
+
}
|
1615
|
+
if (m >= 0) {
|
1616
|
+
if (find_one_non_idiom_node(dn,dn->right, s, parent, to_be_deleted)) return TRUE;
|
1617
|
+
}
|
1618
|
+
return FALSE;
|
1619
|
+
}
|
1620
|
+
|
1621
|
+
static void set_parent_of_node(Dictionary dict,
|
1622
|
+
Dict_node *p,
|
1623
|
+
Dict_node * del,
|
1624
|
+
Dict_node * newnode)
|
1625
|
+
{
|
1626
|
+
if (p == NULL) {
|
1627
|
+
dict->root = newnode;
|
1628
|
+
} else {
|
1629
|
+
if (p->left == del) {
|
1630
|
+
p->left = newnode;
|
1631
|
+
} else if (p->right == del) {
|
1632
|
+
p->right = newnode;
|
1633
|
+
} else {
|
1634
|
+
assert(FALSE, "Dictionary broken?");
|
1635
|
+
}
|
1636
|
+
}
|
1637
|
+
}
|
1638
|
+
|
1639
|
+
/**
|
1640
|
+
* This deletes all the non-idiom words of the dictionary that match
|
1641
|
+
* the given string. Returns TRUE if some deleted, FALSE otherwise.
|
1642
|
+
*
|
1643
|
+
* XXX Note: this function is not currently used anywhere in the code,
|
1644
|
+
* but it could be useful for general dictionary editing.
|
1645
|
+
*/
|
1646
|
+
int delete_dictionary_words(Dictionary dict, const char * s)
|
1647
|
+
{
|
1648
|
+
Dict_node *pred, *pred_parent;
|
1649
|
+
Dict_node *parent, *to_be_deleted;
|
1650
|
+
|
1651
|
+
if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return FALSE;
|
1652
|
+
for(;;) {
|
1653
|
+
/* now parent and to_be_deleted are set */
|
1654
|
+
if (to_be_deleted->file != NULL) {
|
1655
|
+
to_be_deleted->file->changed = TRUE;
|
1656
|
+
}
|
1657
|
+
if (to_be_deleted->left == NULL) {
|
1658
|
+
set_parent_of_node(dict, parent, to_be_deleted, to_be_deleted->right);
|
1659
|
+
free_dict_node(to_be_deleted);
|
1660
|
+
} else {
|
1661
|
+
pred_parent = to_be_deleted;
|
1662
|
+
pred = to_be_deleted->left;
|
1663
|
+
while(pred->right != NULL) {
|
1664
|
+
pred_parent = pred;
|
1665
|
+
pred = pred->right;
|
1666
|
+
}
|
1667
|
+
to_be_deleted->string = pred->string;
|
1668
|
+
to_be_deleted->file = pred->file;
|
1669
|
+
to_be_deleted->exp = pred->exp;
|
1670
|
+
set_parent_of_node(dict, pred_parent, pred, pred->left);
|
1671
|
+
free_dict_node(pred);
|
1672
|
+
}
|
1673
|
+
if (!find_one_non_idiom_node(NULL, dict->root, s, &parent, &to_be_deleted)) return TRUE;
|
1674
|
+
}
|
1675
|
+
}
|
1676
|
+
|
1677
|
+
/* Free every Word_file record on the list linked through ->next. */
static void free_Word_file(Word_file * wf)
{
	while (wf != NULL)
	{
		/* Fetch the successor before the record is released. */
		Word_file *following = wf->next;

		xfree((char *) wf, sizeof(Word_file));
		wf = following;
	}
}
|
1686
|
+
|
1687
|
+
/**
|
1688
|
+
* The following two functions free the Exp s and the
|
1689
|
+
* E_lists of the dictionary. Not to be confused with
|
1690
|
+
* free_E_list in utilities.c
|
1691
|
+
*/
|
1692
|
+
static void free_Elist(E_list * l)
|
1693
|
+
{
|
1694
|
+
E_list * l1;
|
1695
|
+
|
1696
|
+
for (; l != NULL; l = l1) {
|
1697
|
+
l1 = l->next;
|
1698
|
+
xfree(l, sizeof(E_list));
|
1699
|
+
}
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
/**
 * Free every Exp on the list linked through ->next.  For a
 * non-connector Exp its operand list cells are freed first.
 */
static void free_Exp_list(Exp * e)
{
	while (e != NULL)
	{
		/* Fetch the successor before the expression is released. */
		Exp *following = e->next;

		if (CONNECTOR_type != e->type)
		{
			free_Elist(e->u.l);
		}
		exp_free(e);

		e = following;
	}
}
|
1715
|
+
|
1716
|
+
/**
 * Release the dictionary's word tree, its list of word files and its
 * list of expressions.  The pointers held in dict are not cleared.
 */
void free_dictionary(Dictionary dict)
{
	/* the tree of Dict_nodes */
	free_dict_node_recursive(dict->root);

	/* the Word_file records */
	free_Word_file(dict->word_file_header);

	/* the Exp s together with their E_lists */
	free_Exp_list(dict->exp_list);
}
|
1722
|
+
|
1723
|
+
/**
|
1724
|
+
* dict_display_word_info() - display the information about the given word.
|
1725
|
+
*/
|
1726
|
+
void dict_display_word_info(Dictionary dict, const char * s)
|
1727
|
+
{
|
1728
|
+
Dict_node *dn, *dn_head;
|
1729
|
+
Disjunct * d1, * d2;
|
1730
|
+
int len;
|
1731
|
+
dn_head = dictionary_lookup_list(dict, s);
|
1732
|
+
if (dn_head == NULL)
|
1733
|
+
{
|
1734
|
+
printf(" \"%s\" matches nothing in the dictionary.\n", s);
|
1735
|
+
return;
|
1736
|
+
}
|
1737
|
+
printf("Matches:\n");
|
1738
|
+
for (dn = dn_head; dn != NULL; dn = dn->right)
|
1739
|
+
{
|
1740
|
+
len = 0;
|
1741
|
+
d1 = build_disjuncts_for_dict_node(dn);
|
1742
|
+
for(d2 = d1 ; d2 != NULL; d2 = d2->next)
|
1743
|
+
{
|
1744
|
+
len++;
|
1745
|
+
}
|
1746
|
+
free_disjuncts(d1);
|
1747
|
+
printf(" ");
|
1748
|
+
left_print_string(stdout, dn->string,
|
1749
|
+
" ");
|
1750
|
+
printf(" %5d disjuncts ", len);
|
1751
|
+
if (dn->file != NULL)
|
1752
|
+
{
|
1753
|
+
printf("<%s>", dn->file->file);
|
1754
|
+
}
|
1755
|
+
printf("\n");
|
1756
|
+
}
|
1757
|
+
free_lookup_list(dn_head);
|
1758
|
+
return;
|
1759
|
+
}
|
1760
|
+
|
1761
|
+
/**
|
1762
|
+
* dict_display_word_expr() - display the connector info for a given word.
|
1763
|
+
*/
|
1764
|
+
void dict_display_word_expr(Dictionary dict, const char * s)
|
1765
|
+
{
|
1766
|
+
Dict_node *dn, *dn_head;
|
1767
|
+
|
1768
|
+
dn_head = dictionary_lookup_list(dict, s);
|
1769
|
+
if (dn_head == NULL)
|
1770
|
+
{
|
1771
|
+
printf(" \"%s\" matches nothing in the dictionary.\n", s);
|
1772
|
+
return;
|
1773
|
+
}
|
1774
|
+
printf("\nExpressions:\n");
|
1775
|
+
for (dn = dn_head; dn != NULL; dn = dn->right)
|
1776
|
+
{
|
1777
|
+
printf(" ");
|
1778
|
+
left_print_string(stdout, dn->string,
|
1779
|
+
" ");
|
1780
|
+
print_expression(dn->exp);
|
1781
|
+
printf("\n\n");
|
1782
|
+
}
|
1783
|
+
free_lookup_list(dn_head);
|
1784
|
+
return;
|
1785
|
+
}
|