grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* Copyright (c) 2009 Vikas N. Kumar */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#ifndef SPELLCHECK_HUN_C
|
15
|
+
#define SPELLCHECK_HUN_C /* names the guard macro tested by the #ifndef above */
|
16
|
+
|
17
|
+
#include <stdio.h>
|
18
|
+
#include <stdlib.h>
|
19
|
+
#include "link-includes.h"
|
20
|
+
#include "spellcheck.h"
|
21
|
+
#include "utilities.h" /* For Win32 compatibility */
|
22
|
+
|
23
|
+
#ifdef HAVE_HUNSPELL
|
24
|
+
|
25
|
+
/*
 * HUNSPELL_DICT_DIR may be predefined to name one extra dictionary
 * directory. When nothing defined it, it falls back to a null char
 * pointer, giving the table below a NULL final entry.
 */
#if !defined(HUNSPELL_DICT_DIR)
#define HUNSPELL_DICT_DIR (char *)0
#endif /* HUNSPELL_DICT_DIR */

/*
 * Directories probed, in this order, for hunspell/myspell dictionary
 * files. The entry count is derived with sizeof, so the table needs
 * no terminator.
 */
static const char *hunspell_dict_dirs[] = {
	/* "dicts" sub-directories */
	"/usr/share/myspell/dicts",
	"/usr/share/hunspell/dicts",
	"/usr/local/share/myspell/dicts",
	"/usr/local/share/hunspell/dicts",

	/* the share directories themselves */
	"/usr/share/myspell",
	"/usr/share/hunspell",
	"/usr/local/share/myspell",
	"/usr/local/share/hunspell",

	/* optional extra directory; may be NULL */
	HUNSPELL_DICT_DIR
};
|
40
|
+
|
41
|
+
/*
 * Flat list of pairs: each even slot holds a link-grammar language
 * code and the odd slot after it holds the matching hunspell file
 * stem. "en" appears twice, once per spelling of the stem.
 */
static const char *spellcheck_lang_mapping[] = {
	/* link-grammar language */  /* hunspell filename */
	"en",                        "en-US",
	"en",                        "en_US"
};
|
45
|
+
|
46
|
+
/* Size, in bytes, of each of the two path buffers below. */
#define FPATHLEN 256

/* File-scope buffers holding the .aff and .dic paths built by spellcheck_create(). */
static char hunspell_aff_file[FPATHLEN],
            hunspell_dic_file[FPATHLEN];
|
49
|
+
|
50
|
+
#include <hunspell.h>
|
51
|
+
#include <string.h>
|
52
|
+
|
53
|
+
void * spellcheck_create(const char * lang)
|
54
|
+
{
|
55
|
+
size_t i = 0, j = 0;
|
56
|
+
Hunhandle *h = NULL;
|
57
|
+
|
58
|
+
memset(hunspell_aff_file, 0, FPATHLEN);
|
59
|
+
memset(hunspell_dic_file, 0, FPATHLEN);
|
60
|
+
for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
|
61
|
+
{
|
62
|
+
if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
|
63
|
+
|
64
|
+
/* check in each hunspell_dict_dir if the files exist */
|
65
|
+
for (j = 0; j < sizeof(hunspell_dict_dirs)/sizeof(char *); ++j)
|
66
|
+
{
|
67
|
+
FILE *fh;
|
68
|
+
/* if the directory name is NULL then ignore */
|
69
|
+
if (hunspell_dict_dirs[j] == NULL) continue;
|
70
|
+
|
71
|
+
snprintf(hunspell_aff_file, FPATHLEN, "%s/%s.aff", hunspell_dict_dirs[j],
|
72
|
+
spellcheck_lang_mapping[i+1]);
|
73
|
+
snprintf(hunspell_dic_file, FPATHLEN, "%s/%s.dic", hunspell_dict_dirs[j],
|
74
|
+
spellcheck_lang_mapping[i+1]);
|
75
|
+
|
76
|
+
/* Some versions of Hunspell_create() will succeed even if
|
77
|
+
* there are no dictionary files. So test for permissions.
|
78
|
+
*/
|
79
|
+
fh = fopen(hunspell_aff_file, "r");
|
80
|
+
if (fh) fclose (fh);
|
81
|
+
else continue;
|
82
|
+
|
83
|
+
fh = fopen(hunspell_dic_file, "r");
|
84
|
+
if (fh) fclose (fh);
|
85
|
+
else continue;
|
86
|
+
|
87
|
+
h = Hunspell_create(hunspell_aff_file, hunspell_dic_file);
|
88
|
+
/* if hunspell handle was created break from loop */
|
89
|
+
if (h != NULL)
|
90
|
+
break;
|
91
|
+
}
|
92
|
+
/* if hunspell handle was created break from loop */
|
93
|
+
if (h != NULL) break;
|
94
|
+
}
|
95
|
+
return h;
|
96
|
+
}
|
97
|
+
|
98
|
+
void spellcheck_destroy(void * chk)
|
99
|
+
{
|
100
|
+
Hunhandle *h = (Hunhandle *) chk;
|
101
|
+
Hunspell_destroy(h);
|
102
|
+
}
|
103
|
+
|
104
|
+
/**
|
105
|
+
* Return boolean: 1 if spelling looks good, else zero
|
106
|
+
*/
|
107
|
+
int spellcheck_test(void * chk, const char * word)
|
108
|
+
{
|
109
|
+
if (NULL == chk)
|
110
|
+
{
|
111
|
+
prt_error("Error: no spell-check handle specified!\n");
|
112
|
+
return 0;
|
113
|
+
}
|
114
|
+
|
115
|
+
return Hunspell_spell((Hunhandle *)chk, word);
|
116
|
+
}
|
117
|
+
|
118
|
+
int spellcheck_suggest(void * chk, char ***sug, const char * word)
|
119
|
+
{
|
120
|
+
if (NULL == chk)
|
121
|
+
{
|
122
|
+
prt_error("Error: no spell-check handle specified!\n");
|
123
|
+
return 0;
|
124
|
+
}
|
125
|
+
|
126
|
+
return Hunspell_suggest((Hunhandle *)chk, sug, word);
|
127
|
+
}
|
128
|
+
|
129
|
+
/**
 * Release a suggestion list obtained through spellcheck_suggest().
 *
 * @param sug   array of suggestion strings; NULL is accepted and ignored.
 * @param size  number of entries in sug; values <= 0 free only the array.
 *
 * Each string is freed before the array itself. Freeing only the
 * array, as was done before, leaked every suggestion.
 *
 * NOTE(review): assumes the strings and the array come from the
 * malloc family, as hunspell's own list-freeing routine implies --
 * confirm against the hunspell version in use.
 */
void spellcheck_free_suggest(char **sug, int size)
{
	int i;

	if (NULL == sug) return;

	for (i = 0; i < size; i++)
	{
		free(sug[i]);
	}
	free(sug);
}
|
133
|
+
|
134
|
+
#endif /* #ifdef HAVE_HUNSPELL */
|
135
|
+
|
136
|
+
#endif
|
@@ -0,0 +1,34 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
#ifndef SPELLCHECK_H
#define SPELLCHECK_H

#if defined(HAVE_HUNSPELL) || defined(HAVE_ASPELL)

/*
 * A spell-check backend is enabled: these functions are only declared
 * here and must be defined elsewhere.
 */
void * spellcheck_create(const char * lang);
void spellcheck_destroy(void * chk);
int spellcheck_test(void * chk, const char * word);
int spellcheck_suggest(void * chk, char ***sug, const char * word);
void spellcheck_free_suggest(char **sug, int size);

#else /* neither HAVE_HUNSPELL nor HAVE_ASPELL */

#include "utilities.h" /* For MSVC inline portability */

/*
 * No backend: inline stand-ins that do nothing. No handle is ever
 * created, and the test and suggest calls return 0.
 */
static inline void * spellcheck_create(const char * lang)
{
	return NULL;
}

static inline void spellcheck_destroy(void * chk)
{
}

static inline int spellcheck_test(void * chk, const char * word)
{
	return 0;
}

static inline int spellcheck_suggest(void * chk, char ***sug, const char * word)
{
	return 0;
}

static inline void spellcheck_free_suggest(char **sug, int size)
{
}

#endif /* HAVE_HUNSPELL || HAVE_ASPELL */

#endif /* SPELLCHECK_H */
|
@@ -0,0 +1,169 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include "api.h"
|
15
|
+
|
16
|
+
/**
|
17
|
+
* Suppose you have a program that generates strings and keeps pointers to them.
|
18
|
+
The program never needs to change these strings once they're generated.
|
19
|
+
If it generates the same string again, then it can reuse the one it
|
20
|
+
generated before. This is what this package supports.
|
21
|
+
|
22
|
+
String_set is the object. The functions are:
|
23
|
+
|
24
|
+
const char * string_set_add(const char * source_string, String_set * ss);
|
25
|
+
This function returns a pointer to a string with the same
|
26
|
+
contents as the source_string. If that string is already
|
27
|
+
in the table, then it uses that copy, otherwise it generates
|
28
|
+
and inserts a new one.
|
29
|
+
|
30
|
+
const char * string_set_lookup(const char * source_string, String_set * ss);
|
31
|
+
This function returns a pointer to a string with the same
|
32
|
+
contents as the source_string. If that string is not already
|
33
|
+
in the table, returns NULL;
|
34
|
+
|
35
|
+
String_set * string_set_create(void);
|
36
|
+
Create a new empty String_set.
|
37
|
+
|
38
|
+
void string_set_delete(String_set *ss);
|
39
|
+
Free all the space associated with this string set.
|
40
|
+
|
41
|
+
The implementation uses open addressing with a second, per-string stride hash for probing (i.e. not bucket chaining).
|
42
|
+
*/
|
43
|
+
|
44
|
+
static int hash_string(const char *sa, const String_set *ss)
|
45
|
+
{
|
46
|
+
unsigned char *str = (unsigned char *) sa;
|
47
|
+
unsigned int accum = 0;
|
48
|
+
for (;*str != '\0'; str++) accum = ((256*accum) + (*str)) % (ss->size);
|
49
|
+
return accum;
|
50
|
+
}
|
51
|
+
|
52
|
+
static int stride_hash_string(const char *sa, const String_set *ss)
|
53
|
+
{
|
54
|
+
unsigned char *str = (unsigned char *) sa;
|
55
|
+
/* This is the stride used, so we have to make sure that its value is not 0 */
|
56
|
+
unsigned int accum = 0;
|
57
|
+
for (;*str != '\0'; str++) accum = ((17*accum) + (*str)) % (ss->size);
|
58
|
+
if (accum == 0) accum = 1;
|
59
|
+
return accum;
|
60
|
+
}
|
61
|
+
|
62
|
+
/**
 * Return the smallest prime that is >= start.
 *
 * Any start of 2 or less yields 2. Larger values are first made odd
 * and then stepped by 2 until trial division by odd numbers up to the
 * square root finds no divisor.
 *
 * The earlier version re-tested "start % i" after the divisor loop;
 * for start == 3 that test used i == 3 and wrongly rejected 3, and
 * for start <= 1 it returned the non-prime 1. An explicit flag avoids
 * both cases.
 */
static int next_prime_up(int start)
{
	int i;

	if (start <= 2) return 2;

	start = start | 1; /* make it odd */
	for (;;) {
		int is_prime = 1;

		/* i <= start/i is i*i <= start without the overflow risk */
		for (i = 3; i <= (start/i); i += 2) {
			if (start % i == 0) {
				is_prime = 0;
				break;
			}
		}
		if (is_prime) return start;
		start += 2;
	}
}
|
78
|
+
|
79
|
+
String_set * string_set_create(void)
|
80
|
+
{
|
81
|
+
String_set *ss;
|
82
|
+
int i;
|
83
|
+
ss = (String_set *) xalloc(sizeof(String_set));
|
84
|
+
ss->size = next_prime_up(100);
|
85
|
+
ss->table = (char **) xalloc(ss->size * sizeof(char *));
|
86
|
+
ss->count = 0;
|
87
|
+
for (i=0; i<ss->size; i++) ss->table[i] = NULL;
|
88
|
+
return ss;
|
89
|
+
}
|
90
|
+
|
91
|
+
/**
|
92
|
+
* lookup the given string in the table. Return a pointer
|
93
|
+
* to the place it is, or the place where it should be.
|
94
|
+
*/
|
95
|
+
static int find_place(const char * str, String_set *ss)
|
96
|
+
{
|
97
|
+
int h, s, i;
|
98
|
+
h = hash_string(str, ss);
|
99
|
+
s = stride_hash_string(str, ss);
|
100
|
+
for (i=h; 1; i = (i + s)%(ss->size)) {
|
101
|
+
if ((ss->table[i] == NULL) || (strcmp(ss->table[i], str) == 0)) return i;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
/*
 * Replace the slot array with one of next_prime_up(2 * size) slots
 * and re-insert every stored string pointer into it. The strings are
 * not copied; only the old slot array is released.
 */
static void grow_table(String_set *ss)
{
	const String_set prev = *ss; /* snapshot of the old table and size */
	int i;

	ss->size = next_prime_up(2 * prev.size); /* at least double the size */
	ss->table = (char **) xalloc(ss->size * sizeof(char *));
	ss->count = 0;
	for (i = 0; i < ss->size; i++)
	{
		ss->table[i] = NULL;
	}

	for (i = 0; i < prev.size; i++)
	{
		char *entry = prev.table[i];
		int p;

		if (NULL == entry) continue;

		p = find_place(entry, ss);
		ss->table[p] = entry;
		ss->count++;
	}

	fflush(stdout);
	xfree((char *) prev.table, prev.size * sizeof(char *));
}
|
126
|
+
|
127
|
+
/**
 * Return the set's own copy of source_string, adding one if needed.
 *
 * If an equal string is already stored, that pointer is returned.
 * Otherwise a copy is allocated and stored, and the table is grown
 * once it is more than 3/4 full. Growing moves only the pointers, so
 * the returned copy stays valid.
 */
const char * string_set_add(const char * source_string, String_set * ss)
{
	char *copy;
	int slot;

	assert(source_string != NULL, "STRING_SET: Can't insert a null string");

	slot = find_place(source_string, ss);
	if (NULL == ss->table[slot])
	{
		int len = strlen(source_string);

		copy = (char *) xalloc(len+1);
		strcpy(copy, source_string);
		ss->table[slot] = copy;
		ss->count++;

		/* more than 3/4 full counts as too big */
		if ((4 * ss->count) > (3 * ss->size)) grow_table(ss);

		return copy;
	}
	return ss->table[slot];
}
|
150
|
+
|
151
|
+
/**
 * Look up source_string without inserting it. Returns the stored
 * pointer when an equal string is present, or NULL when the probe
 * ends on an empty slot.
 */
const char * string_set_lookup(const char * source_string, String_set * ss)
{
	return ss->table[find_place(source_string, ss)];
}
|
158
|
+
|
159
|
+
/**
 * Free every stored string, then the slot array, then the String_set
 * itself. A NULL set is ignored.
 */
void string_set_delete(String_set *ss)
{
	int slot;

	if (NULL == ss) return;

	for (slot = 0; slot < ss->size; slot++)
	{
		char *entry = ss->table[slot];

		if (NULL == entry) continue;
		xfree(entry, strlen(entry) + 1);
	}
	xfree((char *) ss->table, ss->size * sizeof(char *));
	xfree((char *) ss, sizeof(String_set));
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
/* Create a new, empty string set. */
String_set *string_set_create(void);

/* Return the set's copy of source_string, inserting a copy if absent. */
const char *string_set_add(const char *source_string, String_set *ss);

/* Return the set's copy of source_string, or NULL if it is not stored. */
const char *string_set_lookup(const char *source_string, String_set *ss);

/* Free the set and every string stored in it. */
void string_set_delete(String_set *ss);
|
@@ -0,0 +1,498 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#ifndef _STRUCTURES_H_
|
15
|
+
#define _STRUCTURES_H_
|
16
|
+
|
17
|
+
#include "api-types.h"
|
18
|
+
#include "utilities.h" /* Needed for inline defn in Windows */
|
19
|
+
|
20
|
+
/*
|
21
|
+
Global variable descriptions
|
22
|
+
-- Most of these global variables have been eliminated.
|
23
|
+
I've left this comment here for historical purposes --DS 4/98
|
24
|
+
|
25
|
+
N_words:
|
26
|
+
The number of words in the current sentence. Computed by
|
27
|
+
separate_sentence().
|
28
|
+
|
29
|
+
N_links:
|
30
|
+
The number of links in the current linkage. Computed by
|
31
|
+
extract_linkage().
|
32
|
+
|
33
|
+
sentence[].string:
|
34
|
+
Contains a slightly modified form of the words typed by the user.
|
35
|
+
Computed by separate_sentence().
|
36
|
+
|
37
|
+
sentence[].x:
|
38
|
+
Contains, for each word, a pointer to a list of expressions from the
|
39
|
+
dictionary that match the word in sentence[].string.
|
40
|
+
Computed by build_sentence_expressions().
|
41
|
+
|
42
|
+
sentence[].d
|
43
|
+
Contains for each word, a pointer to a list of disjuncts for this word.
|
44
|
+
Computed by: prepare_to_parse(), but modified by pruning and power
|
45
|
+
pruning.
|
46
|
+
|
47
|
+
link_array[]
|
48
|
+
This is an array of links. These links define the current linkage.
|
49
|
+
It is computed by extract_links(). It is used by analyze_linkage() to
|
50
|
+
compute pp_linkage[]. It may contain fat links.
|
51
|
+
|
52
|
+
pp_link_array[] ** eliminated (ALB)
|
53
|
+
Another array of links. Here all fat links have been expunged.
|
54
|
+
It is computed by analyze_linkage(), and used by post_process() and by
|
55
|
+
print_links();
|
56
|
+
|
57
|
+
chosen_disjuncts[]
|
58
|
+
This is an array of pointers to disjuncts, one for each word, that is
|
59
|
+
computed by extract_links(). It represents the chosen disjuncts for the
|
60
|
+
current linkage. It is used to compute the cost of the linkage, and
|
61
|
+
also by compute_chosen_words() to compute the chosen_words[].
|
62
|
+
|
63
|
+
chosen_words[]
|
64
|
+
An array of pointers to strings. These are the words to be displayed
|
65
|
+
when printing the solution, the links, etc. Computed as a function of
|
66
|
+
chosen_disjuncts[] by compute_chosen_words(). This differs from
|
67
|
+
sentence[].string because it contains the suffixes. It differs from
|
68
|
+
chosen_disjunct[].string in that the idiom symbols have been removed.
|
69
|
+
|
70
|
+
has_fat_down[]
|
71
|
+
An array of chars, one for each word. TRUE if there is a fat link
|
72
|
+
down from this word, FALSE otherwise. (Only set if there is at least
|
73
|
+
one fat link.) Set by set_has_fat_down_array() and used by
|
74
|
+
analyze_linkage() and is_canonical().
|
75
|
+
|
76
|
+
is_conjunction[]
|
77
|
+
An array of chars, one for each word. TRUE if the word is a conjunction
|
78
|
+
("and", "or", "nor", or "but" at the moment). False otherwise.
|
79
|
+
*/
|
80
|
+
|
81
|
+
|
82
|
+
#define NEGATIVECOST (-1000000) /* parenthesized so the sign stays attached wherever the macro is expanded */
|
83
|
+
/* This is a hack that allows one to discard disjuncts containing
|
84
|
+
connectors whose cost is greater than a given bound. This number plus
|
85
|
+
the cost of any connectors on a disjunct must remain negative, and
|
86
|
+
this number multiplied by the number of costly connectors on any
|
87
|
+
disjunct must fit into an integer. */
|
88
|
+
|
89
|
+
/* Upper bound on the cost of any connector. */
|
90
|
+
#define MAX_CONNECTOR_COST 1000.0f
|
91
|
+
|
92
|
+
#define LEFT_WALL_DISPLAY ("LEFT-WALL") /* the string to use to show the wall */
|
93
|
+
#define LEFT_WALL_SUPPRESS ("Wd") /* If this connector is used on the wall, */
|
94
|
+
/* then suppress the display of the wall */
|
95
|
+
/* bogus name to prevent ever suppressing */
|
96
|
+
#define RIGHT_WALL_DISPLAY ("RIGHT-WALL") /* the string to use to show the wall */
|
97
|
+
#define RIGHT_WALL_SUPPRESS ("RW") /* If this connector is used on the wall, then suppress the display of the wall */
|
98
|
+
|
99
|
+
/* The following define the names of the special strings in the dictionary. */
|
100
|
+
#define LEFT_WALL_WORD ("LEFT-WALL")
|
101
|
+
#define RIGHT_WALL_WORD ("RIGHT-WALL")
|
102
|
+
#define POSTPROCESS_WORD ("POSTPROCESS")
|
103
|
+
#define ANDABLE_CONNECTORS_WORD ("ANDABLE-CONNECTORS")
|
104
|
+
#define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS")
|
105
|
+
|
106
|
+
#if DONT_USE_REGEX_GUESSING
|
107
|
+
/* English-language-specific morphology guessing */
|
108
|
+
/* Obsolete, replaced by regex-based morphology handler */
|
109
|
+
#define PROPER_WORD ("CAPITALIZED-WORDS")
|
110
|
+
#define PL_PROPER_WORD ("PL-CAPITALIZED-WORDS")
|
111
|
+
#define HYPHENATED_WORD ("HYPHENATED-WORDS")
|
112
|
+
#define NUMBER_WORD ("NUMBERS")
|
113
|
+
#define ING_WORD ("ING-WORDS")
|
114
|
+
#define S_WORD ("S-WORDS")
|
115
|
+
#define ED_WORD ("ED-WORDS")
|
116
|
+
#define LY_WORD ("LY-WORDS")
|
117
|
+
#endif /* DONT_USE_REGEX_GUESSING */
|
118
|
+
|
119
|
+
#define UNKNOWN_WORD ("UNKNOWN-WORD")
|
120
|
+
|
121
|
+
#define MAX_PATH_NAME 200 /* file names (including paths)
|
122
|
+
should not be longer than this */
|
123
|
+
|
124
|
+
/* Some size definitions. Reduce these for small machines */
|
125
|
+
#define MAX_WORD 60 /* maximum number of chars in a word */
|
126
|
+
#define MAX_LINE 1500 /* maximum number of chars in a sentence */
|
127
|
+
#define MAX_DISJUNCT_COST 10000
|
128
|
+
|
129
|
+
/* conditional compiling flags */
|
130
|
+
#define PLURALIZATION
|
131
|
+
/* If defined, turns on the pluralization operation in */
|
132
|
+
/* "and", "or" and "nor" */
|
133
|
+
#define INFIX_NOTATION
|
134
|
+
/* If defined, then we're using infix notation for the dictionary */
|
135
|
+
/* otherwise we're using prefix notation */
|
136
|
+
|
137
|
+
#define DOWN_priority 2
|
138
|
+
#define UP_priority 1
|
139
|
+
#define THIN_priority 0
|
140
|
+
|
141
|
+
#define NORMAL_LABEL (-1) /* used for normal connectors */
|
142
|
+
/* the labels >= 0 are used by fat links */
|
143
|
+
|
144
|
+
#define UNLIMITED_LEN 255
|
145
|
+
#define SHORT_LEN 6
|
146
|
+
#define NO_WORD 255
|
147
|
+
|
148
|
+
#ifndef _MSC_VER
|
149
|
+
typedef long long s64; /* signed 64-bit integer, even on 32-bit cpus */
|
150
|
+
#define PARSE_NUM_OVERFLOW (1LL<<24)
|
151
|
+
#else
|
152
|
+
/* Microsoft Visual C Version 6 doesn't support long long. */
|
153
|
+
typedef signed __int64 s64; /* signed 64-bit integer, even on 32-bit cpus */
|
154
|
+
#define PARSE_NUM_OVERFLOW (((s64)1)<<24)
|
155
|
+
#endif
|
156
|
+
|
157
|
+
struct Connector_struct
|
158
|
+
{
|
159
|
+
short label;
|
160
|
+
short hash;
|
161
|
+
unsigned char word;
|
162
|
+
/* The nearest word to my left (or right) that
|
163
|
+
this could connect to. Computed by power pruning */
|
164
|
+
unsigned char length_limit;
|
165
|
+
/* If this is a length limited connector, this
|
166
|
+
gives the limit of the length of the link
|
167
|
+
that can be used on this connector. Since
|
168
|
+
this is strictly a function of the connector
|
169
|
+
name, efficiency is the only reason to store
|
170
|
+
this. If no limit, the value is set to 255. */
|
171
|
+
char priority;/* one of the three priorities above */
|
172
|
+
char multi; /* TRUE if this is a multi-connector */
|
173
|
+
Connector * next;
|
174
|
+
const char * string;
|
175
|
+
|
176
|
+
/* Hash table next pointer, used only during pruning. */
|
177
|
+
Connector * tableNext;
|
178
|
+
const char * prune_string;
|
179
|
+
};
|
180
|
+
|
181
|
+
static inline void connector_set_string(Connector *c, const char *s) /* Point connector c at the name s; the pointer is stored, the text is not copied. */
|
182
|
+
{
|
183
|
+
c->hash = -1; /* NOTE(review): -1 presumably marks the hash as not yet computed for the new string -- confirm */
|
184
|
+
c->string = s;
|
185
|
+
}
|
186
|
+
static inline const char * connector_get_string(Connector *c)
|
187
|
+
{
|
188
|
+
return c->string;
|
189
|
+
}
|
190
|
+
|
191
|
+
struct Disjunct_struct
|
192
|
+
{
|
193
|
+
Disjunct *next;
|
194
|
+
const char * string;
|
195
|
+
Connector *left, *right;
|
196
|
+
float cost;
|
197
|
+
char marked;
|
198
|
+
};
|
199
|
+
|
200
|
+
typedef struct Match_node_struct Match_node;
|
201
|
+
struct Match_node_struct
|
202
|
+
{
|
203
|
+
Match_node * next;
|
204
|
+
Disjunct * d;
|
205
|
+
};
|
206
|
+
|
207
|
+
typedef struct X_node_struct X_node;
|
208
|
+
struct X_node_struct
|
209
|
+
{
|
210
|
+
const char * string; /* the word itself */
|
211
|
+
Exp * exp;
|
212
|
+
X_node *next;
|
213
|
+
};
|
214
|
+
|
215
|
+
struct Word_struct
|
216
|
+
{
|
217
|
+
char string[MAX_WORD+1];
|
218
|
+
X_node * x; /* sentence starts out with these */
|
219
|
+
Disjunct * d; /* eventually these get generated */
|
220
|
+
int firstupper;
|
221
|
+
};
|
222
|
+
|
223
|
+
/**
|
224
|
+
* Types of Exp_struct structures
|
225
|
+
*/
|
226
|
+
#define OR_type 0
|
227
|
+
#define AND_type 1
|
228
|
+
#define CONNECTOR_type 2
|
229
|
+
|
230
|
+
/**
|
231
|
+
* The E_list and Exp structures defined below comprise the expression
|
232
|
+
* trees that are stored in the dictionary. The expression has a type
|
233
|
+
* (AND, OR or CONNECTOR; a connector is a terminal). If it is not a terminal it has a list
|
234
|
+
* (an E_list) of children.
|
235
|
+
*/
|
236
|
+
struct Exp_struct
|
237
|
+
{
|
238
|
+
Exp * next; /* Used only for mem management,for freeing */
|
239
|
+
char type; /* One of three types, see above */
|
240
|
+
char dir; /* '-' means to the left, '+' means to right (for connector) */
|
241
|
+
char multi; /* TRUE if a multi-connector (for connector) */
|
242
|
+
union {
|
243
|
+
E_list * l; /* only needed for non-terminals */
|
244
|
+
const char * string; /* only needed if it's a connector */
|
245
|
+
} u;
|
246
|
+
float cost; /* The cost of using this expression.
|
247
|
+
Only used for non-terminals */
|
248
|
+
};
|
249
|
+
|
250
|
+
struct E_list_struct
|
251
|
+
{
|
252
|
+
E_list * next;
|
253
|
+
Exp * e;
|
254
|
+
};
|
255
|
+
|
256
|
+
/* The structure below stores a list of dictionary word files. */
|
257
|
+
struct Word_file_struct
|
258
|
+
{
|
259
|
+
char file[MAX_PATH_NAME+1]; /* the file name */
|
260
|
+
int changed; /* TRUE if this file has been changed */
|
261
|
+
Word_file * next;
|
262
|
+
};
|
263
|
+
|
264
|
+
/**
|
265
|
+
* The dictionary is stored as a binary tree comprised of the following
|
266
|
+
* nodes. A list of these (via right pointers) is used to return
|
267
|
+
* the result of a dictionary lookup.
|
268
|
+
*/
|
269
|
+
struct Dict_node_struct
|
270
|
+
{
|
271
|
+
const char * string; /* the word itself */
|
272
|
+
Word_file * file; /* the file the word came from (NULL if dict file) */
|
273
|
+
Exp * exp;
|
274
|
+
Dict_node *left, *right;
|
275
|
+
};
|
276
|
+
|
277
|
+
/* The regexs are stored as a linked list of the following nodes. */
|
278
|
+
struct Regex_node_s
|
279
|
+
{
|
280
|
+
char *name; /* The identifying name of the regex */
|
281
|
+
char *pattern; /* The regular expression pattern */
|
282
|
+
void *re; /* The compiled regex. void * to avoid
|
283
|
+
* having re library details invading the
|
284
|
+
* rest of the LG system; regex-morph.c
|
285
|
+
* takes care of all matching.
|
286
|
+
*/
|
287
|
+
Regex_node *next;
|
288
|
+
};
|
289
|
+
|
290
|
+
|
291
|
+
/* The following three structs comprise what is returned by post_process(). */
|
292
|
+
typedef struct D_type_list_struct D_type_list;
|
293
|
+
struct D_type_list_struct
|
294
|
+
{
|
295
|
+
D_type_list * next;
|
296
|
+
int type;
|
297
|
+
};
|
298
|
+
|
299
|
+
typedef struct PP_node_struct PP_node;
|
300
|
+
struct PP_node_struct
|
301
|
+
{
|
302
|
+
D_type_list *d_type_array[MAX_LINKS];
|
303
|
+
const char *violation;
|
304
|
+
};
|
305
|
+
|
306
|
+
/* Davy added these */
|
307
|
+
|
308
|
+
typedef struct Andlist_struct Andlist;
|
309
|
+
struct Andlist_struct
|
310
|
+
{
|
311
|
+
Andlist * next;
|
312
|
+
int conjunction;
|
313
|
+
int num_elements;
|
314
|
+
int element[MAX_SENTENCE];
|
315
|
+
int num_outside_words;
|
316
|
+
int outside_word[MAX_SENTENCE];
|
317
|
+
int cost;
|
318
|
+
};
|
319
|
+
|
320
|
+
/**
|
321
|
+
* This is for building the graphs of links in post-processing and
|
322
|
+
* fat link extraction.
|
323
|
+
*/
|
324
|
+
struct Linkage_info_struct
|
325
|
+
{
|
326
|
+
int index;
|
327
|
+
Boolean fat;
|
328
|
+
Boolean canonical;
|
329
|
+
Boolean improper_fat_linkage;
|
330
|
+
Boolean inconsistent_domains;
|
331
|
+
short N_violations;
|
332
|
+
short null_cost, unused_word_cost, and_cost, link_cost;
|
333
|
+
float disjunct_cost;
|
334
|
+
double corpus_cost;
|
335
|
+
Andlist * andlist;
|
336
|
+
int island[MAX_SENTENCE];
|
337
|
+
size_t nwords;
|
338
|
+
char **disjunct_list_str;
|
339
|
+
#ifdef USE_CORPUS
|
340
|
+
Sense **sense_list;
|
341
|
+
#endif
|
342
|
+
};
|
343
|
+
|
344
|
+
struct List_o_links_struct
|
345
|
+
{
|
346
|
+
int link; /* the link number */
|
347
|
+
int word; /* the word at the other end of this link */
|
348
|
+
int dir; /* 0: undirected, 1: away from me, -1: toward me */
|
349
|
+
List_o_links * next;
|
350
|
+
};
|
351
|
+
|
352
|
+
/* These parameters tell power_pruning() whether it is being run before
|
353
|
+
* or after generating "and" disjuncts. GENTLE is before; RUTHLESS is
|
354
|
+
* after.
|
355
|
+
*/
|
356
|
+
#define GENTLE 1
|
357
|
+
#define RUTHLESS 0
|
358
|
+
|
359
|
+
typedef struct string_node_struct String_node;
|
360
|
+
struct string_node_struct
|
361
|
+
{
|
362
|
+
char * string;
|
363
|
+
int size;
|
364
|
+
String_node * next;
|
365
|
+
};
|
366
|
+
|
367
|
+
typedef struct Parse_choice_struct Parse_choice;
|
368
|
+
|
369
|
+
struct Link_s
|
370
|
+
{
|
371
|
+
int l, r;
|
372
|
+
Connector * lc, * rc;
|
373
|
+
const char * name; /* spelling of full link name */
|
374
|
+
};
|
375
|
+
|
376
|
+
struct Parse_choice_struct
|
377
|
+
{
|
378
|
+
Parse_choice * next;
|
379
|
+
Parse_set * set[2];
|
380
|
+
Link link[2]; /* the lc fields of these is NULL if there is no link used */
|
381
|
+
Disjunct *ld, *md, *rd; /* the chosen disjuncts for the relevant three words */
|
382
|
+
};
|
383
|
+
|
384
|
+
struct Parse_set_struct
|
385
|
+
{
|
386
|
+
s64 count; /* the number of ways */
|
387
|
+
Parse_choice * first;
|
388
|
+
Parse_choice * current; /* used to enumerate linkages */
|
389
|
+
};
|
390
|
+
|
391
|
+
struct X_table_connector_struct
|
392
|
+
{
|
393
|
+
short lw, rw;
|
394
|
+
short cost;
|
395
|
+
Parse_set *set;
|
396
|
+
Connector *le, *re;
|
397
|
+
X_table_connector *next;
|
398
|
+
};
|
399
|
+
|
400
|
+
/* from string-set.c */
|
401
|
+
struct String_set_s
|
402
|
+
{
|
403
|
+
int size; /* the current size of the table */
|
404
|
+
int count; /* number of things currently in the table */
|
405
|
+
char ** table; /* the table itself */
|
406
|
+
};
|
407
|
+
|
408
|
+
|
409
|
+
/* from pp_linkset.c */
|
410
|
+
typedef struct pp_linkset_node_s
|
411
|
+
{
|
412
|
+
const char *str;
|
413
|
+
struct pp_linkset_node_s *next;
|
414
|
+
} pp_linkset_node;
|
415
|
+
|
416
|
+
typedef struct pp_linkset_s
|
417
|
+
{
|
418
|
+
int hash_table_size;
|
419
|
+
int population;
|
420
|
+
pp_linkset_node **hash_table; /* data actually lives here */
|
421
|
+
} pp_linkset;
|
422
|
+
|
423
|
+
|
424
|
+
/* from pp_lexer.c */
|
425
|
+
#define PP_LEXER_MAX_LABELS 512
|
426
|
+
|
427
|
+
typedef struct pp_label_node_s
|
428
|
+
{
|
429
|
+
/* linked list of strings associated with a label in the table */
|
430
|
+
const char *str;
|
431
|
+
struct pp_label_node_s *next;
|
432
|
+
} pp_label_node; /* next=NULL: end of list */
|
433
|
+
|
434
|
+
|
435
|
+
typedef struct PPLexTable_s
|
436
|
+
{
|
437
|
+
String_set *string_set;
|
438
|
+
const char *labels[PP_LEXER_MAX_LABELS]; /* array of labels */
|
439
|
+
pp_label_node *nodes_of_label[PP_LEXER_MAX_LABELS]; /*str. for each label*/
|
440
|
+
pp_label_node *last_node_of_label[PP_LEXER_MAX_LABELS]; /* efficiency */
|
441
|
+
pp_label_node *current_node_of_active_label;/* state: curr node of label */
|
442
|
+
int idx_of_active_label; /* read state: current label */
|
443
|
+
} PPLexTable;
|
444
|
+
|
445
|
+
/* from pp_knowledge.c */
|
446
|
+
typedef struct StartingLinkAndDomain_s
|
447
|
+
{
|
448
|
+
const char *starting_link;
|
449
|
+
int domain; /* domain which the link belongs to (-1: terminator)*/
|
450
|
+
} StartingLinkAndDomain;
|
451
|
+
|
452
|
+
typedef struct pp_rule_s
|
453
|
+
{
|
454
|
+
/* Holds a single post-processing rule. Since rules come in many
|
455
|
+
flavors, not all fields of the following are always relevant */
|
456
|
+
const char *selector; /* name of link to which rule applies */
|
457
|
+
int domain; /* type of domain to which rule applies */
|
458
|
+
pp_linkset *link_set; /* handle to set of links relevant to rule */
|
459
|
+
int link_set_size; /* size of this set */
|
460
|
+
const char **link_array; /* array holding the spelled-out names */
|
461
|
+
const char *msg; /* explanation (NULL=end sentinel in array)*/
|
462
|
+
} pp_rule;
|
463
|
+
|
464
|
+
struct pp_knowledge_s
|
465
|
+
{
|
466
|
+
PPLexTable *lt; /* Internal rep'n of sets of strings from knowledge file */
|
467
|
+
const char *path; /* Name of file we loaded from */
|
468
|
+
|
469
|
+
/* handles to sets of links specified in knowledge file. These constitute
|
470
|
+
auxiliary data, necessary to implement the rules, below. See comments
|
471
|
+
in post-process.c for a description of these. */
|
472
|
+
pp_linkset *domain_starter_links;
|
473
|
+
pp_linkset *urfl_domain_starter_links;
|
474
|
+
pp_linkset *urfl_only_domain_starter_links;
|
475
|
+
pp_linkset *domain_contains_links;
|
476
|
+
pp_linkset *must_form_a_cycle_links;
|
477
|
+
pp_linkset *restricted_links;
|
478
|
+
pp_linkset *ignore_these_links;
|
479
|
+
pp_linkset *left_domain_starter_links;
|
480
|
+
|
481
|
+
/* arrays of rules specified in knowledge file */
|
482
|
+
pp_rule *connected_rules, *form_a_cycle_rules;
|
483
|
+
pp_rule *contains_one_rules, *contains_none_rules;
|
484
|
+
pp_rule *bounded_rules;
|
485
|
+
|
486
|
+
int n_connected_rules, n_form_a_cycle_rules;
|
487
|
+
int n_contains_one_rules, n_contains_none_rules;
|
488
|
+
int n_bounded_rules;
|
489
|
+
|
490
|
+
pp_linkset *set_of_links_starting_bounded_domain;
|
491
|
+
StartingLinkAndDomain *starting_link_lookup_table;
|
492
|
+
int nStartingLinks;
|
493
|
+
String_set *string_set;
|
494
|
+
};
|
495
|
+
|
496
|
+
|
497
|
+
#endif
|
498
|
+
|