grammar_police 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/c/.DS_Store +0 -0
- data/c/link-grammar.c +65 -0
- data/c/link-grammar.h +60 -0
- data/c/link-grammar.o +0 -0
- data/c/link-grammar.so +0 -0
- data/c/link-grammar/.DS_Store +0 -0
- data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/c/link-grammar/.deps/and.Plo +202 -0
- data/c/link-grammar/.deps/api.Plo +244 -0
- data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/c/link-grammar/.deps/command-line.Plo +201 -0
- data/c/link-grammar/.deps/constituents.Plo +201 -0
- data/c/link-grammar/.deps/count.Plo +202 -0
- data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/c/link-grammar/.deps/disjuncts.Plo +123 -0
- data/c/link-grammar/.deps/error.Plo +121 -0
- data/c/link-grammar/.deps/expand.Plo +133 -0
- data/c/link-grammar/.deps/extract-links.Plo +198 -0
- data/c/link-grammar/.deps/fast-match.Plo +200 -0
- data/c/link-grammar/.deps/idiom.Plo +200 -0
- data/c/link-grammar/.deps/jni-client.Plo +217 -0
- data/c/link-grammar/.deps/link-parser.Po +1 -0
- data/c/link-grammar/.deps/massage.Plo +202 -0
- data/c/link-grammar/.deps/post-process.Plo +202 -0
- data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/c/link-grammar/.deps/prefix.Plo +102 -0
- data/c/link-grammar/.deps/preparation.Plo +202 -0
- data/c/link-grammar/.deps/print-util.Plo +200 -0
- data/c/link-grammar/.deps/print.Plo +201 -0
- data/c/link-grammar/.deps/prune.Plo +202 -0
- data/c/link-grammar/.deps/read-dict.Plo +223 -0
- data/c/link-grammar/.deps/read-regex.Plo +123 -0
- data/c/link-grammar/.deps/regex-morph.Plo +131 -0
- data/c/link-grammar/.deps/resources.Plo +203 -0
- data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/c/link-grammar/.deps/string-set.Plo +198 -0
- data/c/link-grammar/.deps/tokenize.Plo +160 -0
- data/c/link-grammar/.deps/utilities.Plo +222 -0
- data/c/link-grammar/.deps/word-file.Plo +201 -0
- data/c/link-grammar/.deps/word-utils.Plo +212 -0
- data/c/link-grammar/.libs/analyze-linkage.o +0 -0
- data/c/link-grammar/.libs/and.o +0 -0
- data/c/link-grammar/.libs/api.o +0 -0
- data/c/link-grammar/.libs/build-disjuncts.o +0 -0
- data/c/link-grammar/.libs/command-line.o +0 -0
- data/c/link-grammar/.libs/constituents.o +0 -0
- data/c/link-grammar/.libs/count.o +0 -0
- data/c/link-grammar/.libs/disjunct-utils.o +0 -0
- data/c/link-grammar/.libs/disjuncts.o +0 -0
- data/c/link-grammar/.libs/error.o +0 -0
- data/c/link-grammar/.libs/expand.o +0 -0
- data/c/link-grammar/.libs/extract-links.o +0 -0
- data/c/link-grammar/.libs/fast-match.o +0 -0
- data/c/link-grammar/.libs/idiom.o +0 -0
- data/c/link-grammar/.libs/jni-client.o +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.a +0 -0
- data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/c/link-grammar/.libs/liblink-grammar.la +41 -0
- data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/c/link-grammar/.libs/massage.o +0 -0
- data/c/link-grammar/.libs/post-process.o +0 -0
- data/c/link-grammar/.libs/pp_knowledge.o +0 -0
- data/c/link-grammar/.libs/pp_lexer.o +0 -0
- data/c/link-grammar/.libs/pp_linkset.o +0 -0
- data/c/link-grammar/.libs/prefix.o +0 -0
- data/c/link-grammar/.libs/preparation.o +0 -0
- data/c/link-grammar/.libs/print-util.o +0 -0
- data/c/link-grammar/.libs/print.o +0 -0
- data/c/link-grammar/.libs/prune.o +0 -0
- data/c/link-grammar/.libs/read-dict.o +0 -0
- data/c/link-grammar/.libs/read-regex.o +0 -0
- data/c/link-grammar/.libs/regex-morph.o +0 -0
- data/c/link-grammar/.libs/resources.o +0 -0
- data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/c/link-grammar/.libs/string-set.o +0 -0
- data/c/link-grammar/.libs/tokenize.o +0 -0
- data/c/link-grammar/.libs/utilities.o +0 -0
- data/c/link-grammar/.libs/word-file.o +0 -0
- data/c/link-grammar/.libs/word-utils.o +0 -0
- data/c/link-grammar/Makefile +900 -0
- data/c/link-grammar/Makefile.am +202 -0
- data/c/link-grammar/Makefile.in +900 -0
- data/c/link-grammar/analyze-linkage.c +1317 -0
- data/c/link-grammar/analyze-linkage.h +24 -0
- data/c/link-grammar/and.c +1603 -0
- data/c/link-grammar/and.h +27 -0
- data/c/link-grammar/api-structures.h +362 -0
- data/c/link-grammar/api-types.h +72 -0
- data/c/link-grammar/api.c +1887 -0
- data/c/link-grammar/api.h +96 -0
- data/c/link-grammar/autoit/.DS_Store +0 -0
- data/c/link-grammar/autoit/README +10 -0
- data/c/link-grammar/autoit/_LGTest.au3 +22 -0
- data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/c/link-grammar/build-disjuncts.c +487 -0
- data/c/link-grammar/build-disjuncts.h +21 -0
- data/c/link-grammar/command-line.c +458 -0
- data/c/link-grammar/command-line.h +15 -0
- data/c/link-grammar/constituents.c +1836 -0
- data/c/link-grammar/constituents.h +26 -0
- data/c/link-grammar/corpus/.DS_Store +0 -0
- data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/c/link-grammar/corpus/Makefile +527 -0
- data/c/link-grammar/corpus/Makefile.am +46 -0
- data/c/link-grammar/corpus/Makefile.in +527 -0
- data/c/link-grammar/corpus/README +17 -0
- data/c/link-grammar/corpus/cluster.c +286 -0
- data/c/link-grammar/corpus/cluster.h +32 -0
- data/c/link-grammar/corpus/corpus.c +483 -0
- data/c/link-grammar/corpus/corpus.h +46 -0
- data/c/link-grammar/count.c +828 -0
- data/c/link-grammar/count.h +25 -0
- data/c/link-grammar/disjunct-utils.c +261 -0
- data/c/link-grammar/disjunct-utils.h +27 -0
- data/c/link-grammar/disjuncts.c +138 -0
- data/c/link-grammar/disjuncts.h +13 -0
- data/c/link-grammar/error.c +92 -0
- data/c/link-grammar/error.h +35 -0
- data/c/link-grammar/expand.c +67 -0
- data/c/link-grammar/expand.h +13 -0
- data/c/link-grammar/externs.h +22 -0
- data/c/link-grammar/extract-links.c +625 -0
- data/c/link-grammar/extract-links.h +16 -0
- data/c/link-grammar/fast-match.c +309 -0
- data/c/link-grammar/fast-match.h +17 -0
- data/c/link-grammar/idiom.c +373 -0
- data/c/link-grammar/idiom.h +15 -0
- data/c/link-grammar/jni-client.c +779 -0
- data/c/link-grammar/jni-client.h +236 -0
- data/c/link-grammar/liblink-grammar-java.la +42 -0
- data/c/link-grammar/liblink-grammar.la +41 -0
- data/c/link-grammar/link-features.h +37 -0
- data/c/link-grammar/link-features.h.in +37 -0
- data/c/link-grammar/link-grammar-java.def +31 -0
- data/c/link-grammar/link-grammar.def +194 -0
- data/c/link-grammar/link-includes.h +465 -0
- data/c/link-grammar/link-parser.c +849 -0
- data/c/link-grammar/massage.c +329 -0
- data/c/link-grammar/massage.h +13 -0
- data/c/link-grammar/post-process.c +1113 -0
- data/c/link-grammar/post-process.h +45 -0
- data/c/link-grammar/pp_knowledge.c +376 -0
- data/c/link-grammar/pp_knowledge.h +14 -0
- data/c/link-grammar/pp_lexer.c +1920 -0
- data/c/link-grammar/pp_lexer.h +19 -0
- data/c/link-grammar/pp_linkset.c +158 -0
- data/c/link-grammar/pp_linkset.h +20 -0
- data/c/link-grammar/prefix.c +482 -0
- data/c/link-grammar/prefix.h +139 -0
- data/c/link-grammar/preparation.c +412 -0
- data/c/link-grammar/preparation.h +20 -0
- data/c/link-grammar/print-util.c +87 -0
- data/c/link-grammar/print-util.h +32 -0
- data/c/link-grammar/print.c +1085 -0
- data/c/link-grammar/print.h +16 -0
- data/c/link-grammar/prune.c +1864 -0
- data/c/link-grammar/prune.h +17 -0
- data/c/link-grammar/read-dict.c +1785 -0
- data/c/link-grammar/read-dict.h +29 -0
- data/c/link-grammar/read-regex.c +161 -0
- data/c/link-grammar/read-regex.h +12 -0
- data/c/link-grammar/regex-morph.c +126 -0
- data/c/link-grammar/regex-morph.h +17 -0
- data/c/link-grammar/resources.c +180 -0
- data/c/link-grammar/resources.h +23 -0
- data/c/link-grammar/sat-solver/.DS_Store +0 -0
- data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/c/link-grammar/sat-solver/Makefile +527 -0
- data/c/link-grammar/sat-solver/Makefile.am +29 -0
- data/c/link-grammar/sat-solver/Makefile.in +527 -0
- data/c/link-grammar/sat-solver/clock.hpp +33 -0
- data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/c/link-grammar/sat-solver/guiding.hpp +244 -0
- data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/c/link-grammar/sat-solver/trie.hpp +118 -0
- data/c/link-grammar/sat-solver/util.cpp +23 -0
- data/c/link-grammar/sat-solver/util.hpp +14 -0
- data/c/link-grammar/sat-solver/variables.cpp +5 -0
- data/c/link-grammar/sat-solver/variables.hpp +829 -0
- data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/c/link-grammar/spellcheck-aspell.c +148 -0
- data/c/link-grammar/spellcheck-hun.c +136 -0
- data/c/link-grammar/spellcheck.h +34 -0
- data/c/link-grammar/string-set.c +169 -0
- data/c/link-grammar/string-set.h +16 -0
- data/c/link-grammar/structures.h +498 -0
- data/c/link-grammar/tokenize.c +1049 -0
- data/c/link-grammar/tokenize.h +15 -0
- data/c/link-grammar/utilities.c +847 -0
- data/c/link-grammar/utilities.h +281 -0
- data/c/link-grammar/word-file.c +124 -0
- data/c/link-grammar/word-file.h +15 -0
- data/c/link-grammar/word-utils.c +526 -0
- data/c/link-grammar/word-utils.h +152 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/grammar_police.gemspec +23 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_police.rb +11 -0
- data/lib/grammar_police/.DS_Store +0 -0
- data/lib/grammar_police/dictionary.rb +30 -0
- data/lib/grammar_police/linkage.rb +26 -0
- data/lib/grammar_police/parse_options.rb +32 -0
- data/lib/grammar_police/sentence.rb +44 -0
- data/lib/grammar_police/version.rb +3 -0
- data/tests/.DS_Store +0 -0
- data/tests/count_linkages.rb +29 -0
- data/tests/sentences.txt +86 -0
- metadata +408 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
int read_dictionary(Dictionary dict);
|
15
|
+
void dict_display_word_info(Dictionary dict, const char * s);
|
16
|
+
void dict_display_word_expr(Dictionary dict, const char * s);
|
17
|
+
void print_dictionary_data(Dictionary dict);
|
18
|
+
void print_dictionary_words(Dictionary dict);
|
19
|
+
void print_expression(Exp *);
|
20
|
+
int boolean_dictionary_lookup(Dictionary dict, const char *);
|
21
|
+
int delete_dictionary_words(Dictionary dict, const char *);
|
22
|
+
|
23
|
+
Dict_node * dictionary_lookup_list(Dictionary dict, const char *);
|
24
|
+
Dict_node * abridged_lookup_list(Dictionary dict, const char *);
|
25
|
+
void free_lookup_list(Dict_node *);
|
26
|
+
|
27
|
+
Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode);
|
28
|
+
void free_dictionary(Dictionary dict);
|
29
|
+
Exp * Exp_create(Dictionary dict);
|
@@ -0,0 +1,161 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2005 Sampo Pyysalo */
|
3
|
+
/* */
|
4
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
5
|
+
/* license set forth in the LICENSE file included with this software, */
|
6
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
7
|
+
/* This license allows free redistribution and use in source and binary */
|
8
|
+
/* forms, with or without modification, subject to certain conditions. */
|
9
|
+
/* */
|
10
|
+
/*************************************************************************/
|
11
|
+
|
12
|
+
#include <string.h>
|
13
|
+
#include "link-includes.h"
|
14
|
+
#include "api-structures.h"
|
15
|
+
#include "structures.h"
|
16
|
+
#include "read-regex.h"
|
17
|
+
|
18
|
+
/*
|
19
|
+
Function for reading regular expression name:pattern combinations
|
20
|
+
into the Dictionary from a given file.
|
21
|
+
|
22
|
+
The format of the regex file is as follows:
|
23
|
+
|
24
|
+
Lines starting with "%" are comments and are ignored.
|
25
|
+
All other nonempty lines must follow the following format:
|
26
|
+
|
27
|
+
REGEX_NAME: /pattern/
|
28
|
+
|
29
|
+
Here REGEX_NAME is a unique identifying name for the regex.
|
30
|
+
This name is used to determine the disjuncts that will be assigned to
|
31
|
+
tokens matching the pattern, so in the dictionary file (e.g. 4.0.dict)
|
32
|
+
you must have something like
|
33
|
+
|
34
|
+
REGEX_NAME: (({@MX+} & (JG- or <noun-main-s>)) or YS+)) or AN+ or G+);
|
35
|
+
|
36
|
+
using the same name. The pattern itself must be surrounded by slashes.
|
37
|
+
Extra whitespace is ignored.
|
38
|
+
*/
|
39
|
+
|
40
|
+
#define MAX_REGEX_NAME_LENGTH 50
|
41
|
+
#define MAX_REGEX_LENGTH 255
|
42
|
+
|
43
|
+
/**
 * Reads "NAME: /pattern/" entries from the regex file file_name and links
 * one Regex_node per entry, in file order, into the list that starts at
 * dict->regex_root.  Patterns are only stored here (each node's re field
 * is set to NULL); they are not compiled by this function.
 *
 * Returns 0 on success.  Returns 1 if the file cannot be opened, if an
 * entry is malformed or too long, or if memory cannot be allocated.
 * Nodes that were linked in before a failure are left on the list.
 */
int read_regex_file(Dictionary dict, const char *file_name)
{
	Regex_node **tail = &dict->regex_root; /* Where the next node is linked. */
	Regex_node *new_re;
	char name[MAX_REGEX_NAME_LENGTH];
	char regex[MAX_REGEX_LENGTH];
	int c, prev, i, line = 1;
	FILE *fp;

	fp = dictopen(file_name, "r");
	if (fp == NULL)
	{
		prt_error("Error: cannot open regex file %s\n", file_name);
		return 1;
	}

	/* Read in the regexs; the loop is left on EOF. */
	while (1)
	{
		/* Skip whitespace and '%' comments. */
		do
		{
			do
			{
				c = fgetc(fp);
				if (c == '\n') { line++; }
			}
			while (isspace(c));

			if (c == '%')
			{
				while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
				/* Only count a line if the comment really ended in a
				 * newline (it may also end at EOF). */
				if (c == '\n') { line++; }
			}
		}
		while (isspace(c));

		if (c == EOF) { break; } /* done. */

		/* Read in the name of the regex. */
		i = 0;
		do
		{
			/* Always keep one byte free for the terminating '\0'
			 * that is written after the loop. */
			if (i >= MAX_REGEX_NAME_LENGTH-1)
			{
				prt_error("Error: Regex name too long on line %d\n", line);
				goto failure;
			}
			name[i++] = (char) c;
			c = fgetc(fp);
		}
		while ((!isspace(c)) && (c != ':') && (c != EOF));
		name[i] = '\0';

		/* Skip possible whitespace after name, expect colon. */
		while (isspace(c))
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		if (c != ':')
		{
			prt_error("Error: Regex missing colon on line %d\n", line);
			goto failure;
		}

		/* Skip whitespace after colon, expect slash. */
		do
		{
			if (c == '\n') { line++; }
			c = fgetc(fp);
		}
		while (isspace(c));
		if (c != '/')
		{
			prt_error("Error: Regex missing leading slash on line %d\n", line);
			goto failure;
		}

		/* Read in the regex.  Every character up to and including the
		 * terminating slash is stored; the slash is overwritten with
		 * '\0' below.  A slash preceded by a backslash does not
		 * terminate the pattern. */
		prev = 0;
		i = 0;
		do
		{
			if (i > MAX_REGEX_LENGTH-1)
			{
				prt_error("Error: Regex too long on line %d\n", line);
				goto failure;
			}
			prev = c;
			c = fgetc(fp);
			regex[i++] = (char) c;
		}
		while ((c != '/' || prev == '\\') && (c != EOF));
		regex[i-1] = '\0';

		/* Expect termination by a slash. */
		if (c != '/')
		{
			prt_error("Error: Regex missing trailing slash on line %d\n", line);
			goto failure;
		}

		/* Create new Regex_node and add to dict list. */
		new_re = (Regex_node *) malloc(sizeof(Regex_node));
		if (new_re == NULL)
		{
			prt_error("Error: out of memory reading regex file %s\n", file_name);
			goto failure;
		}
		new_re->name = strdup(name);
		new_re->pattern = strdup(regex);
		if ((new_re->name == NULL) || (new_re->pattern == NULL))
		{
			prt_error("Error: out of memory reading regex file %s\n", file_name);
			free(new_re->name);
			free(new_re->pattern);
			free(new_re);
			goto failure;
		}
		new_re->re = NULL;
		new_re->next = NULL;
		*tail = new_re;
		tail = &new_re->next;
	}

	fclose(fp);
	return 0;

failure:
	fclose(fp);
	return 1;
}
|
161
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2005 Sampo Pyysalo */
|
3
|
+
/* */
|
4
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
5
|
+
/* license set forth in the LICENSE file included with this software, */
|
6
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
7
|
+
/* This license allows free redistribution and use in source and binary */
|
8
|
+
/* forms, with or without modification, subject to certain conditions. */
|
9
|
+
/* */
|
10
|
+
/*************************************************************************/
|
11
|
+
|
12
|
+
/* Reads "NAME: /pattern/" entries from the file file_name and links them
 * into the list starting at dict->regex_root.  Returns 0 on success and
 * 1 on failure (file cannot be opened, or an entry is malformed). */
int read_regex_file(Dictionary dict, const char *file_name);
|
@@ -0,0 +1,126 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2005 Sampo Pyysalo */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
/* On MS Windows, regex.h fails to pull in size_t, so work around this by
|
14
|
+
* including <stddef.h> before <regex.h> (<sys/types.h> is not enough) */
|
15
|
+
#include <stddef.h>
|
16
|
+
#include <regex.h>
|
17
|
+
#include "api-structures.h"
|
18
|
+
#include "link-includes.h"
|
19
|
+
#include "read-dict.h"
|
20
|
+
#include "regex-morph.h"
|
21
|
+
#include "structures.h"
|
22
|
+
|
23
|
+
/**
|
24
|
+
* Support for the regular-expression based token matching system
|
25
|
+
* using standard POSIX regex.
|
26
|
+
*/
|
27
|
+
|
28
|
+
/* Compiles all the regexs in the Dictionary. Returns 0 on success,
|
29
|
+
* else an error code.
|
30
|
+
*/
|
31
|
+
int compile_regexs(Dictionary dict)
|
32
|
+
{
|
33
|
+
regex_t *preg;
|
34
|
+
int rc;
|
35
|
+
|
36
|
+
Regex_node *re = dict->regex_root;
|
37
|
+
while (re != NULL)
|
38
|
+
{
|
39
|
+
/* If re->re non-null, assume compiled already. */
|
40
|
+
if(re->re == NULL)
|
41
|
+
{
|
42
|
+
/* Compile with default options (0) and default character
|
43
|
+
* tables (NULL). */
|
44
|
+
/* re->re = pcre_compile(re->pattern, 0, &error, &erroroffset, NULL); */
|
45
|
+
preg = (regex_t *) malloc (sizeof(regex_t));
|
46
|
+
re->re = preg;
|
47
|
+
rc = regcomp(preg, re->pattern, REG_EXTENDED);
|
48
|
+
if (rc)
|
49
|
+
{
|
50
|
+
/*
|
51
|
+
prt_error("Error: Failed to compile regex '%s' (%s) at %d: %s\n",
|
52
|
+
re->pattern, re->name, erroroffset, error);
|
53
|
+
*/
|
54
|
+
prt_error("Error: Failed to compile regex '%s' (%s)\n",
|
55
|
+
re->pattern, re->name);
|
56
|
+
return rc;
|
57
|
+
}
|
58
|
+
|
59
|
+
/* Check that the regex name is defined in the dictionary. */
|
60
|
+
if (!boolean_dictionary_lookup(dict, re->name))
|
61
|
+
{
|
62
|
+
/* TODO: better error handing. Maybe remove the regex? */
|
63
|
+
prt_error("Error: Regex name %s not found in dictionary!\n",
|
64
|
+
re->name);
|
65
|
+
}
|
66
|
+
}
|
67
|
+
re = re->next;
|
68
|
+
}
|
69
|
+
return 0;
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
|
73
|
+
* Tries to match each regex in turn to word s.
|
74
|
+
* On match, returns the name of the first matching regex.
|
75
|
+
* If no match is found, returns NULL.
|
76
|
+
*/
|
77
|
+
const char *match_regex(Dictionary dict, const char *s)
|
78
|
+
{
|
79
|
+
int rc;
|
80
|
+
|
81
|
+
Regex_node *re = dict->regex_root;
|
82
|
+
while (re != NULL)
|
83
|
+
{
|
84
|
+
if (re->re == NULL)
|
85
|
+
{
|
86
|
+
/* Re not compiled; if this happens, it's likely an
|
87
|
+
* internal error, but nevermind for now. */
|
88
|
+
continue;
|
89
|
+
}
|
90
|
+
/* Try to match with no extra data (NULL), whole str (0 to strlen(s)),
|
91
|
+
* and default options (second 0). */
|
92
|
+
/* int rc = pcre_exec(re->re, NULL, s, strlen(s), 0,
|
93
|
+
* 0, ovector, PCRE_OVEC_SIZE); */
|
94
|
+
|
95
|
+
rc = regexec((regex_t*) re->re, s, 0, NULL, 0);
|
96
|
+
if (0 == rc)
|
97
|
+
{
|
98
|
+
return re->name; /* match found. just return--no multiple matches. */
|
99
|
+
}
|
100
|
+
else if (rc != REG_NOMATCH)
|
101
|
+
{
|
102
|
+
/* We have an error. TODO: more appropriate error handling.*/
|
103
|
+
fprintf(stderr,"Regex matching error %d occurred!\n", rc);
|
104
|
+
}
|
105
|
+
re = re->next;
|
106
|
+
}
|
107
|
+
return NULL; /* no matches. */
|
108
|
+
}
|
109
|
+
|
110
|
+
/**
|
111
|
+
* Delete associated storage
|
112
|
+
*/
|
113
|
+
void free_regexs(Dictionary dict)
|
114
|
+
{
|
115
|
+
Regex_node *re = dict->regex_root;
|
116
|
+
while (re != NULL)
|
117
|
+
{
|
118
|
+
Regex_node *next = re->next;
|
119
|
+
regfree((regex_t *)re->re);
|
120
|
+
free(re->re);
|
121
|
+
free(re->name);
|
122
|
+
free(re->pattern);
|
123
|
+
free(re);
|
124
|
+
re = next;
|
125
|
+
}
|
126
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2005 Sampo Pyysalo */
|
3
|
+
/* All rights reserved */
|
4
|
+
/* */
|
5
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
6
|
+
/* license set forth in the LICENSE file included with this software, */
|
7
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
8
|
+
/* This license allows free redistribution and use in source and binary */
|
9
|
+
/* forms, with or without modification, subject to certain conditions. */
|
10
|
+
/* */
|
11
|
+
/*************************************************************************/
|
12
|
+
|
13
|
+
#include "api-structures.h"
|
14
|
+
|
15
|
+
/* Compiles the regexs in the Dictionary's regex list.  Returns 0 on
 * success, else a non-zero error code. */
int compile_regexs(Dictionary);
|
16
|
+
/* Returns the name of the first regex in the Dictionary's regex list
 * that matches the given word, or NULL if no regex matches. */
const char *match_regex(Dictionary, const char *);
|
17
|
+
/* Frees the storage held by the Dictionary's regex list. */
void free_regexs(Dictionary dict);
|
@@ -0,0 +1,180 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
#include "api.h"
|
15
|
+
#include "api.c"
|
16
|
+
|
17
|
+
#include <time.h>
|
18
|
+
|
19
|
+
#if !defined(_WIN32)
|
20
|
+
#include <sys/time.h>
|
21
|
+
#include <sys/resource.h>
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#if defined(__linux__)
|
25
|
+
/* based on reading the man page for getrusage on linux, I inferred that
|
26
|
+
I needed to include this. However it doesn't seem to be necessary */
|
27
|
+
#include <unistd.h>
|
28
|
+
#endif
|
29
|
+
|
30
|
+
#if defined(__hpux__)
|
31
|
+
#include <sys/syscall.h>
|
32
|
+
int syscall(int, int, struct rusage *rusage); /* can't find
|
33
|
+
the prototype for this */
|
34
|
+
#define getrusage(a, b) syscall(SYS_GETRUSAGE, (a), (b))
|
35
|
+
#endif /* __hpux__ */
|
36
|
+
|
37
|
+
#if defined(__sun__)
|
38
|
+
int getrusage(int who, struct rusage *rusage);
|
39
|
+
/* Declaration missing from sys/resource.h in sun operating systems (?) */
|
40
|
+
#endif /* __sun__ */
|
41
|
+
|
42
|
+
#define MAX_PARSE_TIME_UNLIMITED -1
|
43
|
+
#define MAX_MEMORY_UNLIMITED ((size_t) -1)
|
44
|
+
|
45
|
+
/**
 * Returns the processor time used so far by this process, in seconds.
 * Everywhere except Windows this is the user CPU time reported by
 * getrusage(RUSAGE_SELF); on Windows it is clock() / CLOCKS_PER_SEC.
 */
static double current_usage_time(void)
{
#if defined(_WIN32)
	return ((double) clock())/CLOCKS_PER_SEC;
#else
	struct rusage usage;
	double seconds;

	getrusage(RUSAGE_SELF, &usage);

	/* Whole seconds plus the microsecond remainder as a fraction. */
	seconds = (double) usage.ru_utime.tv_sec;
	seconds += ((double) usage.ru_utime.tv_usec) / 1000000.0;
	return seconds;
#endif
}
|
56
|
+
|
57
|
+
Resources resources_create(void)
|
58
|
+
{
|
59
|
+
Resources r;
|
60
|
+
|
61
|
+
r = (Resources) xalloc(sizeof(struct Resources_s));
|
62
|
+
r->max_parse_time = MAX_PARSE_TIME_UNLIMITED;
|
63
|
+
r->when_created = current_usage_time();
|
64
|
+
r->when_last_called = current_usage_time();
|
65
|
+
r->time_when_parse_started = current_usage_time();
|
66
|
+
r->space_when_parse_started = get_space_in_use();
|
67
|
+
r->max_memory = MAX_MEMORY_UNLIMITED;
|
68
|
+
r->cumulative_time = 0;
|
69
|
+
r->memory_exhausted = FALSE;
|
70
|
+
r->timer_expired = FALSE;
|
71
|
+
|
72
|
+
return r;
|
73
|
+
}
|
74
|
+
|
75
|
+
/** Releases the Resources_s record r through xfree(). */
void resources_delete(Resources r)
{
	/* xfree() is told the size of the record being released. */
	xfree(r, sizeof(struct Resources_s));
}
|
79
|
+
|
80
|
+
/**
 * Restarts the measurements in r: both the parse-start time and the
 * last-called time become the current usage time, the space baseline
 * becomes the space currently in use, and the timer-expired and
 * memory-exhausted flags are cleared.
 */
void resources_reset(Resources r)
{
	/* One clock reading is shared by both timestamps. */
	r->time_when_parse_started = current_usage_time();
	r->when_last_called = r->time_when_parse_started;

	r->space_when_parse_started = get_space_in_use();

	r->timer_expired = FALSE;
	r->memory_exhausted = FALSE;
}
|
87
|
+
|
88
|
+
#if 0
|
89
|
+
static void resources_reset_time(Resources r)
|
90
|
+
{
|
91
|
+
r->when_last_called = r->time_when_parse_started = current_usage_time();
|
92
|
+
}
|
93
|
+
#endif
|
94
|
+
|
95
|
+
/** Sets the space baseline of r to the space currently in use. */
void resources_reset_space(Resources r)
{
	r->space_when_parse_started = get_space_in_use();
}
|
99
|
+
|
100
|
+
/**
 * Checks both limits and latches the result into r: once the timer or the
 * memory check reports exhaustion, the corresponding flag in r is set to
 * TRUE.  Returns non-zero if either flag is set.
 */
int resources_exhausted(Resources r)
{
	int out_of_time;
	int out_of_memory;

	out_of_time = resources_timer_expired(r);
	if (out_of_time)
	{
		r->timer_expired = TRUE;
	}

	out_of_memory = resources_memory_exhausted(r);
	if (out_of_memory)
	{
		r->memory_exhausted = TRUE;
	}

	/* The stored flags decide the result, not just the checks above. */
	return (r->timer_expired || r->memory_exhausted);
}
|
110
|
+
|
111
|
+
/**
 * Returns 0 when no parse-time limit is set.  Otherwise returns non-zero
 * if the timer-expired flag is already set in r, or if the usage time
 * elapsed since the parse started exceeds r->max_parse_time.
 */
int resources_timer_expired(Resources r)
{
	if (r->max_parse_time == MAX_PARSE_TIME_UNLIMITED)
	{
		return 0;
	}

	/* An already-latched expiry needs no new clock reading. */
	if (r->timer_expired)
	{
		return 1;
	}

	return (current_usage_time() - r->time_when_parse_started > r->max_parse_time);
}
|
117
|
+
|
118
|
+
/**
 * Returns 0 when no memory limit is set.  Otherwise returns non-zero if
 * the memory-exhausted flag is already set in r, or if the space
 * currently in use exceeds r->max_memory.
 */
int resources_memory_exhausted(Resources r)
{
	if (r->max_memory == MAX_MEMORY_UNLIMITED)
	{
		return 0;
	}

	/* An already-latched exhaustion needs no new measurement. */
	if (r->memory_exhausted)
	{
		return 1;
	}

	return (get_space_in_use() > r->max_memory);
}
|
123
|
+
|
124
|
+
/** print out the cpu ticks since this was last called */
|
125
|
+
static void resources_print_time(int verbosity, Resources r, const char * s)
|
126
|
+
{
|
127
|
+
double new_t;
|
128
|
+
new_t = current_usage_time();
|
129
|
+
if (verbosity > 1) {
|
130
|
+
printf("++++");
|
131
|
+
left_print_string(stdout, s,
|
132
|
+
" ");
|
133
|
+
printf("%7.2f seconds\n", new_t - r->when_last_called);
|
134
|
+
}
|
135
|
+
r->when_last_called = new_t;
|
136
|
+
}
|
137
|
+
|
138
|
+
/** print out the cpu ticks since this was last called */
|
139
|
+
static void resources_print_total_time(int verbosity, Resources r)
|
140
|
+
{
|
141
|
+
double new_t;
|
142
|
+
new_t = current_usage_time();
|
143
|
+
r->cumulative_time += (new_t - r->time_when_parse_started) ;
|
144
|
+
if (verbosity > 0) {
|
145
|
+
printf("++++");
|
146
|
+
left_print_string(stdout, "Time",
|
147
|
+
" ");
|
148
|
+
printf("%7.2f seconds (%.2f total)\n",
|
149
|
+
new_t - r->time_when_parse_started, r->cumulative_time);
|
150
|
+
}
|
151
|
+
r->time_when_parse_started = new_t;
|
152
|
+
}
|
153
|
+
|
154
|
+
/**
 * When verbosity is greater than 1, prints the number of bytes currently
 * in use and the maximum number of bytes used.  The Resources argument is
 * not read.
 */
static void resources_print_total_space(int verbosity, Resources r)
{
	/* Quiet unless the caller asked for detailed output. */
	if (verbosity <= 1)
	{
		return;
	}

	printf("++++");
	left_print_string(stdout, "Total space", " ");
	printf("%lu bytes (%lu max)\n",
	       (long unsigned int) get_space_in_use(),
	       (long unsigned int) get_max_space_used());
}
|
165
|
+
|
166
|
+
/**
 * Prints the time passed since the previous report, labelled with s,
 * using the verbosity and resources stored in opts.
 */
void print_time(Parse_Options opts, const char * s)
{
	int verbosity = opts->verbosity;

	resources_print_time(verbosity, opts->resources, s);
}
|
170
|
+
|
171
|
+
/**
 * Prints the time spent since the parse started, and the cumulative
 * total, using the verbosity and resources stored in opts.
 */
void parse_options_print_total_time(Parse_Options opts)
{
	int verbosity = opts->verbosity;

	resources_print_total_time(verbosity, opts->resources);
}
|
175
|
+
|
176
|
+
/**
 * Prints the current and maximum space usage, using the verbosity and
 * resources stored in opts.
 */
void print_total_space(Parse_Options opts)
{
	int verbosity = opts->verbosity;

	resources_print_total_space(verbosity, opts->resources);
}
|
180
|
+
|