RubyGems - grammar_police - Versions diffs - 0.1.0 - Mend

grammar_police 0.1.0

Files changed (345) hide show

data/.DS_Store +0 -0
data/.gitignore +4 -0
data/Gemfile +4 -0
data/Rakefile +2 -0
data/c/.DS_Store +0 -0
data/c/link-grammar.c +65 -0
data/c/link-grammar.h +60 -0
data/c/link-grammar.o +0 -0
data/c/link-grammar.so +0 -0
data/c/link-grammar/.DS_Store +0 -0
data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
data/c/link-grammar/.deps/and.Plo +202 -0
data/c/link-grammar/.deps/api.Plo +244 -0
data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
data/c/link-grammar/.deps/command-line.Plo +201 -0
data/c/link-grammar/.deps/constituents.Plo +201 -0
data/c/link-grammar/.deps/count.Plo +202 -0
data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
data/c/link-grammar/.deps/disjuncts.Plo +123 -0
data/c/link-grammar/.deps/error.Plo +121 -0
data/c/link-grammar/.deps/expand.Plo +133 -0
data/c/link-grammar/.deps/extract-links.Plo +198 -0
data/c/link-grammar/.deps/fast-match.Plo +200 -0
data/c/link-grammar/.deps/idiom.Plo +200 -0
data/c/link-grammar/.deps/jni-client.Plo +217 -0
data/c/link-grammar/.deps/link-parser.Po +1 -0
data/c/link-grammar/.deps/massage.Plo +202 -0
data/c/link-grammar/.deps/post-process.Plo +202 -0
data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
data/c/link-grammar/.deps/prefix.Plo +102 -0
data/c/link-grammar/.deps/preparation.Plo +202 -0
data/c/link-grammar/.deps/print-util.Plo +200 -0
data/c/link-grammar/.deps/print.Plo +201 -0
data/c/link-grammar/.deps/prune.Plo +202 -0
data/c/link-grammar/.deps/read-dict.Plo +223 -0
data/c/link-grammar/.deps/read-regex.Plo +123 -0
data/c/link-grammar/.deps/regex-morph.Plo +131 -0
data/c/link-grammar/.deps/resources.Plo +203 -0
data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
data/c/link-grammar/.deps/string-set.Plo +198 -0
data/c/link-grammar/.deps/tokenize.Plo +160 -0
data/c/link-grammar/.deps/utilities.Plo +222 -0
data/c/link-grammar/.deps/word-file.Plo +201 -0
data/c/link-grammar/.deps/word-utils.Plo +212 -0
data/c/link-grammar/.libs/analyze-linkage.o +0 -0
data/c/link-grammar/.libs/and.o +0 -0
data/c/link-grammar/.libs/api.o +0 -0
data/c/link-grammar/.libs/build-disjuncts.o +0 -0
data/c/link-grammar/.libs/command-line.o +0 -0
data/c/link-grammar/.libs/constituents.o +0 -0
data/c/link-grammar/.libs/count.o +0 -0
data/c/link-grammar/.libs/disjunct-utils.o +0 -0
data/c/link-grammar/.libs/disjuncts.o +0 -0
data/c/link-grammar/.libs/error.o +0 -0
data/c/link-grammar/.libs/expand.o +0 -0
data/c/link-grammar/.libs/extract-links.o +0 -0
data/c/link-grammar/.libs/fast-match.o +0 -0
data/c/link-grammar/.libs/idiom.o +0 -0
data/c/link-grammar/.libs/jni-client.o +0 -0
data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar.a +0 -0
data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
data/c/link-grammar/.libs/liblink-grammar.la +41 -0
data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
data/c/link-grammar/.libs/massage.o +0 -0
data/c/link-grammar/.libs/post-process.o +0 -0
data/c/link-grammar/.libs/pp_knowledge.o +0 -0
data/c/link-grammar/.libs/pp_lexer.o +0 -0
data/c/link-grammar/.libs/pp_linkset.o +0 -0
data/c/link-grammar/.libs/prefix.o +0 -0
data/c/link-grammar/.libs/preparation.o +0 -0
data/c/link-grammar/.libs/print-util.o +0 -0
data/c/link-grammar/.libs/print.o +0 -0
data/c/link-grammar/.libs/prune.o +0 -0
data/c/link-grammar/.libs/read-dict.o +0 -0
data/c/link-grammar/.libs/read-regex.o +0 -0
data/c/link-grammar/.libs/regex-morph.o +0 -0
data/c/link-grammar/.libs/resources.o +0 -0
data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
data/c/link-grammar/.libs/string-set.o +0 -0
data/c/link-grammar/.libs/tokenize.o +0 -0
data/c/link-grammar/.libs/utilities.o +0 -0
data/c/link-grammar/.libs/word-file.o +0 -0
data/c/link-grammar/.libs/word-utils.o +0 -0
data/c/link-grammar/Makefile +900 -0
data/c/link-grammar/Makefile.am +202 -0
data/c/link-grammar/Makefile.in +900 -0
data/c/link-grammar/analyze-linkage.c +1317 -0
data/c/link-grammar/analyze-linkage.h +24 -0
data/c/link-grammar/and.c +1603 -0
data/c/link-grammar/and.h +27 -0
data/c/link-grammar/api-structures.h +362 -0
data/c/link-grammar/api-types.h +72 -0
data/c/link-grammar/api.c +1887 -0
data/c/link-grammar/api.h +96 -0
data/c/link-grammar/autoit/.DS_Store +0 -0
data/c/link-grammar/autoit/README +10 -0
data/c/link-grammar/autoit/_LGTest.au3 +22 -0
data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
data/c/link-grammar/build-disjuncts.c +487 -0
data/c/link-grammar/build-disjuncts.h +21 -0
data/c/link-grammar/command-line.c +458 -0
data/c/link-grammar/command-line.h +15 -0
data/c/link-grammar/constituents.c +1836 -0
data/c/link-grammar/constituents.h +26 -0
data/c/link-grammar/corpus/.DS_Store +0 -0
data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
data/c/link-grammar/corpus/Makefile +527 -0
data/c/link-grammar/corpus/Makefile.am +46 -0
data/c/link-grammar/corpus/Makefile.in +527 -0
data/c/link-grammar/corpus/README +17 -0
data/c/link-grammar/corpus/cluster.c +286 -0
data/c/link-grammar/corpus/cluster.h +32 -0
data/c/link-grammar/corpus/corpus.c +483 -0
data/c/link-grammar/corpus/corpus.h +46 -0
data/c/link-grammar/count.c +828 -0
data/c/link-grammar/count.h +25 -0
data/c/link-grammar/disjunct-utils.c +261 -0
data/c/link-grammar/disjunct-utils.h +27 -0
data/c/link-grammar/disjuncts.c +138 -0
data/c/link-grammar/disjuncts.h +13 -0
data/c/link-grammar/error.c +92 -0
data/c/link-grammar/error.h +35 -0
data/c/link-grammar/expand.c +67 -0
data/c/link-grammar/expand.h +13 -0
data/c/link-grammar/externs.h +22 -0
data/c/link-grammar/extract-links.c +625 -0
data/c/link-grammar/extract-links.h +16 -0
data/c/link-grammar/fast-match.c +309 -0
data/c/link-grammar/fast-match.h +17 -0
data/c/link-grammar/idiom.c +373 -0
data/c/link-grammar/idiom.h +15 -0
data/c/link-grammar/jni-client.c +779 -0
data/c/link-grammar/jni-client.h +236 -0
data/c/link-grammar/liblink-grammar-java.la +42 -0
data/c/link-grammar/liblink-grammar.la +41 -0
data/c/link-grammar/link-features.h +37 -0
data/c/link-grammar/link-features.h.in +37 -0
data/c/link-grammar/link-grammar-java.def +31 -0
data/c/link-grammar/link-grammar.def +194 -0
data/c/link-grammar/link-includes.h +465 -0
data/c/link-grammar/link-parser.c +849 -0
data/c/link-grammar/massage.c +329 -0
data/c/link-grammar/massage.h +13 -0
data/c/link-grammar/post-process.c +1113 -0
data/c/link-grammar/post-process.h +45 -0
data/c/link-grammar/pp_knowledge.c +376 -0
data/c/link-grammar/pp_knowledge.h +14 -0
data/c/link-grammar/pp_lexer.c +1920 -0
data/c/link-grammar/pp_lexer.h +19 -0
data/c/link-grammar/pp_linkset.c +158 -0
data/c/link-grammar/pp_linkset.h +20 -0
data/c/link-grammar/prefix.c +482 -0
data/c/link-grammar/prefix.h +139 -0
data/c/link-grammar/preparation.c +412 -0
data/c/link-grammar/preparation.h +20 -0
data/c/link-grammar/print-util.c +87 -0
data/c/link-grammar/print-util.h +32 -0
data/c/link-grammar/print.c +1085 -0
data/c/link-grammar/print.h +16 -0
data/c/link-grammar/prune.c +1864 -0
data/c/link-grammar/prune.h +17 -0
data/c/link-grammar/read-dict.c +1785 -0
data/c/link-grammar/read-dict.h +29 -0
data/c/link-grammar/read-regex.c +161 -0
data/c/link-grammar/read-regex.h +12 -0
data/c/link-grammar/regex-morph.c +126 -0
data/c/link-grammar/regex-morph.h +17 -0
data/c/link-grammar/resources.c +180 -0
data/c/link-grammar/resources.h +23 -0
data/c/link-grammar/sat-solver/.DS_Store +0 -0
data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
data/c/link-grammar/sat-solver/Makefile +527 -0
data/c/link-grammar/sat-solver/Makefile.am +29 -0
data/c/link-grammar/sat-solver/Makefile.in +527 -0
data/c/link-grammar/sat-solver/clock.hpp +33 -0
data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
data/c/link-grammar/sat-solver/guiding.hpp +244 -0
data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
data/c/link-grammar/sat-solver/trie.hpp +118 -0
data/c/link-grammar/sat-solver/util.cpp +23 -0
data/c/link-grammar/sat-solver/util.hpp +14 -0
data/c/link-grammar/sat-solver/variables.cpp +5 -0
data/c/link-grammar/sat-solver/variables.hpp +829 -0
data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
data/c/link-grammar/spellcheck-aspell.c +148 -0
data/c/link-grammar/spellcheck-hun.c +136 -0
data/c/link-grammar/spellcheck.h +34 -0
data/c/link-grammar/string-set.c +169 -0
data/c/link-grammar/string-set.h +16 -0
data/c/link-grammar/structures.h +498 -0
data/c/link-grammar/tokenize.c +1049 -0
data/c/link-grammar/tokenize.h +15 -0
data/c/link-grammar/utilities.c +847 -0
data/c/link-grammar/utilities.h +281 -0
data/c/link-grammar/word-file.c +124 -0
data/c/link-grammar/word-file.h +15 -0
data/c/link-grammar/word-utils.c +526 -0
data/c/link-grammar/word-utils.h +152 -0
data/data/.DS_Store +0 -0
data/data/Makefile +511 -0
data/data/Makefile.am +4 -0
data/data/Makefile.in +511 -0
data/data/de/.DS_Store +0 -0
data/data/de/4.0.affix +7 -0
data/data/de/4.0.dict +474 -0
data/data/de/Makefile +387 -0
data/data/de/Makefile.am +9 -0
data/data/de/Makefile.in +387 -0
data/data/en/.DS_Store +0 -0
data/data/en/4.0.affix +26 -0
data/data/en/4.0.batch +1002 -0
data/data/en/4.0.biolg.batch +411 -0
data/data/en/4.0.constituent-knowledge +127 -0
data/data/en/4.0.dict +8759 -0
data/data/en/4.0.dict.m4 +6928 -0
data/data/en/4.0.enwiki.batch +14 -0
data/data/en/4.0.fixes.batch +2776 -0
data/data/en/4.0.knowledge +306 -0
data/data/en/4.0.regex +225 -0
data/data/en/4.0.voa.batch +114 -0
data/data/en/Makefile +554 -0
data/data/en/Makefile.am +19 -0
data/data/en/Makefile.in +554 -0
data/data/en/README +173 -0
data/data/en/tiny.dict +157 -0
data/data/en/words/.DS_Store +0 -0
data/data/en/words/Makefile +456 -0
data/data/en/words/Makefile.am +78 -0
data/data/en/words/Makefile.in +456 -0
data/data/en/words/currency +205 -0
data/data/en/words/currency.p +28 -0
data/data/en/words/entities.given-bisex.sing +39 -0
data/data/en/words/entities.given-female.sing +4141 -0
data/data/en/words/entities.given-male.sing +1633 -0
data/data/en/words/entities.locations.sing +68 -0
data/data/en/words/entities.national.sing +253 -0
data/data/en/words/entities.organizations.sing +7 -0
data/data/en/words/entities.us-states.sing +11 -0
data/data/en/words/units.1 +45 -0
data/data/en/words/units.1.dot +4 -0
data/data/en/words/units.3 +2 -0
data/data/en/words/units.4 +5 -0
data/data/en/words/units.4.dot +1 -0
data/data/en/words/words-medical.adv.1 +1191 -0
data/data/en/words/words-medical.prep.1 +67 -0
data/data/en/words/words-medical.v.4.1 +2835 -0
data/data/en/words/words-medical.v.4.2 +2848 -0
data/data/en/words/words-medical.v.4.3 +3011 -0
data/data/en/words/words-medical.v.4.4 +3036 -0
data/data/en/words/words-medical.v.4.5 +3050 -0
data/data/en/words/words.adj.1 +6794 -0
data/data/en/words/words.adj.2 +638 -0
data/data/en/words/words.adj.3 +667 -0
data/data/en/words/words.adv.1 +1573 -0
data/data/en/words/words.adv.2 +67 -0
data/data/en/words/words.adv.3 +157 -0
data/data/en/words/words.adv.4 +80 -0
data/data/en/words/words.n.1 +11464 -0
data/data/en/words/words.n.1.wiki +264 -0
data/data/en/words/words.n.2.s +2017 -0
data/data/en/words/words.n.2.s.biolg +1 -0
data/data/en/words/words.n.2.s.wiki +298 -0
data/data/en/words/words.n.2.x +65 -0
data/data/en/words/words.n.2.x.wiki +10 -0
data/data/en/words/words.n.3 +5717 -0
data/data/en/words/words.n.t +23 -0
data/data/en/words/words.v.1.1 +1038 -0
data/data/en/words/words.v.1.2 +1043 -0
data/data/en/words/words.v.1.3 +1052 -0
data/data/en/words/words.v.1.4 +1023 -0
data/data/en/words/words.v.1.p +17 -0
data/data/en/words/words.v.10.1 +14 -0
data/data/en/words/words.v.10.2 +15 -0
data/data/en/words/words.v.10.3 +88 -0
data/data/en/words/words.v.10.4 +17 -0
data/data/en/words/words.v.2.1 +1253 -0
data/data/en/words/words.v.2.2 +1304 -0
data/data/en/words/words.v.2.3 +1280 -0
data/data/en/words/words.v.2.4 +1285 -0
data/data/en/words/words.v.2.5 +1287 -0
data/data/en/words/words.v.4.1 +2472 -0
data/data/en/words/words.v.4.2 +2487 -0
data/data/en/words/words.v.4.3 +2441 -0
data/data/en/words/words.v.4.4 +2478 -0
data/data/en/words/words.v.4.5 +2483 -0
data/data/en/words/words.v.5.1 +98 -0
data/data/en/words/words.v.5.2 +98 -0
data/data/en/words/words.v.5.3 +103 -0
data/data/en/words/words.v.5.4 +102 -0
data/data/en/words/words.v.6.1 +388 -0
data/data/en/words/words.v.6.2 +401 -0
data/data/en/words/words.v.6.3 +397 -0
data/data/en/words/words.v.6.4 +405 -0
data/data/en/words/words.v.6.5 +401 -0
data/data/en/words/words.v.8.1 +117 -0
data/data/en/words/words.v.8.2 +118 -0
data/data/en/words/words.v.8.3 +118 -0
data/data/en/words/words.v.8.4 +119 -0
data/data/en/words/words.v.8.5 +119 -0
data/data/en/words/words.y +104 -0
data/data/lt/.DS_Store +0 -0
data/data/lt/4.0.affix +6 -0
data/data/lt/4.0.constituent-knowledge +24 -0
data/data/lt/4.0.dict +135 -0
data/data/lt/4.0.knowledge +38 -0
data/data/lt/Makefile +389 -0
data/data/lt/Makefile.am +11 -0
data/data/lt/Makefile.in +389 -0
data/grammar_police.gemspec +23 -0
data/lib/.DS_Store +0 -0
data/lib/grammar_police.rb +11 -0
data/lib/grammar_police/.DS_Store +0 -0
data/lib/grammar_police/dictionary.rb +30 -0
data/lib/grammar_police/linkage.rb +26 -0
data/lib/grammar_police/parse_options.rb +32 -0
data/lib/grammar_police/sentence.rb +44 -0
data/lib/grammar_police/version.rb +3 -0
data/tests/.DS_Store +0 -0
data/tests/count_linkages.rb +29 -0
data/tests/sentences.txt +86 -0
metadata +408 -0

data/c/link-grammar/read-dict.h ADDED Viewed

@@ -0,0 +1,29 @@
+/*************************************************************************/
+/* Copyright (c) 2004                                                    */
+/* Daniel Sleator, David Temperley, and John Lafferty                    */
+/* All rights reserved                                                   */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+int  read_dictionary(Dictionary dict);
+void dict_display_word_info(Dictionary dict, const char * s);
+void dict_display_word_expr(Dictionary dict, const char * s);
+void print_dictionary_data(Dictionary dict);
+void print_dictionary_words(Dictionary dict);
+void print_expression(Exp *);
+int  boolean_dictionary_lookup(Dictionary dict, const char *);
+int  delete_dictionary_words(Dictionary dict, const char *);
+Dict_node * dictionary_lookup_list(Dictionary dict, const char *);
+Dict_node * abridged_lookup_list(Dictionary dict, const char *);
+void free_lookup_list(Dict_node *);
+Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode);
+void        free_dictionary(Dictionary dict);
+Exp *       Exp_create(Dictionary dict);

data/c/link-grammar/read-regex.c ADDED Viewed

@@ -0,0 +1,161 @@
+/*************************************************************************/
+/* Copyright (c) 2005 Sampo Pyysalo                                      */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+#include <string.h>
+#include "link-includes.h"
+#include "api-structures.h"
+#include "structures.h"
+#include "read-regex.h"
+/*
+  Function for reading regular expression name:pattern combinations
+  into the Dictionary from a given file.
+  The format of the regex file is as follows:
+  Lines starting with "%" are comments and are ignored.
+  All other nonempty lines must follow the following format:
+      REGEX_NAME:  /pattern/
+  where REGEX_NAME is a unique name identifying the regex.
+  This name is used to determine the disjuncts that will be assigned to
+  tokens matching the pattern, so in the dictionary file (e.g. 4.0.dict)
+  you must have something like
+     REGEX_NAME:  (({@MX+} & (JG- or <noun-main-s>)) or YS+)) or AN+ or G+);
+  using the same name. The pattern itself must be surrounded by slashes.
+  Extra whitespace is ignored.
+*/
+#define MAX_REGEX_NAME_LENGTH 50
+#define MAX_REGEX_LENGTH      255
+/**
+ * Load regex definitions from file_name into the dictionary.
+ *
+ * Each entry has the form "NAME: /pattern/"; text from '%' to the end of
+ * the line is skipped wherever an entry may start.  A '/' directly
+ * preceded by a backslash does not terminate the pattern.  Every entry
+ * becomes a Regex_node (uncompiled: re == NULL) linked in file order,
+ * starting at dict->regex_root.
+ *
+ * Names are limited to MAX_REGEX_NAME_LENGTH-1 characters and patterns to
+ * MAX_REGEX_LENGTH-1 characters; longer ones are reported as errors.
+ *
+ * Returns 0 on success, or 1 after reporting the problem with prt_error().
+ * Nodes linked in before a failure stay in the list.
+ */
+int read_regex_file(Dictionary dict, const char *file_name)
+{
+	Regex_node **tail = &dict->regex_root; /* Link that receives the next node */
+	Regex_node *new_re;
+	char name[MAX_REGEX_NAME_LENGTH];
+	char regex[MAX_REGEX_LENGTH];
+	int c, prev, i, line = 1;
+	FILE *fp;
+	fp = dictopen(file_name, "r");
+	if (fp == NULL)
+	{
+		prt_error("Error: cannot open regex file %s\n", file_name);
+		return 1;
+	}
+	/* read in regexs. loop broken on EOF. */
+	while (1)
+	{
+		/* skip whitespace and comments. */
+		do
+		{
+			do
+			{
+				c = fgetc(fp);
+				if (c == '\n') { line++; }
+			}
+			while (isspace(c));
+			if (c == '%')
+			{
+				while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
+				line++;
+			}
+		}
+		while (isspace(c));
+		if (c == EOF) { break; } /* done. */
+		/* read in the name of the regex. */
+		i = 0;
+		do
+		{
+			/* Stop one short of the buffer size: the slot at
+			 * MAX_REGEX_NAME_LENGTH-1 is reserved for the NUL
+			 * written after the loop. */
+			if (i >= MAX_REGEX_NAME_LENGTH-1)
+			{
+				prt_error("Error: Regex name too long on line %d\n", line);
+				goto failure;
+			}
+			name[i++] = (char) c;
+			c = fgetc(fp);
+		}
+		while ((!isspace(c)) && (c != ':') && (c != EOF));
+		name[i] = '\0';
+		/* Skip possible whitespace after name, expect colon. */
+		while (isspace(c))
+		{
+			if (c == '\n') { line++; }
+			c = fgetc(fp);
+		}
+		if (c != ':')
+		{
+			prt_error("Error: Regex missing colon on line %d\n", line);
+			goto failure;
+		}
+		/* Skip whitespace after colon, expect slash. */
+		do
+		{
+			if (c == '\n') { line++; }
+			c = fgetc(fp);
+		}
+		while (isspace(c));
+		if (c != '/')
+		{
+			prt_error("Error: Regex missing leading slash on line %d\n", line);
+			goto failure;
+		}
+		/* Read in the regex.  The closing slash is stored as well and
+		 * then overwritten by the terminating NUL. */
+		prev = 0;
+		i = 0;
+		do
+		{
+			if (i > MAX_REGEX_LENGTH-1)
+			{
+				prt_error("Error: Regex too long on line %d\n", line);
+				goto failure;
+			}
+			prev = c;
+			c = fgetc(fp);
+			regex[i++] = (char) c;
+		}
+		while ((c != '/' || prev == '\\') && (c != EOF));
+		regex[i-1] = '\0';
+		/* Expect termination by a slash. */
+		if (c != '/')
+		{
+			prt_error("Error: Regex missing trailing slash on line %d\n", line);
+			goto failure;
+		}
+		/* Create new Regex_node and add to dict list. */
+		new_re = (Regex_node *) malloc(sizeof(Regex_node));
+		if (new_re == NULL)
+		{
+			prt_error("Error: out of memory reading regex on line %d\n", line);
+			goto failure;
+		}
+		new_re->name    = strdup(name);
+		new_re->pattern = strdup(regex);
+		new_re->re      = NULL;
+		new_re->next    = NULL;
+		if ((new_re->name == NULL) || (new_re->pattern == NULL))
+		{
+			/* Do not link a half-built node into the list. */
+			free(new_re->name);
+			free(new_re->pattern);
+			free(new_re);
+			prt_error("Error: out of memory reading regex on line %d\n", line);
+			goto failure;
+		}
+		*tail = new_re;
+		tail = &new_re->next;
+	}
+	fclose(fp);
+	return 0;
+failure:
+	fclose(fp);
+	return 1;
+}

data/c/link-grammar/read-regex.h ADDED Viewed

@@ -0,0 +1,12 @@
+/*************************************************************************/
+/* Copyright (c) 2005 Sampo Pyysalo                                      */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+int read_regex_file(Dictionary dict, const char *file_name); /* loads the file's "NAME: /pattern/" entries into the list at dict->regex_root; returns 0 on success, 1 on error */

data/c/link-grammar/regex-morph.c ADDED Viewed

@@ -0,0 +1,126 @@
+/*************************************************************************/
+/* Copyright (c) 2005  Sampo Pyysalo                                     */
+/* All rights reserved                                                   */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+/* On MS Windows, regex.h fails to pull in size_t, so work around this by
+ * including <stddef.h> before <regex.h> (<sys/types.h> is not enough) */
+#include <stddef.h>
+#include <regex.h>
+#include "api-structures.h"
+#include "link-includes.h"
+#include "read-dict.h"
+#include "regex-morph.h"
+#include "structures.h"
+/**
+ * Support for the regular-expression based token matching system
+ * using standard POSIX regex.
+ */
+/**
+ * Compile every regex in the dictionary's list that has not been compiled
+ * yet (re->re == NULL), using POSIX extended syntax.
+ *
+ * A node's re pointer is only set once regcomp() has succeeded, so a node
+ * never points at a regex_t that failed to compile.  A regex whose name is
+ * missing from the dictionary is reported but still kept.
+ *
+ * Returns 0 on success, else the regcomp() error code of the first
+ * pattern that failed (REG_ESPACE if memory for the regex_t ran out).
+ * Nodes after the failing one are left uncompiled.
+ */
+int compile_regexs(Dictionary dict)
+{
+	regex_t *preg;
+	int rc;
+	Regex_node *re;
+	for (re = dict->regex_root; re != NULL; re = re->next)
+	{
+		/* If re->re non-null, assume compiled already. */
+		if (re->re != NULL) { continue; }
+		preg = (regex_t *) malloc(sizeof(regex_t));
+		if (preg == NULL)
+		{
+			prt_error("Error: out of memory compiling regex '%s' (%s)\n",
+							re->pattern, re->name);
+			return REG_ESPACE;
+		}
+		rc = regcomp(preg, re->pattern, REG_EXTENDED);
+		if (rc)
+		{
+			char reason[128];
+			/* Ask the regex library why it rejected the pattern. */
+			regerror(rc, preg, reason, sizeof(reason));
+			prt_error("Error: Failed to compile regex '%s' (%s): %s\n",
+							re->pattern, re->name, reason);
+			free(preg);
+			return rc;
+		}
+		re->re = preg;
+		/* Check that the regex name is defined in the dictionary. */
+		if (!boolean_dictionary_lookup(dict, re->name))
+		{
+			/* TODO: better error handling. Maybe remove the regex? */
+			prt_error("Error: Regex name %s not found in dictionary!\n",
+			       re->name);
+		}
+	}
+	return 0;
+}
+/**
+ * Tries to match each regex in turn to word s.
+ * On match, returns the name of the first matching regex (the string is
+ * the one stored in the list node, not a copy).
+ * If no match is found, returns NULL.
+ *
+ * Nodes that have not been compiled (re->re == NULL) are skipped.  A
+ * regexec() result other than success or REG_NOMATCH is reported on
+ * stderr and the search moves on to the next regex.
+ */
+const char *match_regex(Dictionary dict, const char *s)
+{
+	int rc;
+	Regex_node *re;
+	/* The for-loop advances re on every path, including the skip of an
+	 * uncompiled node, so the walk always terminates. */
+	for (re = dict->regex_root; re != NULL; re = re->next)
+	{
+		if (re->re == NULL)
+		{
+			/* Re not compiled; likely an internal error, but there is
+			 * nothing to match against, so move on. */
+			continue;
+		}
+		/* No sub-match information is requested (0, NULL) and no
+		 * execution flags are set (final 0). */
+		rc = regexec((regex_t *) re->re, s, 0, NULL, 0);
+		if (0 == rc)
+		{
+			return re->name; /* match found. just return--no multiple matches. */
+		}
+		if (rc != REG_NOMATCH)
+		{
+			/* We have an error. TODO: more appropriate error handling.*/
+			fprintf(stderr,"Regex matching error %d occurred!\n", rc);
+		}
+	}
+	return NULL; /* no matches. */
+}
+/**
+ * Delete the storage associated with the dictionary's regex list: each
+ * node's compiled regex (if any), its name, its pattern and the node
+ * itself.  dict->regex_root is reset to NULL afterwards so the list head
+ * does not keep pointing at freed memory.
+ */
+void free_regexs(Dictionary dict)
+{
+	Regex_node *re = dict->regex_root;
+	while (re != NULL)
+	{
+		Regex_node *next = re->next;
+		/* Nodes that were never compiled have re == NULL; regfree()
+		 * must only be given a compiled regex_t. */
+		if (re->re != NULL)
+		{
+			regfree((regex_t *)re->re);
+			free(re->re);
+		}
+		free(re->name);
+		free(re->pattern);
+		free(re);
+		re = next;
+	}
+	dict->regex_root = NULL;
+}

data/c/link-grammar/regex-morph.h ADDED Viewed

@@ -0,0 +1,17 @@
+/*************************************************************************/
+/* Copyright (c) 2005  Sampo Pyysalo                                     */
+/* All rights reserved                                                   */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+#include "api-structures.h"
+int compile_regexs(Dictionary); /* compiles the not-yet-compiled regexs in dict->regex_root; 0 on success, else the regcomp() error code */
+const char *match_regex(Dictionary, const char *); /* name of the first regex matching the word, or NULL if none matches */
+void free_regexs(Dictionary dict); /* frees every node of the dict->regex_root list */

data/c/link-grammar/resources.c ADDED Viewed

@@ -0,0 +1,180 @@
+/*************************************************************************/
+/* Copyright (c) 2004                                                    */
+/* Daniel Sleator, David Temperley, and John Lafferty                    */
+/* All rights reserved                                                   */
+/*                                                                       */
+/* Use of the link grammar parsing system is subject to the terms of the */
+/* license set forth in the LICENSE file included with this software,    */
+/* and also available at http://www.link.cs.cmu.edu/link/license.html    */
+/* This license allows free redistribution and use in source and binary  */
+/* forms, with or without modification, subject to certain conditions.   */
+/*                                                                       */
+/*************************************************************************/
+#include "api.h"
+#include "api.c"
+#include <time.h>
+#if !defined(_WIN32)
+   #include <sys/time.h>
+   #include <sys/resource.h>
+#endif
+#if defined(__linux__)
+/* based on reading the man page for getrusage on linux, I inferred that
+   I needed to include this.  However it doesn't seem to be necessary */
+   #include <unistd.h>
+#endif
+#if defined(__hpux__)
+  #include <sys/syscall.h>
+  int syscall(int, int, struct rusage *rusage);  /* can't find
+													the prototype for this */
+  #define getrusage(a, b)  syscall(SYS_GETRUSAGE, (a), (b))
+#endif /* __hpux__ */
+#if defined(__sun__)
+int getrusage(int who, struct rusage *rusage);
+/* Declaration missing from sys/resource.h in sun operating systems (?) */
+#endif /* __sun__ */
+#define MAX_PARSE_TIME_UNLIMITED -1
+#define MAX_MEMORY_UNLIMITED ((size_t) -1)
+/**
+ * Current value of the usage clock, in seconds.
+ * On _WIN32 this is clock() scaled by CLOCKS_PER_SEC; everywhere else it
+ * is the user-time field (ru_utime) reported by getrusage() for
+ * RUSAGE_SELF.
+ */
+static double current_usage_time(void)
+{
+#if defined(_WIN32)
+	return ((double) clock())/CLOCKS_PER_SEC;
+#else
+	struct rusage usage;
+	double micros;
+	getrusage (RUSAGE_SELF, &usage);
+	/* Fold the microsecond part into a fraction of a second. */
+	micros = (double) usage.ru_utime.tv_usec;
+	return (usage.ru_utime.tv_sec + micros / 1000000.0);
+#endif
+}
+/**
+ * Allocate a Resources record with xalloc() and give it its initial state:
+ * no parse-time limit, no memory limit, all three time stamps set from the
+ * usage clock, the space baseline set from get_space_in_use(), zero
+ * cumulative time and both exhaustion flags cleared.
+ */
+Resources resources_create(void)
+{
+	Resources res = (Resources) xalloc(sizeof(struct Resources_s));
+	/* Start without limits. */
+	res->max_parse_time = MAX_PARSE_TIME_UNLIMITED;
+	/* Each stamp takes its own reading of the clock. */
+	res->when_created = current_usage_time();
+	res->when_last_called = current_usage_time();
+	res->time_when_parse_started = current_usage_time();
+	res->space_when_parse_started = get_space_in_use();
+	res->max_memory = MAX_MEMORY_UNLIMITED;
+	/* Nothing consumed, nothing exhausted yet. */
+	res->cumulative_time = 0;
+	res->memory_exhausted = FALSE;
+	res->timer_expired = FALSE;
+	return res;
+}
+/**
+ * Hand a Resources record back to xfree(), passing the size of
+ * struct Resources_s along with the pointer.
+ */
+void resources_delete(Resources r)
+{
+	const size_t record_size = sizeof(struct Resources_s);
+	xfree(r, record_size);
+}
+/**
+ * Restart the bookkeeping of r: the parse-start and last-called stamps
+ * are set from one reading of the usage clock, the space baseline is
+ * re-read, and both exhaustion flags are cleared.  The limits and the
+ * cumulative time are left as they are.
+ */
+void resources_reset(Resources r)
+{
+	/* One clock reading feeds both stamps. */
+	r->time_when_parse_started = current_usage_time();
+	r->when_last_called = r->time_when_parse_started;
+	r->space_when_parse_started = get_space_in_use();
+	r->memory_exhausted = FALSE;
+	r->timer_expired = FALSE;
+}
+#if 0 /* compiled out: would restart only the two time stamps, unlike resources_reset() */
+static void resources_reset_time(Resources r)
+{
+	r->when_last_called = r->time_when_parse_started = current_usage_time();
+}
+#endif
+/**
+ * Re-read the space baseline of r from get_space_in_use(); no other
+ * field is touched.
+ */
+void resources_reset_space(Resources r)
+{
+	r->space_when_parse_started = get_space_in_use();
+}
+/**
+ * Check both limits of r and latch the result: once the timer or the
+ * memory check has fired, the matching flag in r is set to TRUE.
+ * Returns 1 if either flag is set, 0 otherwise.
+ */
+int resources_exhausted(Resources r)
+{
+	int out_of_time;
+	int out_of_memory;
+	out_of_time = resources_timer_expired(r);
+	if (out_of_time)
+	{
+		r->timer_expired = TRUE;
+	}
+	out_of_memory = resources_memory_exhausted(r);
+	if (out_of_memory)
+	{
+		r->memory_exhausted = TRUE;
+	}
+	if (r->timer_expired)
+	{
+		return 1;
+	}
+	return (r->memory_exhausted ? 1 : 0);
+}
+/**
+ * Returns 0 when no parse-time limit is set.  Otherwise returns 1 if the
+ * timer_expired flag is already set or if the usage time elapsed since
+ * time_when_parse_started exceeds max_parse_time, and 0 if neither holds.
+ * The flag itself is not modified here.
+ */
+int resources_timer_expired(Resources r)
+{
+	if (r->max_parse_time == MAX_PARSE_TIME_UNLIMITED)
+	{
+		return 0;
+	}
+	/* An already latched flag wins without reading the clock. */
+	if (r->timer_expired)
+	{
+		return 1;
+	}
+	return (current_usage_time() - r->time_when_parse_started > r->max_parse_time);
+}
+/**
+ * Returns 0 when no memory limit is set.  Otherwise returns 1 if the
+ * memory_exhausted flag is already set or if get_space_in_use() exceeds
+ * max_memory, and 0 if neither holds.  The flag itself is not modified
+ * here.
+ */
+int resources_memory_exhausted(Resources r)
+{
+	if (r->max_memory == MAX_MEMORY_UNLIMITED)
+	{
+		return 0;
+	}
+	/* An already latched flag wins without measuring the space. */
+	if (r->memory_exhausted)
+	{
+		return 1;
+	}
+	return (get_space_in_use() > r->max_memory);
+}
+/**
+ * Report the usage time, in seconds, that has passed since
+ * when_last_called, labelled with s.  The line is printed only when
+ * verbosity is above 1; when_last_called is moved to the new reading in
+ * every case.
+ */
+static void resources_print_time(int verbosity, Resources r, const char * s)
+{
+	const double now = current_usage_time();
+	if (verbosity > 1)
+	{
+		const double elapsed = now - r->when_last_called;
+		printf("++++");
+		left_print_string(stdout, s,
+			"                                     ");
+		printf("%7.2f seconds\n", elapsed);
+	}
+	r->when_last_called = now;
+}
+/**
+ * Add the usage time spent since time_when_parse_started to
+ * cumulative_time and, when verbosity is above 0, print both that
+ * interval and the running total in seconds.  time_when_parse_started is
+ * then moved to the new clock reading.
+ */
+static void resources_print_total_time(int verbosity, Resources r)
+{
+	const double now = current_usage_time();
+	r->cumulative_time += (now - r->time_when_parse_started);
+	if (verbosity > 0)
+	{
+		printf("++++");
+		left_print_string(stdout, "Time",
+		                  "                                           ");
+		printf("%7.2f seconds (%.2f total)\n",
+			   now - r->time_when_parse_started, r->cumulative_time);
+	}
+	r->time_when_parse_started = now;
+}
+/**
+ * When verbosity is above 1, print the values of get_space_in_use() and
+ * get_max_space_used() in bytes.  The Resources argument is not read.
+ */
+static void resources_print_total_space(int verbosity, Resources r)
+{
+	unsigned long in_use;
+	unsigned long peak;
+	if (verbosity <= 1)
+	{
+		return;
+	}
+	printf("++++");
+	left_print_string(stdout, "Total space",
+	                  "                                            ");
+	in_use = (unsigned long) get_space_in_use();
+	peak = (unsigned long) get_max_space_used();
+	printf("%lu bytes (%lu max)\n", in_use, peak);
+}
+/**
+ * Forward to resources_print_time() with the verbosity and the Resources
+ * record stored in opts; s is the label passed through to it.
+ */
+void print_time(Parse_Options opts, const char * s)
+{
+	const int level = opts->verbosity;
+	resources_print_time(level, opts->resources, s);
+}
+/**
+ * Forward to resources_print_total_time() with the verbosity and the
+ * Resources record stored in opts.
+ */
+void parse_options_print_total_time(Parse_Options opts)
+{
+	const int level = opts->verbosity;
+	resources_print_total_time(level, opts->resources);
+}
+/**
+ * Forward to resources_print_total_space() with the verbosity and the
+ * Resources record stored in opts.
+ */
+void print_total_space(Parse_Options opts)
+{
+	const int level = opts->verbosity;
+	resources_print_total_space(level, opts->resources);
+}