RubyGems - grammar_cop - Versions diffs - 0.1.0 - Mend

grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (344) hide show

data/.DS_Store +0 -0
data/.gitignore +4 -0
data/Gemfile +4 -0
data/Rakefile +8 -0
data/data/.DS_Store +0 -0
data/data/Makefile +511 -0
data/data/Makefile.am +4 -0
data/data/Makefile.in +511 -0
data/data/de/.DS_Store +0 -0
data/data/de/4.0.affix +7 -0
data/data/de/4.0.dict +474 -0
data/data/de/Makefile +387 -0
data/data/de/Makefile.am +9 -0
data/data/de/Makefile.in +387 -0
data/data/en/.DS_Store +0 -0
data/data/en/4.0.affix +26 -0
data/data/en/4.0.batch +1002 -0
data/data/en/4.0.biolg.batch +411 -0
data/data/en/4.0.constituent-knowledge +127 -0
data/data/en/4.0.dict +8759 -0
data/data/en/4.0.dict.m4 +6928 -0
data/data/en/4.0.enwiki.batch +14 -0
data/data/en/4.0.fixes.batch +2776 -0
data/data/en/4.0.knowledge +306 -0
data/data/en/4.0.regex +225 -0
data/data/en/4.0.voa.batch +114 -0
data/data/en/Makefile +554 -0
data/data/en/Makefile.am +19 -0
data/data/en/Makefile.in +554 -0
data/data/en/README +173 -0
data/data/en/tiny.dict +157 -0
data/data/en/words/.DS_Store +0 -0
data/data/en/words/Makefile +456 -0
data/data/en/words/Makefile.am +78 -0
data/data/en/words/Makefile.in +456 -0
data/data/en/words/currency +205 -0
data/data/en/words/currency.p +28 -0
data/data/en/words/entities.given-bisex.sing +39 -0
data/data/en/words/entities.given-female.sing +4141 -0
data/data/en/words/entities.given-male.sing +1633 -0
data/data/en/words/entities.locations.sing +68 -0
data/data/en/words/entities.national.sing +253 -0
data/data/en/words/entities.organizations.sing +7 -0
data/data/en/words/entities.us-states.sing +11 -0
data/data/en/words/units.1 +45 -0
data/data/en/words/units.1.dot +4 -0
data/data/en/words/units.3 +2 -0
data/data/en/words/units.4 +5 -0
data/data/en/words/units.4.dot +1 -0
data/data/en/words/words-medical.adv.1 +1191 -0
data/data/en/words/words-medical.prep.1 +67 -0
data/data/en/words/words-medical.v.4.1 +2835 -0
data/data/en/words/words-medical.v.4.2 +2848 -0
data/data/en/words/words-medical.v.4.3 +3011 -0
data/data/en/words/words-medical.v.4.4 +3036 -0
data/data/en/words/words-medical.v.4.5 +3050 -0
data/data/en/words/words.adj.1 +6794 -0
data/data/en/words/words.adj.2 +638 -0
data/data/en/words/words.adj.3 +667 -0
data/data/en/words/words.adv.1 +1573 -0
data/data/en/words/words.adv.2 +67 -0
data/data/en/words/words.adv.3 +157 -0
data/data/en/words/words.adv.4 +80 -0
data/data/en/words/words.n.1 +11464 -0
data/data/en/words/words.n.1.wiki +264 -0
data/data/en/words/words.n.2.s +2017 -0
data/data/en/words/words.n.2.s.biolg +1 -0
data/data/en/words/words.n.2.s.wiki +298 -0
data/data/en/words/words.n.2.x +65 -0
data/data/en/words/words.n.2.x.wiki +10 -0
data/data/en/words/words.n.3 +5717 -0
data/data/en/words/words.n.t +23 -0
data/data/en/words/words.v.1.1 +1038 -0
data/data/en/words/words.v.1.2 +1043 -0
data/data/en/words/words.v.1.3 +1052 -0
data/data/en/words/words.v.1.4 +1023 -0
data/data/en/words/words.v.1.p +17 -0
data/data/en/words/words.v.10.1 +14 -0
data/data/en/words/words.v.10.2 +15 -0
data/data/en/words/words.v.10.3 +88 -0
data/data/en/words/words.v.10.4 +17 -0
data/data/en/words/words.v.2.1 +1253 -0
data/data/en/words/words.v.2.2 +1304 -0
data/data/en/words/words.v.2.3 +1280 -0
data/data/en/words/words.v.2.4 +1285 -0
data/data/en/words/words.v.2.5 +1287 -0
data/data/en/words/words.v.4.1 +2472 -0
data/data/en/words/words.v.4.2 +2487 -0
data/data/en/words/words.v.4.3 +2441 -0
data/data/en/words/words.v.4.4 +2478 -0
data/data/en/words/words.v.4.5 +2483 -0
data/data/en/words/words.v.5.1 +98 -0
data/data/en/words/words.v.5.2 +98 -0
data/data/en/words/words.v.5.3 +103 -0
data/data/en/words/words.v.5.4 +102 -0
data/data/en/words/words.v.6.1 +388 -0
data/data/en/words/words.v.6.2 +401 -0
data/data/en/words/words.v.6.3 +397 -0
data/data/en/words/words.v.6.4 +405 -0
data/data/en/words/words.v.6.5 +401 -0
data/data/en/words/words.v.8.1 +117 -0
data/data/en/words/words.v.8.2 +118 -0
data/data/en/words/words.v.8.3 +118 -0
data/data/en/words/words.v.8.4 +119 -0
data/data/en/words/words.v.8.5 +119 -0
data/data/en/words/words.y +104 -0
data/data/lt/.DS_Store +0 -0
data/data/lt/4.0.affix +6 -0
data/data/lt/4.0.constituent-knowledge +24 -0
data/data/lt/4.0.dict +135 -0
data/data/lt/4.0.knowledge +38 -0
data/data/lt/Makefile +389 -0
data/data/lt/Makefile.am +11 -0
data/data/lt/Makefile.in +389 -0
data/ext/.DS_Store +0 -0
data/ext/link_grammar/.DS_Store +0 -0
data/ext/link_grammar/extconf.rb +2 -0
data/ext/link_grammar/link-grammar/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
data/ext/link_grammar/link-grammar/Makefile +900 -0
data/ext/link_grammar/link-grammar/Makefile.am +202 -0
data/ext/link_grammar/link-grammar/Makefile.in +900 -0
data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
data/ext/link_grammar/link-grammar/and.c +1603 -0
data/ext/link_grammar/link-grammar/and.h +27 -0
data/ext/link_grammar/link-grammar/api-structures.h +362 -0
data/ext/link_grammar/link-grammar/api-types.h +72 -0
data/ext/link_grammar/link-grammar/api.c +1887 -0
data/ext/link_grammar/link-grammar/api.h +96 -0
data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/autoit/README +10 -0
data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
data/ext/link_grammar/link-grammar/command-line.c +458 -0
data/ext/link_grammar/link-grammar/command-line.h +15 -0
data/ext/link_grammar/link-grammar/constituents.c +1836 -0
data/ext/link_grammar/link-grammar/constituents.h +26 -0
data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
data/ext/link_grammar/link-grammar/corpus/README +17 -0
data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
data/ext/link_grammar/link-grammar/count.c +828 -0
data/ext/link_grammar/link-grammar/count.h +25 -0
data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
data/ext/link_grammar/link-grammar/error.c +92 -0
data/ext/link_grammar/link-grammar/error.h +35 -0
data/ext/link_grammar/link-grammar/expand.c +67 -0
data/ext/link_grammar/link-grammar/expand.h +13 -0
data/ext/link_grammar/link-grammar/externs.h +22 -0
data/ext/link_grammar/link-grammar/extract-links.c +625 -0
data/ext/link_grammar/link-grammar/extract-links.h +16 -0
data/ext/link_grammar/link-grammar/fast-match.c +309 -0
data/ext/link_grammar/link-grammar/fast-match.h +17 -0
data/ext/link_grammar/link-grammar/idiom.c +373 -0
data/ext/link_grammar/link-grammar/idiom.h +15 -0
data/ext/link_grammar/link-grammar/jni-client.c +779 -0
data/ext/link_grammar/link-grammar/jni-client.h +236 -0
data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
data/ext/link_grammar/link-grammar/link-features.h +37 -0
data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
data/ext/link_grammar/link-grammar/link-includes.h +465 -0
data/ext/link_grammar/link-grammar/link-parser.c +849 -0
data/ext/link_grammar/link-grammar/massage.c +329 -0
data/ext/link_grammar/link-grammar/massage.h +13 -0
data/ext/link_grammar/link-grammar/post-process.c +1113 -0
data/ext/link_grammar/link-grammar/post-process.h +45 -0
data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
data/ext/link_grammar/link-grammar/prefix.c +482 -0
data/ext/link_grammar/link-grammar/prefix.h +139 -0
data/ext/link_grammar/link-grammar/preparation.c +412 -0
data/ext/link_grammar/link-grammar/preparation.h +20 -0
data/ext/link_grammar/link-grammar/print-util.c +87 -0
data/ext/link_grammar/link-grammar/print-util.h +32 -0
data/ext/link_grammar/link-grammar/print.c +1085 -0
data/ext/link_grammar/link-grammar/print.h +16 -0
data/ext/link_grammar/link-grammar/prune.c +1864 -0
data/ext/link_grammar/link-grammar/prune.h +17 -0
data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
data/ext/link_grammar/link-grammar/read-dict.h +29 -0
data/ext/link_grammar/link-grammar/read-regex.c +161 -0
data/ext/link_grammar/link-grammar/read-regex.h +12 -0
data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
data/ext/link_grammar/link-grammar/resources.c +180 -0
data/ext/link_grammar/link-grammar/resources.h +23 -0
data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
data/ext/link_grammar/link-grammar/string-set.c +169 -0
data/ext/link_grammar/link-grammar/string-set.h +16 -0
data/ext/link_grammar/link-grammar/structures.h +498 -0
data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
data/ext/link_grammar/link-grammar/tokenize.h +15 -0
data/ext/link_grammar/link-grammar/utilities.c +847 -0
data/ext/link_grammar/link-grammar/utilities.h +281 -0
data/ext/link_grammar/link-grammar/word-file.c +124 -0
data/ext/link_grammar/link-grammar/word-file.h +15 -0
data/ext/link_grammar/link-grammar/word-utils.c +526 -0
data/ext/link_grammar/link-grammar/word-utils.h +152 -0
data/ext/link_grammar/link_grammar.c +202 -0
data/ext/link_grammar/link_grammar.h +99 -0
data/grammar_cop.gemspec +24 -0
data/lib/.DS_Store +0 -0
data/lib/grammar_cop.rb +9 -0
data/lib/grammar_cop/.DS_Store +0 -0
data/lib/grammar_cop/dictionary.rb +19 -0
data/lib/grammar_cop/linkage.rb +30 -0
data/lib/grammar_cop/parse_options.rb +32 -0
data/lib/grammar_cop/sentence.rb +36 -0
data/lib/grammar_cop/version.rb +3 -0
data/test/.DS_Store +0 -0
data/test/grammar_cop_test.rb +27 -0
metadata +407 -0

data/data/en/4.0.knowledge ADDED Viewed

@@ -0,0 +1,306 @@
+; Post-processing knowledge file
+; 6/96
+; ----------------------------------------------------------------------------
+; This file contains the knowledge related to post-processing, in the
+; form of lists and rules. This file is read by post-process.c at run-time.
+; Syntax of file:
+;           line starting with ";" is a comment
+;           commas are field delimiters
+;           any token beginning with the character @ is expanded to the set
+;               of symbols it defined. e.g. one could write
+; FOO: blah1 blah2 blah3
+; thus defining a set FOO containing three strings. Then one could later write
+; BAR: blah5 @FOO blah8
+; which defines a set BAR containing 5 strings.
+;
+; Capitalized tokens are *required*, though if you feel like providing an
+; empty list afterwards, that's your right.
+; ----------------------------------------------------------------------------
+; The following links start a domain. Each must be given a name in the
+; table below (STARTING_LINK_TYPE_TABLE)
+DOMAIN_STARTER_LINKS:
+ W   Ce   Cs   Ca   Cc   Ci   R*   Rn   Re   RSe  Mr   QI#d   Mv*   Jr  Mj   Qd
+ TOn   TOi   Mg*   MVi  Ss#d   Bsd   ER   Z  Ma#*   SIs#g  BIqx   MX#p   MX#a
+ MX#r   MX#j   MV#o   MV#p  Eq   COq   CCq  AFd   PFc
+; ----------------------------------------------------------------------
+; The following links start a urfl domain.  They are also included in the
+; domain, as opposed to regular starter links (above), which are not. A
+; urfl domain includes links accessible from the root word, tracing to
+; the right (as well as everything accessible from the left end of the
+; starter link).
+URFL_DOMAIN_STARTER_LINKS:  TOo   I#j  Pa##j   CP
+; ----------------------------------------------------------------------
+; The following start a urfl_only domain. These include _only_ links
+; reachable from the root word, tracing to the right. They aren't
+; included in the domain
+URFL_ONLY_DOMAIN_STARTER_LINKS:  SFsx   Ss#g   COp
+; ----------------------------------------------------------------------
+; Links which start a domain and are also part of the domain. This must be
+; a sublist of the domain_starter_list
+DOMAIN_CONTAINS_LINKS:
+ Mg*   Mx   Bsd   MX#a   Ma#*   Mv*   MX#r   Ss#d   Ws   Wq  Qd   Mj   Wj
+ Wi  MX#j   AFd   PFc   Jr   Wd   Mr
+; ----------------------------------------------------------------------
+; These links are not put in the word/link graph. They also cannot be the
+; starter links for a domain. (These links may also only be used in cycles.)
+IGNORE_THESE_LINKS: Xca
+; ----------------------------------------------------------------------
+; These links may only be used in cycles.
+MUST_FORM_A_CYCLE_LINKS:  R#*   TOt   EXx   HA    SFsic   Jr  JQ  Xca
+; ----------------------------------------------------------------------
+; These links are not traced further if they point back before the root word.
+; The creation of Rw necessitated making B#m a restricted link, to
+; prevent the (e) domain, started by Ce, from extending around through
+; the Rw link.
+; Reverted.
+; This breaks parsing of
+;    How fast a program does he think it is
+;    I wonder how fast a program he thinks it is
+;    I wonder how much money you earned
+;    I wonder how many people you saw
+;    I wonder how big a department it is
+;    I wonder how much oil they spilled
+;    This is the man whose dog I bought
+;    I wonder which dog he said you chased
+;    How efficient a program is it
+; Meanwhile, I can't find the Ce problem mentioned ... this needs more
+; documentation!
+RESTRICTED_LINKS:
+   B#*  D##w   B#w   B#d   AFh  MVt   Xx   HL   SFsic  AFd   Bc   CX  EAh
+   H   HA   PFc   B#j   Wd   PF   Z
+;   H   HA   PFc   B#j   Wd   PF   Z  B#m
+; ----------------------------------------------------------------------
+; ---------------------- LINK TYPE TABLE-------------------------------
+; ----------------------------------------------------------------------
+; The following table associates a domain type with each possible
+; starting link. It contains pairs: the first of each pair is a link
+; type, and the second is the domain to which that link type belongs.
+STARTING_LINK_TYPE_TABLE:
+ Ce    e
+ R*    r
+ Rn    r
+ Re    r
+ W     m
+ RSe   e
+ Cs    s
+ Ca    s
+ Jr    e
+ Mr    r
+ Cc    s
+ Mv*   e
+ QI#d  s
+ BIqx  s
+ TOn   e
+ TOi   e
+ MVi   e
+ MV#o  s
+ MV#p  s
+ AFd   s
+ PFc   s
+ Mg*   e
+ Mj    j
+ Qd    m
+ MX#j  j
+ TOo   x
+ I#j   x
+ Pa##j x
+ CP    x
+ COp   d
+ SFsx  d
+ Ss#g  d
+ SIs#g s
+ Ss#d  s
+ Bsd   s
+ ER    s
+ Z     s
+ Ma#*  e
+ MX#p  e
+ Ci    e
+ MX#a  e
+ Eq    e
+ COq   e
+ CCq   s
+ MX#r  r
+; ----------------------------------------------------------------------
+; ----------------------- LINK SETS ------------------------------------
+; ----------------------------------------------------------------------
+; (Not in use at present; see comment at beginning of file)
+; ----------------------------------------------------------------------
+; ----------------- RULES ----------------------------------------------
+; ----------------------------------------------------------------------
+; Explanation of syntax: as usual, each stanza begins with a label
+; terminated by a colon. The interpretation of the rule depends on
+; the label, as specified in each stanza.
+; The following rule asserts that the linkage must *still* be connected
+; when the specified set(s) of links are removed from the linkage.
+FORM_A_CYCLE_RULES:
+        @MUST_FORM_A_CYCLE_LINKS  ,  "'must form a cycle' violation0"
+; For the following rules, if a domain contains a link matching the 1st
+; column, it must also contain a link matching one of the members of the
+; set in the 2nd column. The individual rules are demarcated by semicolons and
+; the fields within a rule are demarcated by commas.
+CONTAINS_ONE_RULES:
+ SI#*  ,  Wq    Qd    CQ    PFc             , "Bad use of s-v inversion1" ,
+ SI#x  ,  Wq    Qd    CQ    PFc             , "Bad use of s-v inversion2" ,
+ SFI##* , Wq    Qd    CQ    PFc             , "Bad use of s-v inversion3",
+ SXI   ,  Wq    Qd    CQ    PFc             , "Bad use of s-v inversion4" ,
+ Ws    ,  D##w    S##w    H                 , "S-V inversion required5",
+ I#a   ,  B#m    B#w                        , "incorrect use of 'to'6"       ,
+ Wq    ,  SI    SFI   SXI                   , "S-V inversion required7" ,
+ Qd    ,  SI    SFI   SXI                   , "S-V inversion required8" ,
+ PFc   ,  SI    SFI   SXI                   , "S-V inversion required9" ,
+ Mj    ,  Jw    JQ                          , "Incorrect relative10" ,
+ MX#j  ,  Jw    JQ                          , "Incorrect relative11" ,
+ Wj    ,  Jw    JQ                          , "Misuse of preposition12"     ,
+ JQ    ,  Mj    Wj    MX#j                  , "Misuse of preposition13"     ,
+ Jw    ,  Mj    Wj    MX#j                  , "Misuse of preposition14"     ,
+ B#j   ,  Jr                                , "Incorrect relative15"       ,
+ Jr    ,  B#j                               , "Incorrect relative16"       ,
+ EAh   ,  AF    Bsm    B*m    Qe    Ca    AFm
+					    , "Incorrect use of 'how'17"    ,
+ EEh   ,  AF    Bsm    B*m    Qe    Ca    AFm
+					    , "Incorrect use of 'how'18"    ,
+ Qe    ,  EEh           		    , "Incorrect use of adverb19"   ,
+ THi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'20"  ,
+ TSi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'21"   ,
+ QIi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'22"    ,
+ TOi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'23"     ,
+ Ci    ,  SFsi   SFIsi   OXi                , "Complement requires 'it'24"     ,
+ COqi  ,  SFsi   SFIsi   OXi                , "Complement requires 'it'25"     ,
+ CPi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'26"     ,
+ Eqi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'27"     ,
+ LEi   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'28"     ,
+ MVti  ,  SFsi   SFIsi   OXi                , "Complement requires 'it'29"     ,
+ AFdi  ,  SFsi   SFIsi   OXi                , "Complement requires 'it'30"     ,
+ O#i   ,  SFsi   SFIsi   OXi                , "Complement requires 'it'31"     ,
+ SFst  ,  O*t    Ost    Omt    Bs#t    B*#t    Bc#t    , "Bad use of 'there'32"           ,
+ SFIst ,  O*t    Ost    Omt    Bs#t    B*#t    Bc#t    , "Bad use of 'there'33"           ,
+ SFp   ,  Opt    Omt    O*t    Bp#t    B*#t    Bc#t    , "Bad use of 'there'34"           ,
+;
+; This SFu rule forces subject-object agreement for uncountable noun objects
+ SFu   ,  Out    Omt    O*t    Bp#t    B*#t    Bc#t    , "Bad use of 'there'34a"           ,
+ SFIp  ,  Opt    Omt    O*t    Bp#t    B*#t    Bc#t    , "Bad use of 'there'35"           ,
+ OXt   ,  O#t    B##t                     ,   "Bad use of 'there'36"           ,
+ SFsi* ,  TOi    THi   QIi    TSi    O#i    Ci    THb   CPi
+          COqi    CPi    Eqi    AFdi    BIh , "Bad use of 'it'37"           ,
+ SFIsi ,  TOi    THi   QIi    TSi    O#i    Ci    THb   CPi
+          COqi    CPi    Eqi    AFdi    BIh , "Bad use of 'it'38"           ,
+ OXi   ,  TOi    THi   QIi    TSi    O#i    Ci    THb   CPi
+          COqi    CPi    Eqi    AFdi    BIh , "Bad use of 'it'39"           ,
+ THb   ,  S##t    SI##t  SFsi  SFIsi        , "Bad use of predicate40"      ,
+ BIh   ,  Ss#b    SIs#b  SFsi  SFIsi        , "Bad use of predicate41"      ,
+ BIq   ,  S##q    SI##q  SFsi  Ss#b    SFIsi SIs#b
+                                            , "Bad use of predicate42"      ,
+ MVt   ,  Dm#m   EAm   EEm   MVm  Pam   Pafm   AFm   EB#m   MVb AJrc
+          Om   Mam  Am  Jm  Ds*m   MX#m     , "Bad comparative43"    ,
+ MVz   ,  D##y    EAy    EEy    MVy    EB#y , "Bad comparative44"    ,
+ MV#a  ,  Pam    Pafm    EAm   Ds*m   EAy   AFm   Mam   Am
+					    , "Bad comparative45"    ,
+ MV#i  ,  Pam    Pafm    EAm   Ds*m   EAy   AFm   Mam   Am
+					    , "Bad comparative46"    ,
+ MV#o  ,  D##m    D##y    Om    Oy    Jm    Jy   Am   MX#m
+					    , "Bad comparative47"    ,
+ MV#p  ,  EEm   MVb   Dm#m   EEy   D##y  MVm   Om   Oy
+                            Jm   Jy   Am   MX#m
+					    , "Bad comparative48"    ,
+ Pafc  ,  EB#m    EB#y                      , "Bad comparative49"    ,
+ Pafc  ,  Pa*    Paf*                       , "Bad comparative50"    ,
+ MVat  ,  MVm                               , "Bad comparative51"    ,
+ MVpt  ,  MVm                               , "Bad comparative52"    ,
+ MVat  ,  MVa   MVp                         , "Bad comparative53"    ,
+ MVpt  ,  MVa   MVp                         , "Bad comparative54"    ,
+ U#t   ,  D##m    D##y    Om    Oy    Jm    Jy   Am   MX#m
+				            , "Bad comparative55"    ,
+ Cc    ,  EEm    EEy    MVm    MVb    MVy
+	                                    , "Bad comparative56"    ,
+ Sp#c  ,  Dmcm    Dmcy    Om    Oy    Jm    Jy  MX#m
+	 				    , "Bad comparative57"    ,
+ Ss#c  ,  Dmum    Dmuy    Om    Oy    Jm    Jy    Ds*y  MX#m
+					    , "Bad comparative58"    ,
+ S##c  ,  Dm#m    D##y    Om    Oy    Jm    Jy   MX#m
+					    , "Bad comparative59"    ,
+ THc   ,  TH                                , "Bad comparative60"    ,
+ TOc   ,  TO**   TOf*   TOi*                , "Bad comparative61"    ,
+ TOtc  ,  TOt  ,                              "Bad comparative62"    ,
+ Ma**  ,   TO   TOf   TH   MVp   TOt   QI   OF  MVt   MVz   MVh   Ytm   Ya
+					    , "Bad use of adjective63"    ,
+ Mam   ,  TO   TOf   TH   MVp   TOt   QI   OF  MVt   MVz   MVh   Ytm   Ya
+					    , "Bad use of adjective64"    ,
+ MX#a  ,  TO   TOf   TH   MVp   TOt   QI   OF  MVt   MVz   MVh   Ytm   Ya  MJ
+			                    , "Bad use of adjective65"    ,
+; There's no ZZZ connector, which means that Ixd and Oxn
+; are prohibited from ever occurring. 4.0.batch covers this.
+ Ixd   ,  ZZZ                               , "Can't use 'do' with that verb" ,
+ Oxn   ,  ZZZ                               , "Bad use of pronoun66" ,
+ MVh   ,  EExk   EAxk   D##k                , "Incorrect use of that67" ,
+; The Rw link necessitated commenting out 68, because we had to make B#m
+; a restricted link (see above) xxx reverted .. this is needed ...
+;
+B#m  ,   D##w   H   HA                     , "Bad use of gerund68"
+CONTAINS_NONE_RULES:
+ S     ,  Spxi                  	    , "Bad n-v agreement69" ,
+ SI     , SIpxi                		    , "Bad n-v agreement70" ,
+ Ws    ,  B#m   Ca   BT      		    , "Question inversion violated71" ,
+ SF    ,  I*   PP*   TO*   Pa*   Pam  Pg*   Pv*   LE*   AFd*  MVta
+					    , "Bad use of 'filler' subject72" ,
+ SFI   ,  I*   PP*   TO*   Pa*   Pam  Pg*   Pv*   LE*   AFd*  MVta
+					    , "Bad use of 'filler' subject73" ,
+ OX    ,  I*   PP*   TO*   Pa*   Pam  Pg*   Pv*   LE*   AFd*  MVta
+					    , "Bad use of 'filler' subject74" ,
+ MXsr  ,  Sp#w 				    , "Bad n-v agreement75" ,
+ MXpr  ,  Ss#w   S#iw			    , "Bad n-v agreement76" ,
+ Mr    ,  B#*				    , "Bad use of 'whose'77"
+; ----------------------------------------------------------------------
+; The following rule asserts that all specified domains must have the
+; property that all of the words that touch a link in the domain are
+; not to the left of the root word of the domain. These rules are
+; different from the above in that the first field is a *domain name*,
+; rather than a set of links.
+BOUNDED_RULES:
+ s                               , "Unbounded s domain78" ,
+ r                               , "Unbounded r domain79"

data/data/en/4.0.regex ADDED Viewed

@@ -0,0 +1,225 @@
+ %***************************************************************************%
+ %                                                                           %
+ %  Copyright (C) 2005, 2006 Sampo Pyysalo, Sophie Aubin                     %
+ %  See file "LICENSE" for information about commercial use of this system   %
+ %                                                                           %
+ %***************************************************************************%
+% This file contains regular expressions that are used to match
+% tokens not found in the dictionary. Each regex is given a name which
+% determines the disjuncts assigned when the regex matches; this name
+% must be defined in the dictionary along with the appropriate disjuncts.
+% Note that the order of the regular expressions matters: matches will
+% be attempted in the order in which the regexes appear in this file,
+% and only the first match will be used.
+% Numbers.
+% XXX, we need to add utf8 U+00A0 "no-break space"
+%
+% Allows at most two colons in hour-minute-second HH:MM:SS expressions
+% Allows at most two digits between colons
+HMS-TIME: /^[0-9][0-9]?(:[0-9][0-9]?(:[0-9][0-9]?)?)?(AM|PM|am|pm)?$/
+% e.g. 1950's leading number can be higher, for science fiction.
+% Must be four digits, or possibly three. Must end in s, 's ’s
+DECADE-TIME: /^([1-4][0-9][0-9]|[1-9][0-9])0(s|'s|’s)$/
+% Day-of-month names; this regex will match before the one below.
+DAY-ORDINALS: /^(1st|2nd|3rd|[4-9]th|1[0-9]th|2(0th|1st|2nd|3rd|[4-9]th)|30th|31st)$/
+% Ordinal numbers; everything except 1st through 13th
+% is handled by regex.
+ORDINALS: /^[1-9][0-9]*(0th|1st|2nd|3rd|[4-9]th)$/
+% Allows any number of commas or periods
+% Be careful not to match the period at the end of a sentence;
+% for example: "It happened in 1942."
+NUMBERS: /^[0-9,.]*[0-9]$/
+% This parses signed numbers and ranges, e.g. "-5" and "5-10" and "9+/-6.5"
+NUMBERS: /^[0-9.,-]*[0-9](\+\/-[0-9.,-]*[0-9])?$/
+% Parses simple fractions e.g. "1/60" with no decimal points or anything fancy
+FRACTION: /^[0-9]+\/[0-9]+$/
+% "10(3)" exponent (used in PubMed)
+NUMBERS: /^[0-9.,-]*[0-9][0-9.,-]*\([0-9:.,-]*[0-9][0-9.,-]*\)$/
+% Roman numerals
+% The first expr has the potential(?) problem that it matches an empty
+% string. Thus, the next three rules specify that at least one section
+% is non-empty.
+ROMAN-NUMERAL-WORDS: /^M*(CM|D?C{0,3}|CD)(XC|L?X{0,3}|XL)(IX|V?I{0,3}|IV)$/
+% ROMAN-NUMERAL-WORDS: /^M*(CM|D?C{0,3}|CD){1}(XC|L?X{0,3}|XL)(IX|V?I{0,3}|IV)$/
+% ROMAN-NUMERAL-WORDS: /^M*(CM|D?C{0,3}|CD)(XC|L?X{0,3}|XL){1}(IX|V?I{0,3}|IV)$/
+% ROMAN-NUMERAL-WORDS: /^M*(CM|D?C{0,3}|CD)(XC|L?X{0,3}|XL)(IX|V?I{0,3}|IV){1}$/
+% Strings of initials. e.g "Dr. J.G.D. Smith lives on Main St."
+INITIALS: /^([A-Z]\.)+$/
+% Greek letters with numbers
+GREEK-LETTER-AND-NUMBER: /^(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)\-?[0-9]+$/
+PL-GREEK-LETTER-AND-NUMBER: /^(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)s\-?[0-9]+$/
+% Some "safe" derived units. Simple units are in dictionary.
+% The idea here is for the regex to match something that is almost
+% certainly part of a derived unit, and allow the rest to be
+% anything; this way we can capture difficult derived units such
+% as "mg/kg/day" and even oddities such as "micrograms/mouse/day"
+% without listing them explicitly.
+% TODO: add more.
+% Some (real) misses from these:
+% micrograms.kg-1.h-1 microM-1 J/cm2 %/day mN/m cm/yr
+% m/s days/week ml/s degrees/sec cm/sec cm/s mm/s N/mm (is that a unit?)
+% cuts/minute clicks/s beats/minute x/week W/kg/W %/patient-year
+% microIU/ml degrees/s counts/mm2 cells/mm3 tumors/mouse
+% mm/sec ml/hr mJ/cm(2) m2/g amol/mm2 animals/group
+% h-1 min-1 day-1 cm-1 mg-1 kg-1 mg.m-2.min-1 ms.cm-1 g-1
+% sec-1 ms-1 ml.min.-1kg-1 ml.hr-1
+% also, both kilometer and kilometers seem to be absent(!)
+% remember "mm"!
+UNITS: /^([npmk]|nano|pico|milli|micro|kilo)?(g|grams?)\//   % grams/anything
+UNITS: /^([fnmp]|femto|nano|micro|pico|mu)?mol(es)?\//       % mol/anything
+UNITS: /^[a-zA-Z\/.]+\/((m|micro)?[lLg]|kg|mol|min|day|h)$/  % common endings
+% common endings, except in the style "mg.kg-1" instead of "mg/kg".
+UNITS: /^[a-zA-Z\/.1-]+\.((m|micro)?[lLg]|kg|mol|min|day|h)(-1|\(-1\))$/
+% combinations of numbers and units, e.g. "50-kDa", "1-2h"
+% TODO: Clean up and check that these are up-to-date wrt the
+% dictionary-recognized units; this is quite a mess currently.
+% TODO: Extend the "number" part of the regex to allow anything
+% that the NUMBER regex matches.
+% One problem here is a failure to split up the expression ...
+% e.g. "2hr" becomes 2 - ND - hr with the ND link. But 2-hr is treated
+% as a single word ('It is a 2-hr wait')
+% NUMBER-AND-UNIT: /^[0-9.,-]+(msec|s|min|hour|h|hr|day|week|wk|month|year|yr|kDa|kilodalton|base|kilobase|base-pair|kD|kd|kDa|bp|nt|kb|mm|mg|cm|nm|g|Hz|ms|kg|ml|mL|km|microm|\%)$/
+% Comment out above, it screws up handling of unit suffixes, for
+% example: "Zangbert stock fell 30% to $2.50 yesterday."
+% fold-words. Matches NUMBER-fold, where NUMBER can be either numeric
+% or a spelled-out number, and the hyphen is optional. Note that for
+% spelled-out numbers, anything is allowed between the "initial" number
+% and "fold" to catch e.g. "two-to-three fold" ("fourteen" etc. are absent
+% as the prefix "four" is sufficient to match).
+FOLD-WORDS: /^[0-9.,:-]*[0-9]([0-9.,:-]|\([0-9.,:-]*[0-9][0-9.,:-]*\)|\+\/-)*-?fold$/
+FOLD-WORDS: /^(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fifteen|twenty|thirty|fifty|hundred|thousand|million).*fold$/
+% Plural proper nouns.
+% Make sure that apostrophe-s is split out correctly.
+PL-CAPITALIZED-WORDS:  /^[[:upper:]].*[^iuoys'’]s$/
+% Other proper nouns.
+% We demand that these end with an alphanumeric, i.e. explicitly
+% reject punctuation. We don't want this regex to "swallow" any trailing
+% commas, colons, or periods/question-marks at the end of sentences.
+% In addition, this must not swallow words ending in 's 'll etc.
+% (... any affix, for that matter ...) and so no embedded apostrophe
+CAPITALIZED-WORDS:     /^[[:upper:]][^'’]*[^[:punct:]]$/
+% SUFFIX GUESSING
+% For all suffix-guessing patterns, we insist that the pattern start
+% with an alphanumeric. This is needed to guarantee that the
+% prefix-stripping code works correctly, as otherwise, the regex will
+% gobble the prefix. So for example: "We left (carrying the dog) and
+% Fred followed."  Since "(carrying" is not in the dict, we need to be
+% sure to not match the leading paren so that it will get stripped.
+%
+ING-WORDS:        /^\w.+ing$/
+% Plurals or verb-s. Make sure that apostrophe-s is split out correctly.
+% e.g. "The subject's name is John Doe."  should be
+%     +--Ds--+---YS--+--Ds-+
+%     |      |       |     |
+%    the subject.n 's.p name.n
+S-WORDS:          /^\w.+[^iuoys'’]s$/
+% Verbs ending -ed.
+ED-WORDS:         /^\w.+ed$/
+% Adverbs ending -ly.
+LY-WORDS:         /^\w.+ly$/
+% Nouns ending in -ism, -asm (chiliasm .. ) Usually mass nouns
+% Stubbed out for now; I'm not convinced this improves accuracy.
+% ISM-WORDS:        /^\w.+asm$/
+% ISM-WORDS:        /^\w.+ism$/
+% Corresponding count noun version of above (chiliast...)
+% AST-WORDS:        /^\w.+ast$/
+% AST-WORDS:        /^\w.+ist$/
+% Corresponding adjectival form of above
+ADJ-WORDS: /^\w.+astic$/
+ADJ-WORDS: /^\w.+istic$/
+% Nouns ending -ation  stubbed out in BioLG, stub out here ...
+%ATION-WORDS:      /^\w.+ation$/
+% Extension by LIPN 11/10/2005
+% nouns -- typically seen in (bio-)chemistry texts
+% synthetase, kinase
+% 5-(hydroxymethyl)-2’-deoxyuridine
+% hydroxyethyl, hydroxymethyl
+% septation, reguion
+% isomaltotetraose, isomaltotriose
+% glycosylphosphatidylinositol
+% iodide, oligodeoxynucleotide
+% chronicity, hypochromicity
+MC-NOUN-WORDS: /^\w.+ase$/
+MC-NOUN-WORDS: /^\w.+ine?$/
+MC-NOUN-WORDS: /^\w.+yl$/
+MC-NOUN-WORDS: /^\w.+ion$/
+MC-NOUN-WORDS: /^\w.+ose$/
+MC-NOUN-WORDS: /^\w.+ol$/
+MC-NOUN-WORDS: /^\w.+ide$/
+MC-NOUN-WORDS: /^\w.+ity$/
+% replicon, intron
+C-NOUN-WORDS: /^\w.+o[rn]$/
+% adjectives
+% exogenous, heterologous
+% intermolecular, intramolecular
+% glycolytic, ribonucleic, uronic
+% ribosomal, ribsosomal
+% nonpermissive, thermosensitive
+% inducible, metastable
+ADJ-WORDS: /^\w.+ous$/
+ADJ-WORDS: /^\w.+ar$/
+ADJ-WORDS: /^\w.+ic$/
+ADJ-WORDS: /^\w.+al$/
+ADJ-WORDS: /^\w.+ive$/
+ADJ-WORDS: /^\w.+ble$/
+% latin (postposed) adjectives
+% influenzae, tarentolae
+% pentosaceus, luteus, carnosus
+LATIN-ADJ-WORDS: /^\w.+ae$/
+LATIN-ADJ-WORDS: /^\w.+us$/ % must appear after -ous in this file
+% latin (postposed) adjectives  or latin plural noun
+% brevis, israelensis
+% japonicum, tabacum, xylinum
+LATIN-ADJ-P-NOUN-WORDS: /^\w.+is?$/
+LATIN-ADJ-S-NOUN-WORDS: /^\w.+um$/
+% Hyphenated words. In the original LG morpho-guessing system that
+% predated the regex-based system, hyphenated words were detected
+% before ING-WORDS, S-WORDS etc., causing e.g. "cross-linked" to be
+% treated as a HYPHENATED-WORD (a generic adjective/noun), and
+% never a verb. To return to this ordering, move this regex just
+% after the CAPITALIZED-WORDS regex.
+HYPHENATED-WORDS: /^[[:alpha:][:digit:],.][[:alpha:][:digit:],.-]*-[[:alpha:][:digit:],.-]*[[:alpha:][:digit:],.]$/
+% proteins often end "ase", so we'll assume those things are names.
+% removed, too many false positives.
+% NAME: /ase$/
+% Sequence of punctuation marks. If some mark appears in the affix table
+% such as a period, comma, dash or underscore, and there's a sequence of
+% these, then treat it as a "fill-in-the-blank" placeholder.
+% This matters only for punc. appearing in the affix table, since the
+% tokenizer explicitly mangles based on these punctuation marks.
+%
+% Look for at least four in a row.
+UNKNOWN-WORD: /^[.,-]{4}[.,-]*$/