grammar_cop 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
#include "word-tag.hpp"
|
2
|
+
#include "fast-sprintf.hpp"
|
3
|
+
|
4
|
+
/*
 * Copy the NUL-terminated variable-name prefix `src` into `dst` (a buffer of
 * `capacity` bytes) and return a pointer to the terminating NUL inside `dst`.
 *
 * A NULL `src` is treated as an empty prefix.  The copy refuses (by throwing
 * std::string, the error style used elsewhere in this file) any prefix that
 * would not leave room for the one-character child tag that the callers
 * append right after it.
 * NOTE(review): the number of bytes fast_sprintf() writes after the tag is not
 * visible here, so this check only guarantees that the prefix and the tag fit.
 */
static char* copy_var_prefix(char* dst, int capacity, const char* src) {
  int len = 0;
  if (src != NULL) {
    for (; src[len] != '\0'; len++) {
      if (len + 2 >= capacity)
        throw std::string("Variable name too long: ") + src;
      dst[len] = src[len];
    }
  }
  dst[len] = '\0';
  return dst + len;
}

/*
 * Recursively walk the expression tree rooted at `exp` and register one
 * PositionConnector for every CONNECTOR_type leaf.
 *
 *  - CONNECTOR_type: `dfs_position` is incremented, a Connector is built from
 *    the leaf (multi flag, name, length limit) and appended to
 *    _right_connectors ('+') or _left_connectors ('-').  _dir and _position
 *    record, per leaf, the side and the index inside that side's vector.
 *    The leading flag of the side that received the connector is cleared.
 *  - AND_type: an empty list is ignored; a single child is visited with the
 *    same `var`; otherwise child number i is visited under the name
 *    var + 'c' + i, and after each child an epsilon variable is appended to
 *    eps_right / eps_left while the corresponding leading flag is still set
 *    (only when `var` is non-NULL).
 *  - OR_type: a single child is visited with the same `var`; otherwise child
 *    number i is visited under the name var + 'd' + i, each with its own copy
 *    of the leading flags and epsilon vectors.  Afterwards a leading flag is
 *    true iff at least one child left its copy true.
 *
 * exp           expression node to process
 * dfs_position  running count of connector leaves seen so far (updated)
 * leading_right / leading_left   leading flags (updated)
 * eps_right / eps_left           epsilon variable lists (updated in AND nodes)
 * var           name prefix for this node; may be NULL (treated as empty)
 * root          forwarded unchanged to a unary child, false for other children;
 *               not otherwise read here
 * parrent_cost  cost accumulated above this node; exp->cost is added to it
 *
 * Throws std::string on an unknown connector direction or an over-long
 * variable-name prefix.
 */
void WordTag::insert_connectors(Exp* exp, int& dfs_position,
                                bool& leading_right, bool& leading_left,
                                std::vector<int>& eps_right,
                                std::vector<int>& eps_left,
                                char* var, bool root, int parrent_cost) {
  int cost = parrent_cost + exp->cost;

  if (exp->type == CONNECTOR_type) {
    dfs_position++;

    // Reject a bad direction before allocating, so that the Connector is not
    // leaked when the exception propagates.
    if (exp->dir != '+' && exp->dir != '-')
      throw std::string("Unknown connector direction: ") + exp->dir;

    const char* name = exp->u.string;

    Connector* connector = connector_new();
    connector->multi = exp->multi;
    connector->string = name;
    set_connector_length_limit(connector);

    if (exp->dir == '+') {
      _position.push_back(_right_connectors.size());
      _dir.push_back('+');
      _right_connectors.push_back(PositionConnector(connector, '+', _word, dfs_position, exp->cost, cost,
                                                    leading_right, false,
                                                    eps_right, eps_left));
      leading_right = false;
    } else {
      _position.push_back(_left_connectors.size());
      _dir.push_back('-');
      _left_connectors.push_back(PositionConnector(connector, '-', _word, dfs_position, exp->cost, cost,
                                                   false, leading_left,
                                                   eps_right, eps_left));
      leading_left = false;
    }
  } else if (exp->type == AND_type) {
    if (exp->u.l == NULL) {
      /* zeroary and */
    } else if (exp->u.l->next == NULL) {
      /* unary and - skip */
      insert_connectors(exp->u.l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, var, root, cost);
    } else {
      int i;
      E_list* l;

      char new_var[MAX_VARIABLE_NAME];
      char* last_new_var = copy_var_prefix(new_var, (int) sizeof(new_var), var);

      for (i = 0, l = exp->u.l; l != NULL; l = l->next, i++) {
        // Child name: <var> 'c' <index>
        char* s = last_new_var;
        *s++ = 'c';
        fast_sprintf(s, i);

        insert_connectors(l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, new_var, false, cost);

        if (leading_right && var != NULL) {
          eps_right.push_back(_variables->epsilon(new_var, '+'));
        }

        if (leading_left && var != NULL) {
          eps_left.push_back(_variables->epsilon(new_var, '-'));
        }
      }
    }
  } else if (exp->type == OR_type) {
    if (exp->u.l != NULL && exp->u.l->next == NULL) {
      /* unary or - skip */
      insert_connectors(exp->u.l->e, dfs_position, leading_right, leading_left, eps_right, eps_left, var, root, cost);
    } else {
      int i;
      E_list* l;
      bool ll_true = false;
      bool lr_true = false;

      char new_var[MAX_VARIABLE_NAME];
      char* last_new_var = copy_var_prefix(new_var, (int) sizeof(new_var), var);

      for (i = 0, l = exp->u.l; l != NULL; l = l->next, i++) {
        // Every alternative starts from the caller's state, not from the
        // state left behind by the previous alternative.
        bool lr = leading_right, ll = leading_left;
        std::vector<int> er = eps_right, el = eps_left;

        // Child name: <var> 'd' <index>
        char* s = last_new_var;
        *s++ = 'd';
        fast_sprintf(s, i);

        insert_connectors(l->e, dfs_position, lr, ll, er, el, new_var, false, cost);
        if (lr)
          lr_true = true;
        if (ll)
          ll_true = true;
      }
      leading_right = lr_true;
      leading_left = ll_true;
    }
  }
}
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
void WordTag::find_matches(int w, const char* C, char dir, std::vector<PositionConnector*>& matches) {
|
117
|
+
// cout << "Look connection on: ." << _word << ". ." << w << ". " << C << dir << endl;
|
118
|
+
Connector search_cntr;
|
119
|
+
init_connector(&search_cntr);
|
120
|
+
search_cntr.label = NORMAL_LABEL;
|
121
|
+
search_cntr.priority = THIN_priority;
|
122
|
+
search_cntr.string = C;
|
123
|
+
set_connector_length_limit(&search_cntr);
|
124
|
+
|
125
|
+
std::vector<PositionConnector>* connectors;
|
126
|
+
switch(dir) {
|
127
|
+
case '+':
|
128
|
+
connectors = &_left_connectors;
|
129
|
+
break;
|
130
|
+
case '-':
|
131
|
+
connectors = &_right_connectors;
|
132
|
+
break;
|
133
|
+
default:
|
134
|
+
throw std::string("Unknown connector direction: ") + dir;
|
135
|
+
}
|
136
|
+
|
137
|
+
bool conjunction = sentence_contains_conjunction(_sent);
|
138
|
+
std::vector<PositionConnector>::iterator i;
|
139
|
+
for (i = connectors->begin(); i != connectors->end(); i++) {
|
140
|
+
if (WordTag::match(w, search_cntr, dir, (*i).word, *((*i).connector), conjunction)) {
|
141
|
+
matches.push_back(&(*i));
|
142
|
+
}
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
/*
 * Cross-link every right connector of this word with the connectors of `tag`
 * that it matches.
 *
 * For each matching pair both connectors get a pointer to the other in their
 * `matches` vector, this tag records (word, position) of the partner as a
 * possible match, and `tag` records (this word, position) of our connector.
 */
void WordTag::add_matches_with_word(WordTag& tag) {
  for (std::vector<PositionConnector>::iterator rc = _right_connectors.begin();
       rc != _right_connectors.end(); ++rc) {
    PositionConnector& mine = *rc;

    std::vector<PositionConnector*> found;
    tag.find_matches(_word, mine.connector->string, '+', found);

    for (std::vector<PositionConnector*>::iterator f = found.begin();
         f != found.end(); ++f) {
      PositionConnector* theirs = *f;

      mine.matches.push_back(theirs);
      set_match_possible(theirs->word, theirs->position);

      theirs->matches.push_back(&mine);
      tag.set_match_possible(_word, mine.position);
    }
  }
}
|
@@ -0,0 +1,162 @@
|
|
1
|
+
#ifndef __WORD_TAG_HPP__
|
2
|
+
#define __WORD_TAG_HPP__
|
3
|
+
|
4
|
+
#include <vector>
|
5
|
+
#include <map>
|
6
|
+
#include <set>
|
7
|
+
|
8
|
+
extern "C" {
|
9
|
+
#include <link-grammar/api.h>
|
10
|
+
}
|
11
|
+
|
12
|
+
#include "variables.hpp"
|
13
|
+
|
14
|
+
|
15
|
+
struct PositionConnector {
|
16
|
+
PositionConnector(Connector* c, char d, int w, int p, int cst, int pcst,
|
17
|
+
bool lr, bool ll, const std::vector<int>& er, const std::vector<int>& el)
|
18
|
+
: connector(c), dir(d), word(w), position(p), cost(cst), parrent_cost(pcst),
|
19
|
+
leading_right(lr), leading_left(ll),
|
20
|
+
eps_right(er), eps_left(el) {
|
21
|
+
/*
|
22
|
+
cout << c->string << " : ." << w << ". : ." << p << ". ";
|
23
|
+
if (leading_right) {
|
24
|
+
cout << "lr: ";
|
25
|
+
copy(er.begin(), er.end(), ostream_iterator<int>(cout, " "));
|
26
|
+
}
|
27
|
+
if (leading_left) {
|
28
|
+
cout << "ll: ";
|
29
|
+
copy(el.begin(), el.end(), ostream_iterator<int>(cout, " "));
|
30
|
+
}
|
31
|
+
cout << endl;
|
32
|
+
*/
|
33
|
+
}
|
34
|
+
|
35
|
+
// Connector itself
|
36
|
+
Connector* connector;
|
37
|
+
// Direction
|
38
|
+
char dir;
|
39
|
+
// word in a sentence that this connector belongs to
|
40
|
+
int word;
|
41
|
+
// position in the word tag
|
42
|
+
int position;
|
43
|
+
// cost of the connector
|
44
|
+
int cost;
|
45
|
+
// parrent cost
|
46
|
+
int parrent_cost;
|
47
|
+
|
48
|
+
bool leading_right;
|
49
|
+
std::vector<int> eps_right;
|
50
|
+
bool leading_left;
|
51
|
+
std::vector<int> eps_left;
|
52
|
+
|
53
|
+
// Matches with other words
|
54
|
+
std::vector<PositionConnector*> matches;
|
55
|
+
|
56
|
+
};
|
57
|
+
|
58
|
+
|
59
|
+
// TODO: Hash connectors for faster matching
|
60
|
+
|
61
|
+
class WordTag {
|
62
|
+
private:
|
63
|
+
std::vector<PositionConnector> _left_connectors;
|
64
|
+
std::vector<PositionConnector> _right_connectors;
|
65
|
+
|
66
|
+
std::vector<char> _dir;
|
67
|
+
std::vector<int> _position;
|
68
|
+
|
69
|
+
int _word;
|
70
|
+
Variables* _variables;
|
71
|
+
|
72
|
+
Sentence _sent;
|
73
|
+
Parse_Options _opts;
|
74
|
+
|
75
|
+
// Could this word tag match a connector (wi, pi)?
|
76
|
+
// For each word wi I keep a set of positions pi that can be matched
|
77
|
+
std::vector< std::set<int> > _match_possible;
|
78
|
+
void set_match_possible(int wj, int pj) {
|
79
|
+
_match_possible[wj].insert(pj);
|
80
|
+
}
|
81
|
+
|
82
|
+
public:
|
83
|
+
WordTag(int word, Variables* variables, Sentence sent, Parse_Options opts)
|
84
|
+
: _word(word), _variables(variables), _sent(sent), _opts(opts) {
|
85
|
+
_match_possible.resize(_sent->length);
|
86
|
+
}
|
87
|
+
|
88
|
+
const std::vector<PositionConnector>& get_left_connectors() {
|
89
|
+
return _left_connectors;
|
90
|
+
}
|
91
|
+
|
92
|
+
const std::vector<PositionConnector>& get_right_connectors() {
|
93
|
+
return _right_connectors;
|
94
|
+
}
|
95
|
+
|
96
|
+
PositionConnector* get(int dfs_position) {
|
97
|
+
switch (_dir[dfs_position - 1]) {
|
98
|
+
case '+':
|
99
|
+
return &_right_connectors[_position[dfs_position - 1]];
|
100
|
+
case '-':
|
101
|
+
return &_left_connectors[_position[dfs_position - 1]];
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
void set_connector_length_limit(Connector* c) {
|
106
|
+
int short_len = _opts->short_length;
|
107
|
+
if (short_len > UNLIMITED_LEN)
|
108
|
+
short_len = UNLIMITED_LEN;
|
109
|
+
|
110
|
+
Connector_set *conset = _sent->dict->unlimited_connector_set;
|
111
|
+
if (parse_options_get_all_short_connectors(_opts)) {
|
112
|
+
c->length_limit = short_len;
|
113
|
+
}
|
114
|
+
else if (conset == NULL || match_in_connector_set(_sent, conset, c, '+')) {
|
115
|
+
c->length_limit = UNLIMITED_LEN;
|
116
|
+
} else {
|
117
|
+
c->length_limit = short_len;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
int match(int w1, Connector& cntr1, char dir, int w2, Connector& cntr2, bool conjunction) {
|
122
|
+
if (conjunction) {
|
123
|
+
switch (dir) {
|
124
|
+
case '+':
|
125
|
+
return ::prune_match(0, &cntr1, &cntr2);
|
126
|
+
case '-':
|
127
|
+
return ::prune_match(0, &cntr2, &cntr1);
|
128
|
+
default:
|
129
|
+
throw std::string("Unknown connector direction: ") + dir;
|
130
|
+
}
|
131
|
+
} else {
|
132
|
+
return ::do_match(_sent, &cntr1, &cntr2, w1, w2);
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
void insert_connectors(Exp* exp, int& dfs_position,
|
137
|
+
bool& leading_right, bool& leading_left,
|
138
|
+
std::vector<int>& eps_right,
|
139
|
+
std::vector<int>& eps_left,
|
140
|
+
char* var, bool root, int parrent_cost);
|
141
|
+
|
142
|
+
// Caches information about the found matches to the _matches vector, and also
|
143
|
+
// updates the _matches vector of all connectors in the given tag.
|
144
|
+
// In order to have all possible matches correctly cached, the function assumes that it is
|
145
|
+
// iteratively called for all words in the sentence, where the tag is on the right side of
|
146
|
+
// this word
|
147
|
+
void add_matches_with_word(WordTag& tag);
|
148
|
+
|
149
|
+
// Find matches in this word tag with the connector (name, dir).
|
150
|
+
void find_matches(int w, const char* C, char dir, std::vector<PositionConnector*>& matches);
|
151
|
+
|
152
|
+
// A simpler function: Can any connector in this word match a connector wi, pi?
|
153
|
+
// It is assumed that
|
154
|
+
bool match_possible(int wi, int pi) {
|
155
|
+
return _match_possible[wi].find(pi) != _match_possible[wi].end();
|
156
|
+
}
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
};
|
161
|
+
|
162
|
+
#endif
|
@@ -0,0 +1,148 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2009 Vikas N. Kumar */
|
3
|
+
/* Copyright (c) 2009 Linas Vepstas */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
|
14
|
+
/* Guard against this translation unit being pulled in more than once. */
#ifndef SPELLCHECK_ASPELL_C
#define SPELLCHECK_ASPELL_C
|
16
|
+
|
17
|
+
#ifdef HAVE_ASPELL
|
18
|
+
|
19
|
+
#include <stdio.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <string.h>
|
22
|
+
|
23
|
+
#include <aspell.h>
|
24
|
+
#include "link-includes.h"
|
25
|
+
#include "spellcheck.h"
|
26
|
+
#include "utilities.h" /* For Win32 compatibility */
|
27
|
+
|
28
|
+
/* Name of the Aspell configuration option used to select the language. */
#define ASPELL_LANG_KEY "lang"

/*
 * Flat table of language pairs: each even slot holds a link-grammar
 * language name and the following odd slot holds the Aspell language key
 * that goes with it.
 */
static const char *spellcheck_lang_mapping[] = {
	/* link-grammar language */ "en",
	/* Aspell language key   */ "en_US"
};
|
32
|
+
|
33
|
+
struct linkgrammar_aspell {
|
34
|
+
AspellConfig *config;
|
35
|
+
AspellSpeller *speller;
|
36
|
+
};
|
37
|
+
|
38
|
+
void * spellcheck_create(const char * lang)
|
39
|
+
{
|
40
|
+
struct linkgrammar_aspell *aspell = NULL;
|
41
|
+
size_t i = 0;
|
42
|
+
AspellCanHaveError *spell_err = NULL;
|
43
|
+
|
44
|
+
for (i = 0; i < sizeof(spellcheck_lang_mapping)/sizeof(char *); i += 2)
|
45
|
+
{
|
46
|
+
if (0 != strcmp(lang, spellcheck_lang_mapping[i])) continue;
|
47
|
+
aspell = (struct linkgrammar_aspell *)malloc(sizeof(struct linkgrammar_aspell));
|
48
|
+
if (!aspell) {
|
49
|
+
prt_error("Error: out of memory. Aspell not used.\n");
|
50
|
+
aspell = NULL;
|
51
|
+
break;
|
52
|
+
}
|
53
|
+
aspell->config = NULL;
|
54
|
+
aspell->speller = NULL;
|
55
|
+
aspell->config = new_aspell_config();
|
56
|
+
if (aspell_config_replace(aspell->config, ASPELL_LANG_KEY,
|
57
|
+
spellcheck_lang_mapping[i]) == 0) {
|
58
|
+
prt_error("Error: failed to set language in aspell: %s\n", lang);
|
59
|
+
delete_aspell_config(aspell->config);
|
60
|
+
free(aspell);
|
61
|
+
aspell = NULL;
|
62
|
+
break;
|
63
|
+
}
|
64
|
+
spell_err = new_aspell_speller(aspell->config);
|
65
|
+
if (aspell_error_number(spell_err) != 0) {
|
66
|
+
prt_error("Error: Aspell: %s\n", aspell_error_message(spell_err));
|
67
|
+
delete_aspell_can_have_error(spell_err);
|
68
|
+
delete_aspell_config(aspell->config);
|
69
|
+
free(aspell);
|
70
|
+
aspell = NULL;
|
71
|
+
break;
|
72
|
+
}
|
73
|
+
aspell->speller = to_aspell_speller(spell_err);
|
74
|
+
break;
|
75
|
+
}
|
76
|
+
return aspell;
|
77
|
+
}
|
78
|
+
|
79
|
+
void spellcheck_destroy(void * chk)
|
80
|
+
{
|
81
|
+
struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
|
82
|
+
if (aspell) {
|
83
|
+
delete_aspell_speller(aspell->speller);
|
84
|
+
delete_aspell_config(aspell->config);
|
85
|
+
free(aspell);
|
86
|
+
aspell = NULL;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
int spellcheck_test(void * chk, const char * word)
|
91
|
+
{
|
92
|
+
int val = 0;
|
93
|
+
struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
|
94
|
+
if (aspell && aspell->speller) {
|
95
|
+
/* this can return -1 on failure */
|
96
|
+
val = aspell_speller_check(aspell->speller, word, -1);
|
97
|
+
}
|
98
|
+
return (val == 1) ? 1 : 0;
|
99
|
+
}
|
100
|
+
|
101
|
+
int spellcheck_suggest(void * chk, char ***sug, const char * word)
|
102
|
+
{
|
103
|
+
struct linkgrammar_aspell *aspell = (struct linkgrammar_aspell *)chk;
|
104
|
+
if (!sug) {
|
105
|
+
prt_error("Error: Aspell. Corrupt pointer.\n");
|
106
|
+
return 0;
|
107
|
+
}
|
108
|
+
if (aspell && aspell->speller) {
|
109
|
+
const AspellWordList *list = NULL;
|
110
|
+
AspellStringEnumeration *elem = NULL;
|
111
|
+
const char *aword = NULL;
|
112
|
+
unsigned int size, i;
|
113
|
+
char **array = NULL;
|
114
|
+
|
115
|
+
list = aspell_speller_suggest(aspell->speller, word, -1);
|
116
|
+
elem = aspell_word_list_elements(list);
|
117
|
+
size = aspell_word_list_size(list);
|
118
|
+
/* allocate an array of char* for returning back to link-parser
|
119
|
+
*/
|
120
|
+
array = (char **)malloc(sizeof(char *) * size);
|
121
|
+
if (!array) {
|
122
|
+
prt_error("Error: Aspell. Out of memory.\n");
|
123
|
+
delete_aspell_string_enumeration(elem);
|
124
|
+
return 0;
|
125
|
+
}
|
126
|
+
i = 0;
|
127
|
+
while ((aword = aspell_string_enumeration_next(elem)) != NULL) {
|
128
|
+
array[i++] = strdup(aword);
|
129
|
+
}
|
130
|
+
delete_aspell_string_enumeration(elem);
|
131
|
+
*sug = array;
|
132
|
+
return size;
|
133
|
+
}
|
134
|
+
return 0;
|
135
|
+
}
|
136
|
+
|
137
|
+
/*
 * Release a suggestion array produced by spellcheck_suggest(): the first
 * `size` strings and then the array itself.  A NULL `sug` is ignored
 * regardless of `size`.
 */
void spellcheck_free_suggest(char **sug, int size)
{
	int i;

	if (!sug) return;

	for (i = 0; i < size; ++i) {
		free(sug[i]);
	}
	free(sug);
}
|
146
|
+
|
147
|
+
#endif /* #ifdef HAVE_ASPELL */
|
148
|
+
#endif
|