grammar_cop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.DS_Store +0 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/data/.DS_Store +0 -0
- data/data/Makefile +511 -0
- data/data/Makefile.am +4 -0
- data/data/Makefile.in +511 -0
- data/data/de/.DS_Store +0 -0
- data/data/de/4.0.affix +7 -0
- data/data/de/4.0.dict +474 -0
- data/data/de/Makefile +387 -0
- data/data/de/Makefile.am +9 -0
- data/data/de/Makefile.in +387 -0
- data/data/en/.DS_Store +0 -0
- data/data/en/4.0.affix +26 -0
- data/data/en/4.0.batch +1002 -0
- data/data/en/4.0.biolg.batch +411 -0
- data/data/en/4.0.constituent-knowledge +127 -0
- data/data/en/4.0.dict +8759 -0
- data/data/en/4.0.dict.m4 +6928 -0
- data/data/en/4.0.enwiki.batch +14 -0
- data/data/en/4.0.fixes.batch +2776 -0
- data/data/en/4.0.knowledge +306 -0
- data/data/en/4.0.regex +225 -0
- data/data/en/4.0.voa.batch +114 -0
- data/data/en/Makefile +554 -0
- data/data/en/Makefile.am +19 -0
- data/data/en/Makefile.in +554 -0
- data/data/en/README +173 -0
- data/data/en/tiny.dict +157 -0
- data/data/en/words/.DS_Store +0 -0
- data/data/en/words/Makefile +456 -0
- data/data/en/words/Makefile.am +78 -0
- data/data/en/words/Makefile.in +456 -0
- data/data/en/words/currency +205 -0
- data/data/en/words/currency.p +28 -0
- data/data/en/words/entities.given-bisex.sing +39 -0
- data/data/en/words/entities.given-female.sing +4141 -0
- data/data/en/words/entities.given-male.sing +1633 -0
- data/data/en/words/entities.locations.sing +68 -0
- data/data/en/words/entities.national.sing +253 -0
- data/data/en/words/entities.organizations.sing +7 -0
- data/data/en/words/entities.us-states.sing +11 -0
- data/data/en/words/units.1 +45 -0
- data/data/en/words/units.1.dot +4 -0
- data/data/en/words/units.3 +2 -0
- data/data/en/words/units.4 +5 -0
- data/data/en/words/units.4.dot +1 -0
- data/data/en/words/words-medical.adv.1 +1191 -0
- data/data/en/words/words-medical.prep.1 +67 -0
- data/data/en/words/words-medical.v.4.1 +2835 -0
- data/data/en/words/words-medical.v.4.2 +2848 -0
- data/data/en/words/words-medical.v.4.3 +3011 -0
- data/data/en/words/words-medical.v.4.4 +3036 -0
- data/data/en/words/words-medical.v.4.5 +3050 -0
- data/data/en/words/words.adj.1 +6794 -0
- data/data/en/words/words.adj.2 +638 -0
- data/data/en/words/words.adj.3 +667 -0
- data/data/en/words/words.adv.1 +1573 -0
- data/data/en/words/words.adv.2 +67 -0
- data/data/en/words/words.adv.3 +157 -0
- data/data/en/words/words.adv.4 +80 -0
- data/data/en/words/words.n.1 +11464 -0
- data/data/en/words/words.n.1.wiki +264 -0
- data/data/en/words/words.n.2.s +2017 -0
- data/data/en/words/words.n.2.s.biolg +1 -0
- data/data/en/words/words.n.2.s.wiki +298 -0
- data/data/en/words/words.n.2.x +65 -0
- data/data/en/words/words.n.2.x.wiki +10 -0
- data/data/en/words/words.n.3 +5717 -0
- data/data/en/words/words.n.t +23 -0
- data/data/en/words/words.v.1.1 +1038 -0
- data/data/en/words/words.v.1.2 +1043 -0
- data/data/en/words/words.v.1.3 +1052 -0
- data/data/en/words/words.v.1.4 +1023 -0
- data/data/en/words/words.v.1.p +17 -0
- data/data/en/words/words.v.10.1 +14 -0
- data/data/en/words/words.v.10.2 +15 -0
- data/data/en/words/words.v.10.3 +88 -0
- data/data/en/words/words.v.10.4 +17 -0
- data/data/en/words/words.v.2.1 +1253 -0
- data/data/en/words/words.v.2.2 +1304 -0
- data/data/en/words/words.v.2.3 +1280 -0
- data/data/en/words/words.v.2.4 +1285 -0
- data/data/en/words/words.v.2.5 +1287 -0
- data/data/en/words/words.v.4.1 +2472 -0
- data/data/en/words/words.v.4.2 +2487 -0
- data/data/en/words/words.v.4.3 +2441 -0
- data/data/en/words/words.v.4.4 +2478 -0
- data/data/en/words/words.v.4.5 +2483 -0
- data/data/en/words/words.v.5.1 +98 -0
- data/data/en/words/words.v.5.2 +98 -0
- data/data/en/words/words.v.5.3 +103 -0
- data/data/en/words/words.v.5.4 +102 -0
- data/data/en/words/words.v.6.1 +388 -0
- data/data/en/words/words.v.6.2 +401 -0
- data/data/en/words/words.v.6.3 +397 -0
- data/data/en/words/words.v.6.4 +405 -0
- data/data/en/words/words.v.6.5 +401 -0
- data/data/en/words/words.v.8.1 +117 -0
- data/data/en/words/words.v.8.2 +118 -0
- data/data/en/words/words.v.8.3 +118 -0
- data/data/en/words/words.v.8.4 +119 -0
- data/data/en/words/words.v.8.5 +119 -0
- data/data/en/words/words.y +104 -0
- data/data/lt/.DS_Store +0 -0
- data/data/lt/4.0.affix +6 -0
- data/data/lt/4.0.constituent-knowledge +24 -0
- data/data/lt/4.0.dict +135 -0
- data/data/lt/4.0.knowledge +38 -0
- data/data/lt/Makefile +389 -0
- data/data/lt/Makefile.am +11 -0
- data/data/lt/Makefile.in +389 -0
- data/ext/.DS_Store +0 -0
- data/ext/link_grammar/.DS_Store +0 -0
- data/ext/link_grammar/extconf.rb +2 -0
- data/ext/link_grammar/link-grammar/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
- data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
- data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
- data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
- data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
- data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
- data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
- data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
- data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
- data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
- data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
- data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
- data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
- data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
- data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
- data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
- data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
- data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
- data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
- data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
- data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
- data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
- data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
- data/ext/link_grammar/link-grammar/Makefile +900 -0
- data/ext/link_grammar/link-grammar/Makefile.am +202 -0
- data/ext/link_grammar/link-grammar/Makefile.in +900 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
- data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
- data/ext/link_grammar/link-grammar/and.c +1603 -0
- data/ext/link_grammar/link-grammar/and.h +27 -0
- data/ext/link_grammar/link-grammar/api-structures.h +362 -0
- data/ext/link_grammar/link-grammar/api-types.h +72 -0
- data/ext/link_grammar/link-grammar/api.c +1887 -0
- data/ext/link_grammar/link-grammar/api.h +96 -0
- data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/autoit/README +10 -0
- data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
- data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
- data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
- data/ext/link_grammar/link-grammar/command-line.c +458 -0
- data/ext/link_grammar/link-grammar/command-line.h +15 -0
- data/ext/link_grammar/link-grammar/constituents.c +1836 -0
- data/ext/link_grammar/link-grammar/constituents.h +26 -0
- data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
- data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/corpus/README +17 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
- data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
- data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
- data/ext/link_grammar/link-grammar/count.c +828 -0
- data/ext/link_grammar/link-grammar/count.h +25 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
- data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
- data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
- data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
- data/ext/link_grammar/link-grammar/error.c +92 -0
- data/ext/link_grammar/link-grammar/error.h +35 -0
- data/ext/link_grammar/link-grammar/expand.c +67 -0
- data/ext/link_grammar/link-grammar/expand.h +13 -0
- data/ext/link_grammar/link-grammar/externs.h +22 -0
- data/ext/link_grammar/link-grammar/extract-links.c +625 -0
- data/ext/link_grammar/link-grammar/extract-links.h +16 -0
- data/ext/link_grammar/link-grammar/fast-match.c +309 -0
- data/ext/link_grammar/link-grammar/fast-match.h +17 -0
- data/ext/link_grammar/link-grammar/idiom.c +373 -0
- data/ext/link_grammar/link-grammar/idiom.h +15 -0
- data/ext/link_grammar/link-grammar/jni-client.c +779 -0
- data/ext/link_grammar/link-grammar/jni-client.h +236 -0
- data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
- data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
- data/ext/link_grammar/link-grammar/link-features.h +37 -0
- data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
- data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
- data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
- data/ext/link_grammar/link-grammar/link-includes.h +465 -0
- data/ext/link_grammar/link-grammar/link-parser.c +849 -0
- data/ext/link_grammar/link-grammar/massage.c +329 -0
- data/ext/link_grammar/link-grammar/massage.h +13 -0
- data/ext/link_grammar/link-grammar/post-process.c +1113 -0
- data/ext/link_grammar/link-grammar/post-process.h +45 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
- data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
- data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
- data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
- data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
- data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
- data/ext/link_grammar/link-grammar/prefix.c +482 -0
- data/ext/link_grammar/link-grammar/prefix.h +139 -0
- data/ext/link_grammar/link-grammar/preparation.c +412 -0
- data/ext/link_grammar/link-grammar/preparation.h +20 -0
- data/ext/link_grammar/link-grammar/print-util.c +87 -0
- data/ext/link_grammar/link-grammar/print-util.h +32 -0
- data/ext/link_grammar/link-grammar/print.c +1085 -0
- data/ext/link_grammar/link-grammar/print.h +16 -0
- data/ext/link_grammar/link-grammar/prune.c +1864 -0
- data/ext/link_grammar/link-grammar/prune.h +17 -0
- data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
- data/ext/link_grammar/link-grammar/read-dict.h +29 -0
- data/ext/link_grammar/link-grammar/read-regex.c +161 -0
- data/ext/link_grammar/link-grammar/read-regex.h +12 -0
- data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
- data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
- data/ext/link_grammar/link-grammar/resources.c +180 -0
- data/ext/link_grammar/link-grammar/resources.h +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
- data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
- data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
- data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
- data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
- data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
- data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
- data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
- data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
- data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
- data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
- data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
- data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
- data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
- data/ext/link_grammar/link-grammar/string-set.c +169 -0
- data/ext/link_grammar/link-grammar/string-set.h +16 -0
- data/ext/link_grammar/link-grammar/structures.h +498 -0
- data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
- data/ext/link_grammar/link-grammar/tokenize.h +15 -0
- data/ext/link_grammar/link-grammar/utilities.c +847 -0
- data/ext/link_grammar/link-grammar/utilities.h +281 -0
- data/ext/link_grammar/link-grammar/word-file.c +124 -0
- data/ext/link_grammar/link-grammar/word-file.h +15 -0
- data/ext/link_grammar/link-grammar/word-utils.c +526 -0
- data/ext/link_grammar/link-grammar/word-utils.h +152 -0
- data/ext/link_grammar/link_grammar.c +202 -0
- data/ext/link_grammar/link_grammar.h +99 -0
- data/grammar_cop.gemspec +24 -0
- data/lib/.DS_Store +0 -0
- data/lib/grammar_cop.rb +9 -0
- data/lib/grammar_cop/.DS_Store +0 -0
- data/lib/grammar_cop/dictionary.rb +19 -0
- data/lib/grammar_cop/linkage.rb +30 -0
- data/lib/grammar_cop/parse_options.rb +32 -0
- data/lib/grammar_cop/sentence.rb +36 -0
- data/lib/grammar_cop/version.rb +3 -0
- data/test/.DS_Store +0 -0
- data/test/grammar_cop_test.rb +27 -0
- metadata +407 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* All rights reserved */
|
5
|
+
/* */
|
6
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
7
|
+
/* license set forth in the LICENSE file included with this software, */
|
8
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
9
|
+
/* This license allows free redistribution and use in source and binary */
|
10
|
+
/* forms, with or without modification, subject to certain conditions. */
|
11
|
+
/* */
|
12
|
+
/*************************************************************************/
|
13
|
+
int separate_sentence(Sentence, Parse_Options);
|
14
|
+
int build_sentence_expressions(Sentence, Parse_Options);
|
15
|
+
int sentence_in_dictionary(Sentence);
|
@@ -0,0 +1,847 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* Copyright (c) 2004 */
|
3
|
+
/* Daniel Sleator, David Temperley, and John Lafferty */
|
4
|
+
/* Copyright 2008, 2009 Linas Vepstas */
|
5
|
+
/* All rights reserved */
|
6
|
+
/* */
|
7
|
+
/* Use of the link grammar parsing system is subject to the terms of the */
|
8
|
+
/* license set forth in the LICENSE file included with this software, */
|
9
|
+
/* and also available at http://www.link.cs.cmu.edu/link/license.html */
|
10
|
+
/* This license allows free redistribution and use in source and binary */
|
11
|
+
/* forms, with or without modification, subject to certain conditions. */
|
12
|
+
/* */
|
13
|
+
/*************************************************************************/
|
14
|
+
|
15
|
+
//#ifndef GRAMMAR_UTILITIES_H
|
16
|
+
//#define
|
17
|
+
|
18
|
+
#include "api.h"
|
19
|
+
#include <limits.h>
|
20
|
+
#include <locale.h>
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <string.h>
|
23
|
+
#include <ctype.h>
|
24
|
+
|
25
|
+
#ifdef USE_PTHREADS
|
26
|
+
#include <pthread.h>
|
27
|
+
#endif
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
#ifdef ENABLE_BINRELOC
|
32
|
+
#include "prefix.h"
|
33
|
+
#endif /* BINRELOC */
|
34
|
+
|
35
|
+
#ifdef _WIN32
|
36
|
+
# include <windows.h>
|
37
|
+
# define DIR_SEPARATOR '\\'
|
38
|
+
# define PATH_SEPARATOR ';'
|
39
|
+
#else
|
40
|
+
# define DIR_SEPARATOR '/'
|
41
|
+
# define PATH_SEPARATOR ':'
|
42
|
+
#endif
|
43
|
+
|
44
|
+
#define IS_DIR_SEPARATOR(ch) (DIR_SEPARATOR == (ch))
|
45
|
+
//#ifdef _MSC_VER
|
46
|
+
//#define DICTIONARY_DIR "."
|
47
|
+
//#endif
|
48
|
+
#define DICTIONARY_DIR "../data/"
|
49
|
+
#define DEFAULTPATH DICTIONARY_DIR
|
50
|
+
|
51
|
+
/* This file contains certain general utilities. */
|
52
|
+
int verbosity;
|
53
|
+
|
54
|
+
/* ============================================================= */
|
55
|
+
/* String utilities */
|
56
|
+
|
57
|
+
/**
 * Duplicate a string into freshly allocated storage.
 *
 * Returns NULL when u is NULL or when the allocation fails; otherwise
 * the caller owns the returned copy and releases it with free().
 */
char *safe_strdup(const char *u)
{
	size_t nbytes;
	char *copy;

	if (NULL == u) return NULL;

	nbytes = strlen(u) + 1;   /* include the terminating '\0' */
	copy = malloc(nbytes);
	if (NULL != copy) memcpy(copy, u, nbytes);
	return copy;
}
|
63
|
+
|
64
|
+
/**
 * Copies as much of v into u as fits, assuming u has room for usize
 * bytes. u is always '\0'-terminated, and any unused remainder of the
 * buffer is zero-filled (the same result strncpy gives).
 *
 * A usize of zero means there is no room even for the terminator, so
 * nothing is written. u and v may overlap: the copy is done with
 * memmove, which is defined for overlapping regions.
 */
void safe_strcpy(char *u, const char * v, size_t usize)
{
	size_t n = 0;

	/* usize-1 below would wrap around to SIZE_MAX. */
	if (0 == usize) return;

	/* Length of the part of v that fits, leaving room for '\0'. */
	while ((n < usize - 1) && (v[n] != '\0')) n++;

	memmove(u, v, n);
	memset(u + n, 0, usize - n);
}
|
73
|
+
|
74
|
+
/**
 * A strlcpy look-alike, for those systems that don't have one.
 *
 * Copies at most size-1 bytes of src into dest and terminates dest
 * with '\0' whenever size is non-zero. Unlike BSD strlcpy, the value
 * returned is the number of bytes actually stored before the
 * terminator, not strlen(src). With size == 0 nothing is written and
 * 0 is returned.
 */
size_t lg_strlcpy(char * dest, const char *src, size_t size)
{
	size_t n;

	if (0 == size) return 0;

	for (n = 0; (n < size - 1) && (src[n] != '\0'); n++)
		dest[n] = src[n];

	dest[n] = '\0';
	return n;
}
|
89
|
+
|
90
|
+
/**
 * Catenates as much of v onto u as fits, assuming u has room for usize
 * bytes; u is always left '\0'-terminated. Assumes u and v are
 * '\0'-terminated on entry.
 *
 * If usize is zero nothing is touched. If u already occupies the whole
 * buffer (strlen(u) >= usize-1) nothing is appended and u is simply
 * cut at usize-1; without this check the remaining-space computation
 * would wrap around and strncat would run unbounded.
 */
void safe_strcat(char *u, const char *v, size_t usize)
{
	size_t used;

	if (0 == usize) return;

	used = strlen(u);
	if (used < usize - 1)
		strncat(u, v, usize - used - 1);

	u[usize-1] = '\0';
}
|
100
|
+
|
101
|
+
/**
 * Prints exactly strlen(t) characters to fp: first the characters of
 * s, then, once s is exhausted, the characters of t from that same
 * position onward (i.e. the last |t|-|s| characters of t).
 * If s is longer than t, s is truncated to the length of t.
 *
 * Lengths are kept in size_t so that long strings are not narrowed
 * into int.
 */
void left_print_string(FILE * fp, const char * s, const char * t)
{
	size_t width = strlen(t);
	size_t slen = strlen(s);
	size_t i;

	for (i = 0; i < width; i++)
		fputc((i < slen) ? s[i] : t[i], fp);
}
|
118
|
+
|
119
|
+
#ifdef _WIN32 /* should be !defined(HAVE_STRTOK_R) */

/**
 * Re-entrant tokenizer for platforms lacking strtok_r.
 *
 * s is the string to scan on the first call and NULL on later calls,
 * in which case scanning resumes from *saveptr. Leading delimiter
 * characters are skipped; the token is terminated in place by
 * overwriting the first following delimiter with '\0'. Returns the
 * token, or NULL when no token remains. *saveptr is set to NULL once
 * the input is exhausted.
 */
char *
strtok_r (char *s, const char *delim, char **saveptr)
{
	char *tok;
	char *end;

	tok = (NULL != s) ? s : *saveptr;
	if (NULL == tok) return NULL;

	/* Skip past any delimiters. */
	tok += strspn(tok, delim);
	if ('\0' == *tok)
	{
		*saveptr = NULL;
		return NULL;
	}

	/* Look for end of the token. */
	end = strpbrk(tok, delim);
	if (NULL != end)
	{
		*end = '\0';
		*saveptr = end + 1;
	}
	else
	{
		/* Token runs to the end of the string; nothing left after it. */
		*saveptr = NULL;
	}

	return tok;
}

#endif /* _WIN32 should be !HAVE_STRTOK_R */
|
158
|
+
|
159
|
+
/* ============================================================= */
|
160
|
+
/* UTF8 utilities */
|
161
|
+
|
162
|
+
/**
 * Downcase the first letter of the word.
 *
 * Writes into `to` (room for usize bytes) the text of `from` with its
 * first multi-byte character converted by towlower(). `to` may equal
 * `from` for an in-place conversion, provided the lower-case encoding
 * is not longer than the original one; otherwise an error is printed
 * and the string is left untouched.
 *
 * If `from` is empty or starts with a byte sequence mbtowc() cannot
 * decode, there is nothing to downcase and the text is passed through
 * unchanged. If the converted character plus a terminator does not fit
 * in usize bytes, `to` is set to the empty string (or, in place, left
 * as it was).
 *
 * NOTE(review): wctomb_check is defined outside this block; it is
 * assumed to return the number of bytes it stored in `low` -- confirm.
 */
void downcase_utf8_str(char *to, const char * from, size_t usize)
{
	wchar_t c;
	int nbl, nbh;
	char low[MB_LEN_MAX];
	mbstate_t mbss;

	/* No room even for a terminator. */
	if (0 == usize) return;

	nbh = mbtowc (&c, from, MB_CUR_MAX);
	if (nbh <= 0)
	{
		/* 0: empty string; -1: undecodable. In both cases c holds no
		 * usable character, and advancing `from` by nbh would be wrong. */
		if (to != from) safe_strcpy(to, from, usize);
		return;
	}

	c = towlower(c);
	memset(&mbss, 0, sizeof(mbss));
	nbl = wctomb_check(low, c, &mbss);
	if (nbl <= 0)
	{
		/* Could not re-encode; pass the text through unchanged. */
		if (to != from) safe_strcpy(to, from, usize);
		return;
	}

	if (to == from)
	{
		/* Check for error on an in-place copy */
		if (nbh < nbl)
		{
			/* I'm too lazy to fix this */
			prt_error("Error: can't downcase multi-byte string!\n");
			return;
		}

		/* Same encoded length: overwrite the first character only. */
		if (nbh == nbl)
		{
			memcpy(to, low, (size_t) nbl);
			return;
		}
	}

	/* The character and a terminator must both fit; this also keeps
	 * usize-nbl below from wrapping around. */
	if ((size_t) nbl >= usize)
	{
		if (to != from) to[0] = '\0';
		return;
	}

	/* Downcase, then append the rest of the word. */
	memcpy(to, low, (size_t) nbl);
	safe_strcpy(to + nbl, from + nbh, usize - (size_t) nbl);
}
|
194
|
+
|
195
|
+
/**
 * Upcase the first letter of the word.
 *
 * Writes into `to` (room for usize bytes) the text of `from` with its
 * first multi-byte character converted by towupper(). `to` may equal
 * `from` for an in-place conversion, provided the upper-case encoding
 * is not longer than the original one; otherwise an error is printed
 * and the string is left untouched.
 *
 * If `from` is empty or starts with a byte sequence mbtowc() cannot
 * decode, there is nothing to upcase and the text is passed through
 * unchanged. If the converted character plus a terminator does not fit
 * in usize bytes, `to` is set to the empty string (or, in place, left
 * as it was).
 *
 * NOTE(review): wctomb_check is defined outside this block; it is
 * assumed to return the number of bytes it stored in `up` -- confirm.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize)
{
	wchar_t c;
	int nbl, nbh;
	char up[MB_LEN_MAX];
	mbstate_t mbss;

	/* No room even for a terminator. */
	if (0 == usize) return;

	nbh = mbtowc (&c, from, MB_CUR_MAX);
	if (nbh <= 0)
	{
		/* 0: empty string; -1: undecodable. In both cases c holds no
		 * usable character, and advancing `from` by nbh would be wrong. */
		if (to != from) safe_strcpy(to, from, usize);
		return;
	}

	c = towupper(c);
	memset(&mbss, 0, sizeof(mbss));
	nbl = wctomb_check(up, c, &mbss);
	if (nbl <= 0)
	{
		/* Could not re-encode; pass the text through unchanged. */
		if (to != from) safe_strcpy(to, from, usize);
		return;
	}

	if (to == from)
	{
		/* Check for error on an in-place copy */
		if (nbh < nbl)
		{
			/* I'm too lazy to fix this */
			prt_error("Error: can't upcase multi-byte string!\n");
			return;
		}

		/* Same encoded length: overwrite the first character only. */
		if (nbh == nbl)
		{
			memcpy(to, up, (size_t) nbl);
			return;
		}
	}

	/* The character and a terminator must both fit; this also keeps
	 * usize-nbl below from wrapping around. */
	if ((size_t) nbl >= usize)
	{
		if (to != from) to[0] = '\0';
		return;
	}

	/* Upcase, then append the rest of the word. */
	memcpy(to, up, (size_t) nbl);
	safe_strcpy(to + nbl, from + nbh, usize - (size_t) nbl);
}
|
227
|
+
|
228
|
+
/* ============================================================= */
|
229
|
+
/* Memory alloc routines below. These routines attempt to keep
|
230
|
+
* track of how much space is getting used during a parse.
|
231
|
+
*
|
232
|
+
* This code is probably obsolescent, and should probably be dumped.
|
233
|
+
 * No one (that I know of) looks at the space usage; it's one of the
|
234
|
+
* few areas that needs pthreads -- it would be great to just get
|
235
|
+
* rid of it (and thus get rid of pthreads).
|
236
|
+
*/
|
237
|
+
|
238
|
+
#ifdef TRACK_SPACE_USAGE
|
239
|
+
/* Byte counters maintained by the tracking allocators in this file. */
typedef struct
{
	size_t max_space_used;           /* largest value space_in_use has reached */
	size_t space_in_use;             /* bytes handed out by xalloc and not yet xfree'd */
	size_t max_external_space_used;  /* largest value external_space_in_use has reached */
	size_t external_space_in_use;    /* bytes handed out by exalloc and not yet exfree'd */
} space_t;
|
246
|
+
|
247
|
+
#ifdef USE_PTHREADS
|
248
|
+
static pthread_key_t space_key;
|
249
|
+
static pthread_once_t space_key_once = PTHREAD_ONCE_INIT;
|
250
|
+
|
251
|
+
static void fini_memusage(void)
|
252
|
+
{
|
253
|
+
space_t *s = (space_t *) pthread_getspecific(space_key);
|
254
|
+
if (s)
|
255
|
+
{
|
256
|
+
free(s);
|
257
|
+
pthread_setspecific(space_key, NULL);
|
258
|
+
}
|
259
|
+
pthread_key_delete(space_key);
|
260
|
+
space_key = 0;
|
261
|
+
}
|
262
|
+
|
263
|
+
static void space_key_alloc(void)
|
264
|
+
{
|
265
|
+
int rc = pthread_key_create(&space_key, free);
|
266
|
+
if (0 == rc)
|
267
|
+
atexit(fini_memusage);
|
268
|
+
}
|
269
|
+
#else
|
270
|
+
static space_t space;
|
271
|
+
#endif
|
272
|
+
|
273
|
+
static space_t * do_init_memusage(void)
|
274
|
+
{
|
275
|
+
space_t *s;
|
276
|
+
|
277
|
+
#ifdef USE_PTHREADS
|
278
|
+
s = (space_t *) malloc(sizeof(space_t));
|
279
|
+
pthread_setspecific(space_key, s);
|
280
|
+
#else
|
281
|
+
s = &space;
|
282
|
+
#endif
|
283
|
+
|
284
|
+
s->max_space_used = 0;
|
285
|
+
s->space_in_use = 0;
|
286
|
+
s->max_external_space_used = 0;
|
287
|
+
s->external_space_in_use = 0;
|
288
|
+
|
289
|
+
return s;
|
290
|
+
}
|
291
|
+
|
292
|
+
void init_memusage(void)
|
293
|
+
{
|
294
|
+
#ifdef USE_PTHREADS
|
295
|
+
pthread_once(&space_key_once, space_key_alloc);
|
296
|
+
#else
|
297
|
+
static int mem_inited = FALSE;
|
298
|
+
if (mem_inited) return;
|
299
|
+
mem_inited = TRUE;
|
300
|
+
#endif
|
301
|
+
do_init_memusage();
|
302
|
+
}
|
303
|
+
|
304
|
+
static inline space_t *getspace(void)
|
305
|
+
{
|
306
|
+
#ifdef USE_PTHREADS
|
307
|
+
space_t *s = pthread_getspecific(space_key);
|
308
|
+
if (s) return s;
|
309
|
+
return do_init_memusage();
|
310
|
+
#else
|
311
|
+
return &space;
|
312
|
+
#endif
|
313
|
+
}
|
314
|
+
|
315
|
+
/**
|
316
|
+
* space used but not yet freed during parse
|
317
|
+
*/
|
318
|
+
size_t get_space_in_use(void)
|
319
|
+
{
|
320
|
+
return getspace()->space_in_use;
|
321
|
+
}
|
322
|
+
|
323
|
+
/**
|
324
|
+
* maximum space used during the parse
|
325
|
+
*/
|
326
|
+
size_t get_max_space_used(void)
|
327
|
+
{
|
328
|
+
return getspace()->max_space_used;
|
329
|
+
}
|
330
|
+
#else /* TRACK_SPACE_USAGE */
|
331
|
+
/* Space tracking is compiled out: initialization does nothing and
 * both usage queries report zero. */
void init_memusage(void)
{
}

size_t get_space_in_use(void)
{
	return 0;
}

size_t get_max_space_used(void)
{
	return 0;
}
|
334
|
+
#endif /* TRACK_SPACE_USAGE */
|
335
|
+
|
336
|
+
/**
 * Alloc some memory, and keep track of the space allocated.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted, so the caller never sees a failed allocation. A NULL
 * result for a zero-byte request is not treated as a failure.
 * The usage counters are updated only once the allocation is known to
 * have succeeded.
 */
void * xalloc(size_t size)
{
	void * p = malloc(size);

	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	{
		space_t *s = getspace();
		s->space_in_use += size;
		if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
	}
#endif /* TRACK_SPACE_USAGE */

	return p;
}
|
356
|
+
|
357
|
+
/**
 * Resize a block obtained from xalloc, keeping the usage counters in
 * step: oldsize bytes are taken off the running total and newsize
 * bytes are added.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted. A NULL result for a zero-byte request is not treated as
 * a failure.
 */
void * xrealloc(void *p, size_t oldsize, size_t newsize)
{
#ifdef TRACK_SPACE_USAGE
	space_t *s = getspace();
#endif /* TRACK_SPACE_USAGE */

	p = realloc(p, newsize);
	if ((p == NULL) && (newsize != 0))
	{
		prt_error("Fatal Error: Ran out of space on realloc.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	s->space_in_use -= oldsize;
	s->space_in_use += newsize;
	if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
#endif /* TRACK_SPACE_USAGE */

	return p;
}
|
376
|
+
|
377
|
+
#ifdef TRACK_SPACE_USAGE
/**
 * Release a block obtained from xalloc and take its size bytes off the
 * in-use total.
 */
void xfree(void * p, size_t size)
{
	space_t *tally = getspace();

	tally->space_in_use -= size;
	free(p);
}
#endif /* TRACK_SPACE_USAGE */
|
384
|
+
|
385
|
+
/**
 * Alloc some memory, and keep track of it in the "external" counters.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted. A NULL result for a zero-byte request is not treated as
 * a failure. The counters are updated only once the allocation is
 * known to have succeeded.
 */
void * exalloc(size_t size)
{
	void * p = malloc(size);

	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	{
		space_t *s = getspace();
		s->external_space_in_use += size;
		if (s->max_external_space_used < s->external_space_in_use)
			s->max_external_space_used = s->external_space_in_use;
	}
#endif /* TRACK_SPACE_USAGE */

	return p;
}
|
403
|
+
|
404
|
+
#ifdef TRACK_SPACE_USAGE
/**
 * Release a block obtained from exalloc and take its size bytes off
 * the external in-use total.
 */
void exfree(void * p, size_t size)
{
	space_t *tally = getspace();

	tally->external_space_in_use -= size;
	free(p);
}
#endif /* TRACK_SPACE_USAGE */
|
411
|
+
|
412
|
+
/* =========================================================== */
|
413
|
+
/* File path and dictionary open routines below */
|
414
|
+
|
415
|
+
char * join_path(const char * prefix, const char * suffix)
|
416
|
+
{
|
417
|
+
char * path;
|
418
|
+
int path_len;
|
419
|
+
|
420
|
+
path_len = strlen(prefix) + 1 /* len(DIR_SEPARATOR) */ + strlen(suffix);
|
421
|
+
path = (char *) malloc(path_len + 1);
|
422
|
+
|
423
|
+
strcpy(path, prefix);
|
424
|
+
path[strlen(path)+1] = '\0';
|
425
|
+
path[strlen(path)] = DIR_SEPARATOR;
|
426
|
+
strcat(path, suffix);
|
427
|
+
|
428
|
+
return path;
|
429
|
+
}
|
430
|
+
|
431
|
+
#ifdef _WIN32
/* borrowed from glib */
/* Used only for Windows builds */
/**
 * path_get_dirname() -- return the directory portion of file_name.
 *
 * The last DIR_SEPARATOR or '/' in file_name marks the end of the
 * directory part. With no separator at all the result is "X:." for a
 * name starting with a drive letter and colon, and "." otherwise.
 *
 * @param file_name  path to split; must be a valid C string.
 * @return a newly allocated string (from malloc() or safe_strdup()),
 *         or NULL if an allocation made here fails.
 */
static char*
path_get_dirname (const char *file_name)
{
	const char *base;
	const char *slash;
	char *dir;
	size_t len;

	/* Find the last separator, whichever of DIR_SEPARATOR or '/' it is. */
	base = strrchr (file_name, DIR_SEPARATOR);
	slash = strrchr (file_name, '/');
	if (base == NULL || (slash != NULL && slash > base))
		base = slash;

	if (!base)
	{
		if (is_utf8_alpha (file_name) && file_name[1] == ':')
		{
			char drive_colon_dot[4];

			drive_colon_dot[0] = file_name[0];
			drive_colon_dot[1] = ':';
			drive_colon_dot[2] = '.';
			drive_colon_dot[3] = '\0';

			return safe_strdup (drive_colon_dot);
		}
		return safe_strdup (".");
	}

	/* Step back over the run of separators that ends the directory. */
	while (base > file_name && IS_DIR_SEPARATOR (*base))
		base--;

	/* base points to the char before the last slash.
	 *
	 * In case file_name is the root of a drive (X:\) or a child of the
	 * root of a drive (X:\foo), include the slash.
	 *
	 * In case file_name is the root share of an UNC path
	 * (\\server\share), add a slash, returning \\server\share\ .
	 *
	 * In case file_name is a direct child of a share in an UNC path
	 * (\\server\share\foo), include the slash after the share name,
	 * returning \\server\share\ .
	 */
	if (base == file_name + 1 && is_utf8_alpha (file_name) && file_name[1] == ':')
		base++;
	else if (IS_DIR_SEPARATOR (file_name[0]) &&
	         IS_DIR_SEPARATOR (file_name[1]) &&
	         file_name[2] &&
	         !IS_DIR_SEPARATOR (file_name[2]) &&
	         base >= file_name + 2)
	{
		const char *p = file_name + 2;
		while (*p && !IS_DIR_SEPARATOR (*p))
			p++;
		if (p == base + 1)
		{
			/* Whole name plus one appended separator plus NUL. */
			len = strlen (file_name);
			dir = (char *) malloc (len + 2);
			if (NULL == dir) return NULL;
			memcpy (dir, file_name, len);
			dir[len] = DIR_SEPARATOR;
			dir[len + 1] = 0;
			return dir;
		}
		if (IS_DIR_SEPARATOR (*p))
		{
			p++;
			while (*p && !IS_DIR_SEPARATOR (*p))
				p++;
			if (p == base + 1)
				base++;
		}
	}

	/* Copy everything up to and including *base. */
	len = (size_t) (base - file_name) + 1;

	dir = (char *) malloc (len + 1);
	if (NULL == dir) return NULL;
	memcpy (dir, file_name, len);
	dir[len] = 0;

	return dir;
}
#endif /* _WIN32 */
|
522
|
+
|
523
|
+
/* global - but that's OK, since this is set only during initialization,
|
524
|
+
* and is thenceforth a read-only item. So it doesn't need to be
|
525
|
+
* locked.
|
526
|
+
*/
|
527
|
+
static char * custom_data_dir = NULL;

/**
 * dictionary_set_data_dir() -- replace the custom data directory.
 *
 * Any previously stored value is freed, and the result of
 * safe_strdup(path) is stored in its place.
 *
 * @param path  directory to remember; handed to safe_strdup().
 */
void dictionary_set_data_dir(const char * path)
{
	/* free(NULL) is a no-op, so the first call needs no special case. */
	free (custom_data_dir);
	custom_data_dir = safe_strdup(path);
}
|
534
|
+
|
535
|
+
char * dictionary_get_data_dir(void)
|
536
|
+
{
|
537
|
+
#ifdef _WIN32
|
538
|
+
HINSTANCE hInstance;
|
539
|
+
#endif
|
540
|
+
char * data_dir = NULL;
|
541
|
+
|
542
|
+
if (custom_data_dir != NULL) {
|
543
|
+
data_dir = safe_strdup(custom_data_dir);
|
544
|
+
return data_dir;
|
545
|
+
}
|
546
|
+
|
547
|
+
#ifdef ENABLE_BINRELOC
|
548
|
+
data_dir = safe_strdup (BR_DATADIR("/link-grammar"));
|
549
|
+
#elif defined(_WIN32)
|
550
|
+
/* Dynamically locate library and return containing directory */
|
551
|
+
hInstance = GetModuleHandle("link-grammar.dll");
|
552
|
+
if(hInstance != NULL)
|
553
|
+
{
|
554
|
+
char dll_path[MAX_PATH];
|
555
|
+
|
556
|
+
if(GetModuleFileName(hInstance,dll_path,MAX_PATH)) {
|
557
|
+
#ifdef _DEBUG
|
558
|
+
prt_error("Info: GetModuleFileName=%s\n", (dll_path?dll_path:"NULL"));
|
559
|
+
#endif
|
560
|
+
data_dir = path_get_dirname(dll_path);
|
561
|
+
}
|
562
|
+
}
|
563
|
+
#endif
|
564
|
+
|
565
|
+
return data_dir;
|
566
|
+
}
|
567
|
+
|
568
|
+
/**
|
569
|
+
* object_open() -- dictopen() - open a dictionary
|
570
|
+
*
|
571
|
+
* This function is used to open a dictionary file or a word file,
|
572
|
+
* or any associated data file (like a post process knowledge file).
|
573
|
+
*
|
574
|
+
* It works as follows. If the file name begins with a "/", then
|
575
|
+
* it's assumed to be an absolute file name and it tries to open
|
576
|
+
* that exact file.
|
577
|
+
*
|
578
|
+
* If the filename does not begin with a "/", then it uses the
|
579
|
+
* dictpath mechanism to find the right file to open. This looks
|
580
|
+
* for the file in a sequence of directories until it finds it. The
|
581
|
+
* sequence of directories is specified in a dictpath string, in
|
582
|
+
* which each directory is followed by a ":".
|
583
|
+
*/
|
584
|
+
void * object_open(const char *filename,
|
585
|
+
void * (*opencb)(const char *, void *),
|
586
|
+
void * user_data)
|
587
|
+
{
|
588
|
+
char completename[MAX_PATH_NAME+1];
|
589
|
+
char fulldictpath[MAX_PATH_NAME+1];
|
590
|
+
static char prevpath[MAX_PATH_NAME+1] = "";
|
591
|
+
static int first_time_ever = 1;
|
592
|
+
char *pos, *oldpos;
|
593
|
+
int filenamelen, len;
|
594
|
+
void *fp;
|
595
|
+
|
596
|
+
/* Record the first path ever used, so that we can recycle it */
|
597
|
+
if (first_time_ever)
|
598
|
+
{
|
599
|
+
strncpy (prevpath, filename, MAX_PATH_NAME);
|
600
|
+
prevpath[MAX_PATH_NAME] = 0;
|
601
|
+
pos = strrchr(prevpath, DIR_SEPARATOR);
|
602
|
+
if (pos) *pos = 0;
|
603
|
+
pos = strrchr(prevpath, DIR_SEPARATOR);
|
604
|
+
if (pos) *(pos+1) = 0;
|
605
|
+
first_time_ever = 0;
|
606
|
+
}
|
607
|
+
|
608
|
+
/* Look for absolute filename.
|
609
|
+
* Unix: starts with leading slash.
|
610
|
+
* Windows: starts with C:\ except that the drive letter may differ.
|
611
|
+
*/
|
612
|
+
if ((filename[0] == '/') || ((filename[1] == ':') && (filename[2] == '\\')))
|
613
|
+
{
|
614
|
+
/* fopen returns NULL if the file does not exist. */
|
615
|
+
fp = opencb(filename, user_data);
|
616
|
+
if (fp) return fp;
|
617
|
+
}
|
618
|
+
|
619
|
+
{
|
620
|
+
char * data_dir = dictionary_get_data_dir();
|
621
|
+
#ifdef _DEBUG
|
622
|
+
prt_error("Info: data_dir=%s\n", (data_dir?data_dir:"NULL"));
|
623
|
+
#endif
|
624
|
+
if (data_dir) {
|
625
|
+
snprintf(fulldictpath, MAX_PATH_NAME,
|
626
|
+
"%s%c%s%c", data_dir, PATH_SEPARATOR,
|
627
|
+
DEFAULTPATH, PATH_SEPARATOR);
|
628
|
+
free(data_dir);
|
629
|
+
}
|
630
|
+
else {
|
631
|
+
/* Always make sure that it ends with a path
|
632
|
+
* separator char for the below while() loop.
|
633
|
+
* For unix, this should look like:
|
634
|
+
* /usr/share/link-grammar:.:data:..:../data:
|
635
|
+
* For windows:
|
636
|
+
* C:\SOMWHERE;.;data;..;..\data;
|
637
|
+
*/
|
638
|
+
snprintf(fulldictpath, MAX_PATH_NAME,
|
639
|
+
"%s%c%s%c%s%c%s%c%s%c%s%c%s%c",
|
640
|
+
prevpath, PATH_SEPARATOR,
|
641
|
+
DEFAULTPATH, PATH_SEPARATOR,
|
642
|
+
".", PATH_SEPARATOR,
|
643
|
+
"data", PATH_SEPARATOR,
|
644
|
+
"..", PATH_SEPARATOR,
|
645
|
+
"..", DIR_SEPARATOR, "data", PATH_SEPARATOR);
|
646
|
+
}
|
647
|
+
}
|
648
|
+
|
649
|
+
/* Now fulldictpath is our dictpath, where each entry is
|
650
|
+
* followed by a ":" including the last one */
|
651
|
+
|
652
|
+
filenamelen = strlen(filename);
|
653
|
+
len = strlen(fulldictpath)+ filenamelen + 1 + 1;
|
654
|
+
oldpos = fulldictpath;
|
655
|
+
while ((pos = strchr(oldpos, PATH_SEPARATOR)) != NULL)
|
656
|
+
{
|
657
|
+
strncpy(completename, oldpos, (pos-oldpos));
|
658
|
+
*(completename+(pos-oldpos)) = DIR_SEPARATOR;
|
659
|
+
strcpy(completename+(pos-oldpos)+1,filename);
|
660
|
+
#ifdef _DEBUG
|
661
|
+
prt_error("Info: object_open() trying %s\n", completename);
|
662
|
+
#endif
|
663
|
+
if ((fp = opencb(completename, user_data)) != NULL) {
|
664
|
+
return fp;
|
665
|
+
}
|
666
|
+
oldpos = pos+1;
|
667
|
+
}
|
668
|
+
return NULL;
|
669
|
+
}
|
670
|
+
|
671
|
+
/* XXX static global variable used during dictionary open */
static char *path_found = NULL;

/**
 * dict_file_open() -- fopen() adapter with the callback signature
 * taken by object_open().
 *
 * The first time an fopen() succeeds while path_found is still NULL,
 * a copy of fullname is stored in path_found and an informational
 * message is printed.
 *
 * @param fullname   file name handed to fopen().
 * @param user_data  the fopen() mode string, passed as a void pointer.
 * @return the FILE pointer from fopen() as a void pointer; NULL if
 *         the file could not be opened.
 */
static void * dict_file_open(const char * fullname, void * user_data)
{
	const char * mode = (const char *) user_data;
	FILE * stream = fopen(fullname, mode);

	if (NULL == stream) return NULL;

	if (NULL == path_found)
	{
		path_found = strdup (fullname);
		prt_error("Info: Dictionary found at %s\n", fullname);
	}
	return (void *) stream;
}
|
685
|
+
|
686
|
+
FILE *dictopen(const char *filename, const char *how)
|
687
|
+
{
|
688
|
+
FILE * fh = NULL;
|
689
|
+
void * ud = (void *) how;
|
690
|
+
|
691
|
+
/* If not the first time through, look for the other dictionaries
|
692
|
+
* in the *same* directory in which the first one was found.
|
693
|
+
* (The first one is typcailly "en/4.0.dict")
|
694
|
+
* The global "path_found" records where the first dict was found.
|
695
|
+
* The goal here is to avoid fractured install insanity.
|
696
|
+
*/
|
697
|
+
if (path_found)
|
698
|
+
{
|
699
|
+
size_t sz = strlen (path_found) + strlen(filename) + 1;
|
700
|
+
char * fullname = (char *) malloc (sz);
|
701
|
+
strcpy(fullname, path_found);
|
702
|
+
strcat(fullname, filename);
|
703
|
+
fh = (FILE *) object_open(fullname, dict_file_open, ud);
|
704
|
+
free(fullname);
|
705
|
+
}
|
706
|
+
else
|
707
|
+
{
|
708
|
+
fh = (FILE *) object_open(filename, dict_file_open, ud);
|
709
|
+
if (path_found)
|
710
|
+
{
|
711
|
+
char * root = strstr(path_found, filename);
|
712
|
+
*root = 0;
|
713
|
+
}
|
714
|
+
}
|
715
|
+
return fh;
|
716
|
+
}
|
717
|
+
|
718
|
+
/* ======================================================== */
|
719
|
+
/* Locale routines */
|
720
|
+
|
721
|
+
#ifdef _WIN32

/**
 * win32_getlocale() -- build a POSIX-style locale name on Windows.
 *
 * The first non-empty value among the LC_ALL, LC_MESSAGES and LANG
 * environment variables is used if there is one. Otherwise the name
 * is assembled from the thread locale as "<iso639>_<iso3166>", with
 * an "@Latn" or "@Cyrl" suffix for the Azeri, Serbian and Uzbek
 * sublanguages listed below. If the locale information cannot be
 * queried, "C" is used.
 *
 * @return the result of safe_strdup() on the chosen name.
 */
static char *
win32_getlocale (void)
{
	LCID lcid;
	LANGID langid;
	char *ev;
	int primary, sub;
	char bfr[64];
	char iso639[10];
	char iso3166[10];
	const char *script = NULL;

	/* Let the user override the system settings through environment
	 * variables, as on POSIX systems. Note that in GTK+ applications
	 * since GTK+ 2.10.7 setting either LC_ALL or LANG also sets the
	 * Win32 locale and C library locale through code in gtkmain.c.
	 */
	if (((ev = getenv ("LC_ALL")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LC_MESSAGES")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LANG")) != NULL && ev[0] != '\0'))
		return safe_strdup (ev);

	lcid = GetThreadLocale ();

	if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, sizeof (iso639)) ||
	    !GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, sizeof (iso3166)))
		return safe_strdup ("C");

	/* Strip off the sorting rules, keep only the language part. */
	langid = LANGIDFROMLCID (lcid);

	/* Split into language and territory part. */
	primary = PRIMARYLANGID (langid);
	sub = SUBLANGID (langid);

	/* Handle special cases */
	switch (primary)
	{
		case LANG_AZERI:
			switch (sub)
			{
				case SUBLANG_AZERI_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_AZERI_CYRILLIC:
					script = "@Cyrl";
					break;
			}
			break;
		case LANG_SERBIAN: /* LANG_CROATIAN == LANG_SERBIAN */
			switch (sub)
			{
				case SUBLANG_SERBIAN_LATIN:
				case 0x06: /* Serbian (Latin) - Bosnia and Herzegovina */
					script = "@Latn";
					break;
			}
			break;
		case LANG_UZBEK:
			switch (sub)
			{
				case SUBLANG_UZBEK_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_UZBEK_CYRILLIC:
					script = "@Cyrl";
					break;
			}
			break;
	}

	/* bfr starts out uninitialized, so the first piece must be copied,
	 * not appended. Worst case is 9 + 1 + 9 + 5 characters plus the
	 * NUL, which fits comfortably in bfr. */
	strcpy (bfr, iso639);
	strcat (bfr, "_");
	strcat (bfr, iso3166);

	if (script)
		strcat (bfr, script);

	return safe_strdup (bfr);
}

#endif
|
805
|
+
|
806
|
+
/**
 * get_default_locale() -- return the language code of the default locale.
 *
 * Sources are consulted in this order until one yields a name:
 *   1. win32_getlocale() (Windows builds only);
 *   2. the LANG environment variable, if set and non-empty;
 *   3. setlocale(LC_MESSAGES, NULL), when HAVE_LC_MESSAGES is defined;
 *   4. setlocale(LC_ALL, NULL).
 * If nothing is found, or the name is "C" or "POSIX", "en" is used.
 *
 * The name is then cut at the first '@', '.' and '_', so that e.g.
 * "en_GB.UTF-8" and "en_GB@euro" both become "en".
 *
 * @return the result of safe_strdup() after trimming; it is passed to
 *         free() inside this function, so the caller is expected to
 *         free() it as well. NULL if no string could be allocated.
 */
char * get_default_locale(void)
{
	char * locale = NULL;
	char * needle;
	const char * lang;

#ifdef _WIN32
	locale = win32_getlocale ();
#endif

	if(!locale)
	{
		/* An empty LANG counts as unset, matching how win32_getlocale()
		 * treats its environment variables. */
		lang = getenv ("LANG");
		if (lang != NULL && lang[0] != '\0')
			locale = safe_strdup (lang);
	}

#if defined(HAVE_LC_MESSAGES)
	if(!locale)
		locale = safe_strdup (setlocale (LC_MESSAGES, NULL));
#endif

	if(!locale)
		locale = safe_strdup (setlocale (LC_ALL, NULL));

	/* "POSIX" is another name for the "C" locale. */
	if(!locale || strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) {
		free(locale);
		locale = safe_strdup("en");
	}

	/* Nothing to trim if even the fallback could not be allocated. */
	if(!locale)
		return NULL;

	/* strip off "@euro" from en_GB@euro */
	if ((needle = strchr (locale, '@')) != NULL)
		*needle = '\0';

	/* strip off ".UTF-8" from en_GB.UTF-8 */
	if ((needle = strchr (locale, '.')) != NULL)
		*needle = '\0';

	/* strip off "_GB" from en_GB */
	if ((needle = strchr (locale, '_')) != NULL)
		*needle = '\0';

	return locale;
}
|
847
|
+
/* ========================== END OF FILE =================== */
|