grammar_cop 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,15 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ int separate_sentence(Sentence, Parse_Options);
14
+ int build_sentence_expressions(Sentence, Parse_Options);
15
+ int sentence_in_dictionary(Sentence);
@@ -0,0 +1,847 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* Copyright 2008, 2009 Linas Vepstas */
5
+ /* All rights reserved */
6
+ /* */
7
+ /* Use of the link grammar parsing system is subject to the terms of the */
8
+ /* license set forth in the LICENSE file included with this software, */
9
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
10
+ /* This license allows free redistribution and use in source and binary */
11
+ /* forms, with or without modification, subject to certain conditions. */
12
+ /* */
13
+ /*************************************************************************/
14
+
15
+ //#ifndef GRAMMAR_UTILITIES_H
16
+ //#define
17
+
18
+ #include "api.h"
19
+ #include <limits.h>
20
+ #include <locale.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <ctype.h>
24
+
25
+ #ifdef USE_PTHREADS
26
+ #include <pthread.h>
27
+ #endif
28
+
29
+
30
+
31
+ #ifdef ENABLE_BINRELOC
32
+ #include "prefix.h"
33
+ #endif /* BINRELOC */
34
+
35
+ #ifdef _WIN32
36
+ # include <windows.h>
37
+ # define DIR_SEPARATOR '\\'
38
+ # define PATH_SEPARATOR ';'
39
+ #else
40
+ # define DIR_SEPARATOR '/'
41
+ # define PATH_SEPARATOR ':'
42
+ #endif
43
+
44
+ #define IS_DIR_SEPARATOR(ch) (DIR_SEPARATOR == (ch))
45
+ //#ifdef _MSC_VER
46
+ //#define DICTIONARY_DIR "."
47
+ //#endif
48
+ #define DICTIONARY_DIR "../data/"
49
+ #define DEFAULTPATH DICTIONARY_DIR
50
+
51
+ /* This file contains certain general utilities. */
52
+ int verbosity;
53
+
54
+ /* ============================================================= */
55
+ /* String utilities */
56
+
57
/**
 * Duplicate the string u into freshly allocated storage.
 * Returns NULL when u is NULL, or when the allocation fails.
 * The caller owns the returned buffer and releases it with free().
 * Written with malloc/memcpy so that only ISO C library calls are used.
 */
char *safe_strdup(const char *u)
{
	size_t nbytes;
	char *copy;

	if (NULL == u) return NULL;

	nbytes = strlen(u) + 1;   /* include the terminating NUL */
	copy = malloc(nbytes);
	if (NULL != copy) memcpy(copy, u, nbytes);
	return copy;
}
63
+
64
/**
 * Copy as much of v into u as fits, where u is a buffer of usize bytes.
 * On return u is always terminated with a '\0', provided usize is
 * non-zero.  A usize of zero means "no room at all": u is left untouched
 * (without this guard usize-1 wraps around to SIZE_MAX, which turns the
 * copy into an unbounded one and writes the terminator out of bounds).
 *
 * As with strncpy, any bytes of u after the copied string, up to usize,
 * are filled with zeros.
 */
void safe_strcpy(char *u, const char * v, size_t usize)
{
	if (0 == usize) return;

	strncpy(u, v, usize-1);
	u[usize-1] = '\0';
}
73
+
74
/**
 * A bounded string copy in the spirit of strlcpy, for those systems
 * that don't have it.
 *
 * Copies at most size-1 bytes of src into dest and always terminates
 * dest with a '\0' when size is non-zero.  When size is zero nothing
 * is written.
 *
 * Returns the number of bytes stored in dest, not counting the
 * terminator.  Note that this is the length of the (possibly truncated)
 * result, not strlen(src) as the BSD strlcpy would return.
 */
size_t lg_strlcpy(char * dest, const char *src, size_t size)
{
	size_t n;

	if (0 == size) return 0;

	/* Leave one byte of room for the terminator. */
	for (n = 0; (n < size - 1) && (src[n] != '\0'); n++)
		dest[n] = src[n];

	dest[n] = '\0';
	return n;
}
89
+
90
/**
 * Catenate as much of v onto u as fits, where u is a buffer of usize
 * bytes.  On return u is terminated with a '\0' inside the buffer,
 * provided usize is non-zero.  Assumes v is null terminated.
 *
 * Degenerate cases, which previously made the size arithmetic wrap
 * around and let strncat run past the end of the buffer:
 *  - usize == 0: nothing is touched.
 *  - u has no terminator within its first usize bytes: nothing is
 *    appended; u is simply cut off at usize-1 bytes.
 */
void safe_strcat(char *u, const char *v, size_t usize)
{
	const char *end;
	size_t used;

	if (0 == usize) return;

	/* Search for u's terminator inside the buffer only. */
	end = memchr(u, '\0', usize);
	if (NULL != end)
	{
		used = (size_t) (end - u);         /* used <= usize-1 */
		strncat(u, v, usize - used - 1);   /* strncat adds the '\0' */
	}

	u[usize-1] = '\0';
}
100
+
101
/**
 * Print exactly strlen(t) characters to fp: first the characters of s,
 * and then, once s is used up, the remaining characters of t taken
 * from the same column positions (i.e. the last |t|-|s| characters
 * of t).  If s is longer than t, s is truncated to the length of t.
 */
void left_print_string(FILE * fp, const char * s, const char * t)
{
	int width = strlen(t);
	int slen = strlen(s);
	int col;

	for (col = 0; col < width; col++)
	{
		/* Column comes from s while s lasts, from t afterwards. */
		const char *src = (col < slen) ? s : t;
		fprintf(fp, "%c", src[col]);
	}
}
118
+
119
#ifdef _WIN32 /* should be !defined(HAVE_STRTOK_R) */

/**
 * Re-entrant tokenizer, supplied for builds where _WIN32 is defined.
 *
 * On the first call s points at the string to split; on later calls s
 * is NULL and scanning resumes from *saveptr.  Leading delimiter
 * characters are skipped, the token is terminated in place by
 * overwriting the delimiter that follows it, and *saveptr is set to the
 * character after that delimiter.  When the token runs to the end of
 * the string, or no token is left, *saveptr is set to NULL.
 *
 * Returns a pointer to the token, or NULL when there are no more.
 */
char *
strtok_r (char *s, const char *delim, char **saveptr)
{
	char *tok_end;

	/* Resume from the saved position when no new string is given. */
	if (NULL == s) s = *saveptr;
	if (NULL == s) return NULL;

	/* Skip past any leading delimiters. */
	s += strspn(s, delim);
	if ('\0' == *s)
	{
		*saveptr = NULL;
		return NULL;
	}

	/* Find the end of the token and cut the string there. */
	tok_end = strpbrk(s, delim);
	if (NULL != tok_end)
	{
		*tok_end = 0x0;
		*saveptr = tok_end + 1;
	}
	else
	{
		/* Token runs to the end of the string. */
		*saveptr = NULL;
	}

	return s;
}

#endif /* _WIN32 should be !HAVE_STRTOK_R */
158
+
159
+ /* ============================================================= */
160
+ /* UTF8 utilities */
161
+
162
/**
 * Shared worker for downcase_utf8_str() and upcase_utf8_str().
 *
 * Decodes the first multi-byte character of 'from' with mbtowc (so the
 * encoding is that of the current locale), maps it with towupper when
 * to_upper is non-zero and with towlower otherwise, and writes the
 * re-cased character followed by the rest of 'from' into 'to', a buffer
 * of 'usize' bytes.  'to' may be the same pointer as 'from'.
 *
 * Returns 0 on success, and -1 when an in-place conversion was refused
 * because the re-cased character needs more bytes than the original
 * (the tail of the string would have to be shifted right).
 */
static int recase_first_utf8_char(char *to, const char *from,
                                  size_t usize, int to_upper)
{
	wchar_t c;
	int nbo, nbn;               /* bytes in the old / new first char */
	char recased[MB_LEN_MAX];
	mbstate_t mbss;
	size_t room, rest;

	nbo = mbtowc(&c, from, MB_CUR_MAX);
	if (nbo <= 0)
	{
		/* 0: 'from' is the empty string; <0: not a valid multi-byte
		 * sequence.  Either way there is no character to re-case and
		 * c holds nothing useful, so pass the string through as is. */
		if ((to != from) && (0 < usize))
		{
			strncpy(to, from, usize-1);
			to[usize-1] = '\0';
		}
		return 0;
	}

	c = to_upper ? towupper(c) : towlower(c);
	memset(&mbss, 0, sizeof(mbss));
	nbn = wctomb_check(recased, c, &mbss);

	/* NOTE(review): wctomb_check is defined elsewhere; this guard only
	 * matters if it can report a failure by returning a value <= 0. */
	if (nbn <= 0) return 0;

	if (to == from)
	{
		/* Growing in place is not supported. */
		if (nbo < nbn) return -1;

		/* Same width: overwrite the first character, tail stays put. */
		if (nbo == nbn)
		{
			memcpy(to, recased, (size_t) nbn);
			return 0;
		}
	}

	/* The re-cased character plus a terminator must fit in 'to'. */
	if (usize <= (size_t) nbn)
	{
		if (0 < usize) to[0] = '\0';
		return 0;
	}

	memcpy(to, recased, (size_t) nbn);
	from += nbo;
	to += nbn;

	/* Copy the tail, truncated to the room left.  memmove, because
	 * when shrinking in place the two regions overlap. */
	room = usize - (size_t) nbn;      /* >= 1, includes the '\0' */
	rest = strlen(from);
	if (rest > room - 1) rest = room - 1;
	memmove(to, from, rest);

	/* Zero-fill what is left, as the strncpy-based copy used to. */
	memset(to + rest, 0, room - rest);
	return 0;
}

/**
 * Downcase the first letter of the word.
 *
 * Writes the result into 'to', a buffer of 'usize' bytes; 'to' may be
 * the same pointer as 'from'.  If the conversion is in place and the
 * lower-case character is wider than the original, an error is
 * reported through prt_error and the string is left unchanged.
 */
void downcase_utf8_str(char *to, const char * from, size_t usize)
{
	if (recase_first_utf8_char(to, from, usize, 0) < 0)
	{
		/* I'm too lazy to fix this */
		prt_error("Error: can't downcase multi-byte string!\n");
	}
}

/**
 * Upcase the first letter of the word.
 *
 * Writes the result into 'to', a buffer of 'usize' bytes; 'to' may be
 * the same pointer as 'from'.  If the conversion is in place and the
 * upper-case character is wider than the original, an error is
 * reported through prt_error and the string is left unchanged.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize)
{
	if (recase_first_utf8_char(to, from, usize, 1) < 0)
	{
		/* I'm too lazy to fix this */
		prt_error("Error: can't upcase multi-byte string!\n");
	}
}
227
+
228
+ /* ============================================================= */
229
+ /* Memory alloc routines below. These routines attempt to keep
230
+ * track of how much space is getting used during a parse.
231
+ *
232
+ * This code is probably obsolescent, and should probably be dumped.
233
+ * No one (that I know of) looks at the space usage; it's one of the
234
+ * few areas that needs pthreads -- it would be great to just get
235
+ * rid of it (and thus get rid of pthreads).
236
+ */
237
+
238
+ #ifdef TRACK_SPACE_USAGE
239
/**
 * Byte counters for the memory-usage tracking done by the allocation
 * wrappers in this file.
 */
typedef struct
{
	size_t max_space_used;          /* largest space_in_use seen so far */
	size_t space_in_use;            /* raised by xalloc/xrealloc, lowered by xfree */
	size_t max_external_space_used; /* largest external_space_in_use seen so far */
	size_t external_space_in_use;   /* raised by exalloc, lowered by exfree */
} space_t;
246
+
247
+ #ifdef USE_PTHREADS
248
+ static pthread_key_t space_key;
249
+ static pthread_once_t space_key_once = PTHREAD_ONCE_INIT;
250
+
251
+ static void fini_memusage(void)
252
+ {
253
+ space_t *s = (space_t *) pthread_getspecific(space_key);
254
+ if (s)
255
+ {
256
+ free(s);
257
+ pthread_setspecific(space_key, NULL);
258
+ }
259
+ pthread_key_delete(space_key);
260
+ space_key = 0;
261
+ }
262
+
263
+ static void space_key_alloc(void)
264
+ {
265
+ int rc = pthread_key_create(&space_key, free);
266
+ if (0 == rc)
267
+ atexit(fini_memusage);
268
+ }
269
+ #else
270
+ static space_t space;
271
+ #endif
272
+
273
+ static space_t * do_init_memusage(void)
274
+ {
275
+ space_t *s;
276
+
277
+ #ifdef USE_PTHREADS
278
+ s = (space_t *) malloc(sizeof(space_t));
279
+ pthread_setspecific(space_key, s);
280
+ #else
281
+ s = &space;
282
+ #endif
283
+
284
+ s->max_space_used = 0;
285
+ s->space_in_use = 0;
286
+ s->max_external_space_used = 0;
287
+ s->external_space_in_use = 0;
288
+
289
+ return s;
290
+ }
291
+
292
+ void init_memusage(void)
293
+ {
294
+ #ifdef USE_PTHREADS
295
+ pthread_once(&space_key_once, space_key_alloc);
296
+ #else
297
+ static int mem_inited = FALSE;
298
+ if (mem_inited) return;
299
+ mem_inited = TRUE;
300
+ #endif
301
+ do_init_memusage();
302
+ }
303
+
304
+ static inline space_t *getspace(void)
305
+ {
306
+ #ifdef USE_PTHREADS
307
+ space_t *s = pthread_getspecific(space_key);
308
+ if (s) return s;
309
+ return do_init_memusage();
310
+ #else
311
+ return &space;
312
+ #endif
313
+ }
314
+
315
+ /**
316
+ * space used but not yet freed during parse
317
+ */
318
+ size_t get_space_in_use(void)
319
+ {
320
+ return getspace()->space_in_use;
321
+ }
322
+
323
+ /**
324
+ * maximum space used during the parse
325
+ */
326
+ size_t get_max_space_used(void)
327
+ {
328
+ return getspace()->max_space_used;
329
+ }
330
+ #else /* TRACK_SPACE_USAGE */
331
/* Usage tracking is compiled out: nothing to set up, nothing to report. */
void init_memusage(void)
{
}

size_t get_space_in_use(void)
{
	return 0;
}

size_t get_max_space_used(void)
{
	return 0;
}
334
+ #endif /* TRACK_SPACE_USAGE */
335
+
336
/**
 * Allocate size bytes with malloc.  When TRACK_SPACE_USAGE is defined,
 * the allocation is added to space_in_use and max_space_used is raised
 * if a new peak was reached.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted, so the return value is NULL only when size is 0 and
 * malloc chose to return NULL for it.
 */
void * xalloc(size_t size)
{
	void * p = malloc(size);

	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	{
		/* Count the bytes only once the allocation has succeeded. */
		space_t *s = getspace();
		s->space_in_use += size;
		if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
	}
#endif /* TRACK_SPACE_USAGE */

	return p;
}
356
+
357
/**
 * Resize the block p, previously sized oldsize, to newsize bytes using
 * realloc.  When TRACK_SPACE_USAGE is defined, space_in_use is lowered
 * by oldsize and raised by newsize, and max_space_used is raised if a
 * new peak was reached; otherwise oldsize is not used.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted.  Returns the (possibly moved) block; this is NULL only
 * when newsize is 0 and realloc returned NULL for it.
 */
void * xrealloc(void *p, size_t oldsize, size_t newsize)
{
#ifdef TRACK_SPACE_USAGE
	space_t *s = getspace();
	s->space_in_use -= oldsize;
#else
	(void) oldsize;
#endif /* TRACK_SPACE_USAGE */

	p = realloc(p, newsize);
	if ((p == NULL) && (newsize != 0))
	{
		prt_error("Fatal Error: Ran out of space on realloc.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	s->space_in_use += newsize;
	if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
#endif /* TRACK_SPACE_USAGE */

	return p;
}
376
+
377
#ifdef TRACK_SPACE_USAGE
/**
 * Release a block obtained from xalloc/xrealloc.  size is subtracted
 * from space_in_use before p is handed to free().
 */
void xfree(void * p, size_t size)
{
	space_t *usage = getspace();

	usage->space_in_use -= size;
	free(p);
}
#endif /* TRACK_SPACE_USAGE */
384
+
385
/**
 * Allocate size bytes with malloc, accounted as "external" space.
 * When TRACK_SPACE_USAGE is defined, the allocation is added to
 * external_space_in_use and max_external_space_used is raised if a new
 * peak was reached.
 *
 * Running out of memory is fatal: an error is printed and the process
 * is aborted, so the return value is NULL only when size is 0 and
 * malloc chose to return NULL for it.
 */
void * exalloc(size_t size)
{
	void * p = malloc(size);

	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space.\n");
		abort();   /* does not return */
	}

#ifdef TRACK_SPACE_USAGE
	{
		/* Count the bytes only once the allocation has succeeded. */
		space_t *s = getspace();
		s->external_space_in_use += size;
		if (s->max_external_space_used < s->external_space_in_use)
			s->max_external_space_used = s->external_space_in_use;
	}
#endif /* TRACK_SPACE_USAGE */

	return p;
}
403
+
404
#ifdef TRACK_SPACE_USAGE
/**
 * Release a block obtained from exalloc.  size is subtracted from
 * external_space_in_use before p is handed to free().
 */
void exfree(void * p, size_t size)
{
	space_t *usage = getspace();

	usage->external_space_in_use -= size;
	free(p);
}
#endif /* TRACK_SPACE_USAGE */
411
+
412
+ /* =========================================================== */
413
+ /* File path and dictionary open routines below */
414
+
415
+ char * join_path(const char * prefix, const char * suffix)
416
+ {
417
+ char * path;
418
+ int path_len;
419
+
420
+ path_len = strlen(prefix) + 1 /* len(DIR_SEPARATOR) */ + strlen(suffix);
421
+ path = (char *) malloc(path_len + 1);
422
+
423
+ strcpy(path, prefix);
424
+ path[strlen(path)+1] = '\0';
425
+ path[strlen(path)] = DIR_SEPARATOR;
426
+ strcat(path, suffix);
427
+
428
+ return path;
429
+ }
430
+
431
#ifdef _WIN32
/* borrowed from glib */
/* Used only for Windows builds */
/**
 * path_get_dirname() -- return the directory part of file_name.
 *
 * Both DIR_SEPARATOR and '/' are accepted as separators.  Drive roots
 * (X:\) and UNC shares (\\server\share) keep their trailing separator;
 * see the comment in the body for the exact cases.
 *
 * @param file_name  NUL-terminated path.
 * @return a newly allocated string that the caller must free, or NULL
 *         if a direct malloc() in this function fails.
 */
static char*
path_get_dirname (const char *file_name)
{
	const char *base;
	const char *fwd;
	char *dirname;
	size_t len;

	/* Find the last separator of either flavour. */
	base = strrchr (file_name, DIR_SEPARATOR);
	fwd = strrchr (file_name, '/');
	if (base == NULL || (fwd != NULL && fwd > base))
		base = fwd;

	if (!base)
	{
		/* No separator at all: "X:foo" yields "X:.", anything else ".". */
		if (is_utf8_alpha (file_name) && file_name[1] == ':')
		{
			char drive_colon_dot[4];

			drive_colon_dot[0] = file_name[0];
			drive_colon_dot[1] = ':';
			drive_colon_dot[2] = '.';
			drive_colon_dot[3] = '\0';

			return safe_strdup (drive_colon_dot);
		}
		return safe_strdup (".");
	}

	/* Step back over a run of separators. */
	while (base > file_name && IS_DIR_SEPARATOR (*base))
		base--;

	/* base points to the char before the last slash.
	 *
	 * In case file_name is the root of a drive (X:\) or a child of the
	 * root of a drive (X:\foo), include the slash.
	 *
	 * In case file_name is the root share of an UNC path
	 * (\\server\share), add a slash, returning \\server\share\ .
	 *
	 * In case file_name is a direct child of a share in an UNC path
	 * (\\server\share\foo), include the slash after the share name,
	 * returning \\server\share\ .
	 */
	if (base == file_name + 1 && is_utf8_alpha (file_name) && file_name[1] == ':')
		base++;
	else if (IS_DIR_SEPARATOR (file_name[0]) &&
	         IS_DIR_SEPARATOR (file_name[1]) &&
	         file_name[2] &&
	         !IS_DIR_SEPARATOR (file_name[2]) &&
	         base >= file_name + 2)
	{
		const char *p = file_name + 2;
		while (*p && !IS_DIR_SEPARATOR (*p))
			p++;
		if (p == base + 1)
		{
			/* Whole name plus one appended separator. */
			len = strlen (file_name) + 1;
			dirname = (char *) malloc (len + 1);
			if (NULL == dirname)
				return NULL;
			strcpy (dirname, file_name);
			dirname[len-1] = DIR_SEPARATOR;
			dirname[len] = 0;
			return dirname;
		}
		if (IS_DIR_SEPARATOR (*p))
		{
			p++;
			while (*p && !IS_DIR_SEPARATOR (*p))
				p++;
			if (p == base + 1)
				base++;
		}
	}

	/* Copy everything up to and including *base. */
	len = (size_t) (1 + base - file_name);

	dirname = (char *) malloc (len + 1);
	if (NULL == dirname)
		return NULL;
	memcpy (dirname, file_name, len);
	dirname[len] = 0;

	return dirname;
}
#endif /* _WIN32 */
522
+
523
/* File-scope global.  That is acceptable here because it is set only
 * during initialization and is thenceforth a read-only item, so it
 * does not need to be locked.
 */
static char * custom_data_dir = NULL;

/**
 * dictionary_set_data_dir() -- remember a caller-supplied data directory.
 *
 * Any previously stored value is freed, then a copy of path made with
 * safe_strdup() is stored in its place.
 */
void dictionary_set_data_dir(const char * path)
{
	/* free(NULL) is a no-op, so no guard is needed. */
	free(custom_data_dir);
	custom_data_dir = safe_strdup(path);
}
534
+
535
+ char * dictionary_get_data_dir(void)
536
+ {
537
+ #ifdef _WIN32
538
+ HINSTANCE hInstance;
539
+ #endif
540
+ char * data_dir = NULL;
541
+
542
+ if (custom_data_dir != NULL) {
543
+ data_dir = safe_strdup(custom_data_dir);
544
+ return data_dir;
545
+ }
546
+
547
+ #ifdef ENABLE_BINRELOC
548
+ data_dir = safe_strdup (BR_DATADIR("/link-grammar"));
549
+ #elif defined(_WIN32)
550
+ /* Dynamically locate library and return containing directory */
551
+ hInstance = GetModuleHandle("link-grammar.dll");
552
+ if(hInstance != NULL)
553
+ {
554
+ char dll_path[MAX_PATH];
555
+
556
+ if(GetModuleFileName(hInstance,dll_path,MAX_PATH)) {
557
+ #ifdef _DEBUG
558
+ prt_error("Info: GetModuleFileName=%s\n", (dll_path?dll_path:"NULL"));
559
+ #endif
560
+ data_dir = path_get_dirname(dll_path);
561
+ }
562
+ }
563
+ #endif
564
+
565
+ return data_dir;
566
+ }
567
+
568
+ /**
569
+ * object_open() -- dictopen() - open a dictionary
570
+ *
571
+ * This function is used to open a dictionary file or a word file,
572
+ * or any associated data file (like a post process knowledge file).
573
+ *
574
+ * It works as follows. If the file name begins with a "/", then
575
+ * it's assumed to be an absolute file name and it tries to open
576
+ * that exact file.
577
+ *
578
+ * If the filename does not begin with a "/", then it uses the
579
+ * dictpath mechanism to find the right file to open. This looks
580
+ * for the file in a sequence of directories until it finds it. The
581
+ * sequence of directories is specified in a dictpath string, in
582
+ * which each directory is followed by a ":".
583
+ */
584
+ void * object_open(const char *filename,
585
+ void * (*opencb)(const char *, void *),
586
+ void * user_data)
587
+ {
588
+ char completename[MAX_PATH_NAME+1];
589
+ char fulldictpath[MAX_PATH_NAME+1];
590
+ static char prevpath[MAX_PATH_NAME+1] = "";
591
+ static int first_time_ever = 1;
592
+ char *pos, *oldpos;
593
+ int filenamelen, len;
594
+ void *fp;
595
+
596
+ /* Record the first path ever used, so that we can recycle it */
597
+ if (first_time_ever)
598
+ {
599
+ strncpy (prevpath, filename, MAX_PATH_NAME);
600
+ prevpath[MAX_PATH_NAME] = 0;
601
+ pos = strrchr(prevpath, DIR_SEPARATOR);
602
+ if (pos) *pos = 0;
603
+ pos = strrchr(prevpath, DIR_SEPARATOR);
604
+ if (pos) *(pos+1) = 0;
605
+ first_time_ever = 0;
606
+ }
607
+
608
+ /* Look for absolute filename.
609
+ * Unix: starts with leading slash.
610
+ * Windows: starts with C:\ except that the drive letter may differ.
611
+ */
612
+ if ((filename[0] == '/') || ((filename[1] == ':') && (filename[2] == '\\')))
613
+ {
614
+ /* fopen returns NULL if the file does not exist. */
615
+ fp = opencb(filename, user_data);
616
+ if (fp) return fp;
617
+ }
618
+
619
+ {
620
+ char * data_dir = dictionary_get_data_dir();
621
+ #ifdef _DEBUG
622
+ prt_error("Info: data_dir=%s\n", (data_dir?data_dir:"NULL"));
623
+ #endif
624
+ if (data_dir) {
625
+ snprintf(fulldictpath, MAX_PATH_NAME,
626
+ "%s%c%s%c", data_dir, PATH_SEPARATOR,
627
+ DEFAULTPATH, PATH_SEPARATOR);
628
+ free(data_dir);
629
+ }
630
+ else {
631
+ /* Always make sure that it ends with a path
632
+ * separator char for the below while() loop.
633
+ * For unix, this should look like:
634
+ * /usr/share/link-grammar:.:data:..:../data:
635
+ * For windows:
636
+ * C:\SOMWHERE;.;data;..;..\data;
637
+ */
638
+ snprintf(fulldictpath, MAX_PATH_NAME,
639
+ "%s%c%s%c%s%c%s%c%s%c%s%c%s%c",
640
+ prevpath, PATH_SEPARATOR,
641
+ DEFAULTPATH, PATH_SEPARATOR,
642
+ ".", PATH_SEPARATOR,
643
+ "data", PATH_SEPARATOR,
644
+ "..", PATH_SEPARATOR,
645
+ "..", DIR_SEPARATOR, "data", PATH_SEPARATOR);
646
+ }
647
+ }
648
+
649
+ /* Now fulldictpath is our dictpath, where each entry is
650
+ * followed by a ":" including the last one */
651
+
652
+ filenamelen = strlen(filename);
653
+ len = strlen(fulldictpath)+ filenamelen + 1 + 1;
654
+ oldpos = fulldictpath;
655
+ while ((pos = strchr(oldpos, PATH_SEPARATOR)) != NULL)
656
+ {
657
+ strncpy(completename, oldpos, (pos-oldpos));
658
+ *(completename+(pos-oldpos)) = DIR_SEPARATOR;
659
+ strcpy(completename+(pos-oldpos)+1,filename);
660
+ #ifdef _DEBUG
661
+ prt_error("Info: object_open() trying %s\n", completename);
662
+ #endif
663
+ if ((fp = opencb(completename, user_data)) != NULL) {
664
+ return fp;
665
+ }
666
+ oldpos = pos+1;
667
+ }
668
+ return NULL;
669
+ }
670
+
671
/* XXX static global variable used during dictionary open */
static char *path_found = NULL;

/**
 * dict_file_open() -- object_open() callback that opens fullname
 * with fopen().
 *
 * @param fullname   path to try.
 * @param user_data  the fopen() mode string, passed as a void pointer.
 * @return the FILE pointer cast to void *, or NULL if fopen() failed.
 *
 * The first time a file opens successfully while path_found is still
 * NULL, a copy of fullname is stored in path_found and an Info message
 * is printed.
 */
static void * dict_file_open(const char * fullname, void * user_data)
{
	FILE * stream = fopen(fullname, (const char *) user_data);

	if (NULL == stream)
		return NULL;

	if (NULL == path_found)
	{
		path_found = strdup (fullname);
		prt_error("Info: Dictionary found at %s\n", fullname);
	}
	return (void *) stream;
}
685
+
686
+ FILE *dictopen(const char *filename, const char *how)
687
+ {
688
+ FILE * fh = NULL;
689
+ void * ud = (void *) how;
690
+
691
+ /* If not the first time through, look for the other dictionaries
692
+ * in the *same* directory in which the first one was found.
693
+ * (The first one is typcailly "en/4.0.dict")
694
+ * The global "path_found" records where the first dict was found.
695
+ * The goal here is to avoid fractured install insanity.
696
+ */
697
+ if (path_found)
698
+ {
699
+ size_t sz = strlen (path_found) + strlen(filename) + 1;
700
+ char * fullname = (char *) malloc (sz);
701
+ strcpy(fullname, path_found);
702
+ strcat(fullname, filename);
703
+ fh = (FILE *) object_open(fullname, dict_file_open, ud);
704
+ free(fullname);
705
+ }
706
+ else
707
+ {
708
+ fh = (FILE *) object_open(filename, dict_file_open, ud);
709
+ if (path_found)
710
+ {
711
+ char * root = strstr(path_found, filename);
712
+ *root = 0;
713
+ }
714
+ }
715
+ return fh;
716
+ }
717
+
718
+ /* ======================================================== */
719
+ /* Locale routines */
720
+
721
#ifdef _WIN32

/**
 * win32_getlocale() -- build a POSIX-style locale name on Windows.
 *
 * The LC_ALL, LC_MESSAGES and LANG environment variables are consulted
 * first (in that order); the first non-empty one is returned as is.
 * Otherwise the name is assembled from the thread locale as
 * "<iso639>_<iso3166>" plus an optional "@Latn" / "@Cyrl" script suffix.
 *
 * @return a copy made with safe_strdup(); "C" if the locale information
 *         cannot be queried.
 */
static char *
win32_getlocale (void)
{
	LCID lcid;
	LANGID langid;
	char *ev;
	int primary, sub;
	char bfr[64];
	char iso639[10];
	char iso3166[10];
	const char *script = NULL;

	/* Let the user override the system settings through environment
	 * variables, as on POSIX systems. Note that in GTK+ applications
	 * since GTK+ 2.10.7 setting either LC_ALL or LANG also sets the
	 * Win32 locale and C library locale through code in gtkmain.c.
	 */
	if (((ev = getenv ("LC_ALL")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LC_MESSAGES")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LANG")) != NULL && ev[0] != '\0'))
		return safe_strdup (ev);

	lcid = GetThreadLocale ();

	if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, sizeof (iso639)) ||
	    !GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, sizeof (iso3166)))
		return safe_strdup ("C");

	/* Strip off the sorting rules, keep only the language part. */
	langid = LANGIDFROMLCID (lcid);

	/* Split into language and territory part. */
	primary = PRIMARYLANGID (langid);
	sub = SUBLANGID (langid);

	/* Handle special cases */
	switch (primary)
	{
		case LANG_AZERI:
			switch (sub)
			{
				case SUBLANG_AZERI_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_AZERI_CYRILLIC:
					script = "@Cyrl";
					break;
				default:
					break;
			}
			break;
		case LANG_SERBIAN: /* LANG_CROATIAN == LANG_SERBIAN */
			switch (sub)
			{
				case SUBLANG_SERBIAN_LATIN:
				case 0x06: /* Serbian (Latin) - Bosnia and Herzegovina */
					script = "@Latn";
					break;
				default:
					break;
			}
			break;
		case LANG_UZBEK:
			switch (sub)
			{
				case SUBLANG_UZBEK_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_UZBEK_CYRILLIC:
					script = "@Cyrl";
					break;
				default:
					break;
			}
			break;
		default:
			break;
	}

	/* Format into bfr in one bounded step.  The previous code appended
	 * to bfr with strcat() while it was still uninitialised. */
	snprintf (bfr, sizeof (bfr), "%s_%s%s",
	          iso639, iso3166, script ? script : "");
	bfr[sizeof (bfr) - 1] = '\0';

	return safe_strdup (bfr);
}

#endif
805
+
806
/**
 * get_default_locale() -- guess the language code of the user's locale.
 *
 * Sources are tried in this order: win32_getlocale() (Windows only),
 * the LANG environment variable, setlocale(LC_MESSAGES, NULL) when
 * HAVE_LC_MESSAGES is defined, and finally setlocale(LC_ALL, NULL).
 * An empty LANG is treated as unset.  A missing or empty result, or
 * the "C" / "POSIX" locale, is replaced by "en".
 *
 * Any "@modifier", ".codeset" and "_TERRITORY" parts are stripped,
 * so "en_GB.UTF-8@euro" yields "en".
 *
 * @return a string allocated by safe_strdup().
 */
char * get_default_locale(void)
{
	char * locale = NULL;
	char * needle;

#ifdef _WIN32
	locale = win32_getlocale ();
#endif

	if (!locale)
	{
		const char * lang = getenv ("LANG");

		/* An empty LANG carries no information; treat it as unset. */
		if ((lang != NULL) && (lang[0] != '\0'))
			locale = safe_strdup (lang);
	}

	/* NOTE(review): as before, these calls assume safe_strdup() passes
	 * a NULL argument through as NULL -- confirm. */
#if defined(HAVE_LC_MESSAGES)
	if (!locale)
		locale = safe_strdup (setlocale (LC_MESSAGES, NULL));
#endif

	if (!locale)
		locale = safe_strdup (setlocale (LC_ALL, NULL));

	if (!locale || (locale[0] == '\0') ||
	    (strcmp(locale, "C") == 0) || (strcmp(locale, "POSIX") == 0))
	{
		free(locale);
		locale = safe_strdup("en");
	}

	/* strip off "@euro" from en_GB@euro */
	if ((needle = strchr (locale, '@')) != NULL)
		*needle = '\0';

	/* strip off ".UTF-8" from en_GB.UTF-8 */
	if ((needle = strchr (locale, '.')) != NULL)
		*needle = '\0';

	/* strip off "_GB" from en_GB */
	if ((needle = strchr (locale, '_')) != NULL)
		*needle = '\0';

	return locale;
}
847
+ /* ========================== END OF FILE =================== */