grammar_cop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +8 -0
  5. data/data/.DS_Store +0 -0
  6. data/data/Makefile +511 -0
  7. data/data/Makefile.am +4 -0
  8. data/data/Makefile.in +511 -0
  9. data/data/de/.DS_Store +0 -0
  10. data/data/de/4.0.affix +7 -0
  11. data/data/de/4.0.dict +474 -0
  12. data/data/de/Makefile +387 -0
  13. data/data/de/Makefile.am +9 -0
  14. data/data/de/Makefile.in +387 -0
  15. data/data/en/.DS_Store +0 -0
  16. data/data/en/4.0.affix +26 -0
  17. data/data/en/4.0.batch +1002 -0
  18. data/data/en/4.0.biolg.batch +411 -0
  19. data/data/en/4.0.constituent-knowledge +127 -0
  20. data/data/en/4.0.dict +8759 -0
  21. data/data/en/4.0.dict.m4 +6928 -0
  22. data/data/en/4.0.enwiki.batch +14 -0
  23. data/data/en/4.0.fixes.batch +2776 -0
  24. data/data/en/4.0.knowledge +306 -0
  25. data/data/en/4.0.regex +225 -0
  26. data/data/en/4.0.voa.batch +114 -0
  27. data/data/en/Makefile +554 -0
  28. data/data/en/Makefile.am +19 -0
  29. data/data/en/Makefile.in +554 -0
  30. data/data/en/README +173 -0
  31. data/data/en/tiny.dict +157 -0
  32. data/data/en/words/.DS_Store +0 -0
  33. data/data/en/words/Makefile +456 -0
  34. data/data/en/words/Makefile.am +78 -0
  35. data/data/en/words/Makefile.in +456 -0
  36. data/data/en/words/currency +205 -0
  37. data/data/en/words/currency.p +28 -0
  38. data/data/en/words/entities.given-bisex.sing +39 -0
  39. data/data/en/words/entities.given-female.sing +4141 -0
  40. data/data/en/words/entities.given-male.sing +1633 -0
  41. data/data/en/words/entities.locations.sing +68 -0
  42. data/data/en/words/entities.national.sing +253 -0
  43. data/data/en/words/entities.organizations.sing +7 -0
  44. data/data/en/words/entities.us-states.sing +11 -0
  45. data/data/en/words/units.1 +45 -0
  46. data/data/en/words/units.1.dot +4 -0
  47. data/data/en/words/units.3 +2 -0
  48. data/data/en/words/units.4 +5 -0
  49. data/data/en/words/units.4.dot +1 -0
  50. data/data/en/words/words-medical.adv.1 +1191 -0
  51. data/data/en/words/words-medical.prep.1 +67 -0
  52. data/data/en/words/words-medical.v.4.1 +2835 -0
  53. data/data/en/words/words-medical.v.4.2 +2848 -0
  54. data/data/en/words/words-medical.v.4.3 +3011 -0
  55. data/data/en/words/words-medical.v.4.4 +3036 -0
  56. data/data/en/words/words-medical.v.4.5 +3050 -0
  57. data/data/en/words/words.adj.1 +6794 -0
  58. data/data/en/words/words.adj.2 +638 -0
  59. data/data/en/words/words.adj.3 +667 -0
  60. data/data/en/words/words.adv.1 +1573 -0
  61. data/data/en/words/words.adv.2 +67 -0
  62. data/data/en/words/words.adv.3 +157 -0
  63. data/data/en/words/words.adv.4 +80 -0
  64. data/data/en/words/words.n.1 +11464 -0
  65. data/data/en/words/words.n.1.wiki +264 -0
  66. data/data/en/words/words.n.2.s +2017 -0
  67. data/data/en/words/words.n.2.s.biolg +1 -0
  68. data/data/en/words/words.n.2.s.wiki +298 -0
  69. data/data/en/words/words.n.2.x +65 -0
  70. data/data/en/words/words.n.2.x.wiki +10 -0
  71. data/data/en/words/words.n.3 +5717 -0
  72. data/data/en/words/words.n.t +23 -0
  73. data/data/en/words/words.v.1.1 +1038 -0
  74. data/data/en/words/words.v.1.2 +1043 -0
  75. data/data/en/words/words.v.1.3 +1052 -0
  76. data/data/en/words/words.v.1.4 +1023 -0
  77. data/data/en/words/words.v.1.p +17 -0
  78. data/data/en/words/words.v.10.1 +14 -0
  79. data/data/en/words/words.v.10.2 +15 -0
  80. data/data/en/words/words.v.10.3 +88 -0
  81. data/data/en/words/words.v.10.4 +17 -0
  82. data/data/en/words/words.v.2.1 +1253 -0
  83. data/data/en/words/words.v.2.2 +1304 -0
  84. data/data/en/words/words.v.2.3 +1280 -0
  85. data/data/en/words/words.v.2.4 +1285 -0
  86. data/data/en/words/words.v.2.5 +1287 -0
  87. data/data/en/words/words.v.4.1 +2472 -0
  88. data/data/en/words/words.v.4.2 +2487 -0
  89. data/data/en/words/words.v.4.3 +2441 -0
  90. data/data/en/words/words.v.4.4 +2478 -0
  91. data/data/en/words/words.v.4.5 +2483 -0
  92. data/data/en/words/words.v.5.1 +98 -0
  93. data/data/en/words/words.v.5.2 +98 -0
  94. data/data/en/words/words.v.5.3 +103 -0
  95. data/data/en/words/words.v.5.4 +102 -0
  96. data/data/en/words/words.v.6.1 +388 -0
  97. data/data/en/words/words.v.6.2 +401 -0
  98. data/data/en/words/words.v.6.3 +397 -0
  99. data/data/en/words/words.v.6.4 +405 -0
  100. data/data/en/words/words.v.6.5 +401 -0
  101. data/data/en/words/words.v.8.1 +117 -0
  102. data/data/en/words/words.v.8.2 +118 -0
  103. data/data/en/words/words.v.8.3 +118 -0
  104. data/data/en/words/words.v.8.4 +119 -0
  105. data/data/en/words/words.v.8.5 +119 -0
  106. data/data/en/words/words.y +104 -0
  107. data/data/lt/.DS_Store +0 -0
  108. data/data/lt/4.0.affix +6 -0
  109. data/data/lt/4.0.constituent-knowledge +24 -0
  110. data/data/lt/4.0.dict +135 -0
  111. data/data/lt/4.0.knowledge +38 -0
  112. data/data/lt/Makefile +389 -0
  113. data/data/lt/Makefile.am +11 -0
  114. data/data/lt/Makefile.in +389 -0
  115. data/ext/.DS_Store +0 -0
  116. data/ext/link_grammar/.DS_Store +0 -0
  117. data/ext/link_grammar/extconf.rb +2 -0
  118. data/ext/link_grammar/link-grammar/.DS_Store +0 -0
  119. data/ext/link_grammar/link-grammar/.deps/analyze-linkage.Plo +198 -0
  120. data/ext/link_grammar/link-grammar/.deps/and.Plo +202 -0
  121. data/ext/link_grammar/link-grammar/.deps/api.Plo +244 -0
  122. data/ext/link_grammar/link-grammar/.deps/build-disjuncts.Plo +212 -0
  123. data/ext/link_grammar/link-grammar/.deps/command-line.Plo +201 -0
  124. data/ext/link_grammar/link-grammar/.deps/constituents.Plo +201 -0
  125. data/ext/link_grammar/link-grammar/.deps/count.Plo +202 -0
  126. data/ext/link_grammar/link-grammar/.deps/disjunct-utils.Plo +126 -0
  127. data/ext/link_grammar/link-grammar/.deps/disjuncts.Plo +123 -0
  128. data/ext/link_grammar/link-grammar/.deps/error.Plo +121 -0
  129. data/ext/link_grammar/link-grammar/.deps/expand.Plo +133 -0
  130. data/ext/link_grammar/link-grammar/.deps/extract-links.Plo +198 -0
  131. data/ext/link_grammar/link-grammar/.deps/fast-match.Plo +200 -0
  132. data/ext/link_grammar/link-grammar/.deps/idiom.Plo +200 -0
  133. data/ext/link_grammar/link-grammar/.deps/jni-client.Plo +217 -0
  134. data/ext/link_grammar/link-grammar/.deps/link-parser.Po +1 -0
  135. data/ext/link_grammar/link-grammar/.deps/massage.Plo +202 -0
  136. data/ext/link_grammar/link-grammar/.deps/post-process.Plo +202 -0
  137. data/ext/link_grammar/link-grammar/.deps/pp_knowledge.Plo +202 -0
  138. data/ext/link_grammar/link-grammar/.deps/pp_lexer.Plo +201 -0
  139. data/ext/link_grammar/link-grammar/.deps/pp_linkset.Plo +200 -0
  140. data/ext/link_grammar/link-grammar/.deps/prefix.Plo +102 -0
  141. data/ext/link_grammar/link-grammar/.deps/preparation.Plo +202 -0
  142. data/ext/link_grammar/link-grammar/.deps/print-util.Plo +200 -0
  143. data/ext/link_grammar/link-grammar/.deps/print.Plo +201 -0
  144. data/ext/link_grammar/link-grammar/.deps/prune.Plo +202 -0
  145. data/ext/link_grammar/link-grammar/.deps/read-dict.Plo +223 -0
  146. data/ext/link_grammar/link-grammar/.deps/read-regex.Plo +123 -0
  147. data/ext/link_grammar/link-grammar/.deps/regex-morph.Plo +131 -0
  148. data/ext/link_grammar/link-grammar/.deps/resources.Plo +203 -0
  149. data/ext/link_grammar/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  150. data/ext/link_grammar/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  151. data/ext/link_grammar/link-grammar/.deps/string-set.Plo +198 -0
  152. data/ext/link_grammar/link-grammar/.deps/tokenize.Plo +160 -0
  153. data/ext/link_grammar/link-grammar/.deps/utilities.Plo +222 -0
  154. data/ext/link_grammar/link-grammar/.deps/word-file.Plo +201 -0
  155. data/ext/link_grammar/link-grammar/.deps/word-utils.Plo +212 -0
  156. data/ext/link_grammar/link-grammar/.libs/analyze-linkage.o +0 -0
  157. data/ext/link_grammar/link-grammar/.libs/and.o +0 -0
  158. data/ext/link_grammar/link-grammar/.libs/api.o +0 -0
  159. data/ext/link_grammar/link-grammar/.libs/build-disjuncts.o +0 -0
  160. data/ext/link_grammar/link-grammar/.libs/command-line.o +0 -0
  161. data/ext/link_grammar/link-grammar/.libs/constituents.o +0 -0
  162. data/ext/link_grammar/link-grammar/.libs/count.o +0 -0
  163. data/ext/link_grammar/link-grammar/.libs/disjunct-utils.o +0 -0
  164. data/ext/link_grammar/link-grammar/.libs/disjuncts.o +0 -0
  165. data/ext/link_grammar/link-grammar/.libs/error.o +0 -0
  166. data/ext/link_grammar/link-grammar/.libs/expand.o +0 -0
  167. data/ext/link_grammar/link-grammar/.libs/extract-links.o +0 -0
  168. data/ext/link_grammar/link-grammar/.libs/fast-match.o +0 -0
  169. data/ext/link_grammar/link-grammar/.libs/idiom.o +0 -0
  170. data/ext/link_grammar/link-grammar/.libs/jni-client.o +0 -0
  171. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  172. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  173. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  174. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  175. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.a +0 -0
  176. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  177. data/ext/link_grammar/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  178. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  179. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  180. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  181. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.a +0 -0
  182. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.dylib +0 -0
  183. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.la +41 -0
  184. data/ext/link_grammar/link-grammar/.libs/liblink-grammar.lai +41 -0
  185. data/ext/link_grammar/link-grammar/.libs/massage.o +0 -0
  186. data/ext/link_grammar/link-grammar/.libs/post-process.o +0 -0
  187. data/ext/link_grammar/link-grammar/.libs/pp_knowledge.o +0 -0
  188. data/ext/link_grammar/link-grammar/.libs/pp_lexer.o +0 -0
  189. data/ext/link_grammar/link-grammar/.libs/pp_linkset.o +0 -0
  190. data/ext/link_grammar/link-grammar/.libs/prefix.o +0 -0
  191. data/ext/link_grammar/link-grammar/.libs/preparation.o +0 -0
  192. data/ext/link_grammar/link-grammar/.libs/print-util.o +0 -0
  193. data/ext/link_grammar/link-grammar/.libs/print.o +0 -0
  194. data/ext/link_grammar/link-grammar/.libs/prune.o +0 -0
  195. data/ext/link_grammar/link-grammar/.libs/read-dict.o +0 -0
  196. data/ext/link_grammar/link-grammar/.libs/read-regex.o +0 -0
  197. data/ext/link_grammar/link-grammar/.libs/regex-morph.o +0 -0
  198. data/ext/link_grammar/link-grammar/.libs/resources.o +0 -0
  199. data/ext/link_grammar/link-grammar/.libs/spellcheck-aspell.o +0 -0
  200. data/ext/link_grammar/link-grammar/.libs/spellcheck-hun.o +0 -0
  201. data/ext/link_grammar/link-grammar/.libs/string-set.o +0 -0
  202. data/ext/link_grammar/link-grammar/.libs/tokenize.o +0 -0
  203. data/ext/link_grammar/link-grammar/.libs/utilities.o +0 -0
  204. data/ext/link_grammar/link-grammar/.libs/word-file.o +0 -0
  205. data/ext/link_grammar/link-grammar/.libs/word-utils.o +0 -0
  206. data/ext/link_grammar/link-grammar/Makefile +900 -0
  207. data/ext/link_grammar/link-grammar/Makefile.am +202 -0
  208. data/ext/link_grammar/link-grammar/Makefile.in +900 -0
  209. data/ext/link_grammar/link-grammar/analyze-linkage.c +1317 -0
  210. data/ext/link_grammar/link-grammar/analyze-linkage.h +24 -0
  211. data/ext/link_grammar/link-grammar/and.c +1603 -0
  212. data/ext/link_grammar/link-grammar/and.h +27 -0
  213. data/ext/link_grammar/link-grammar/api-structures.h +362 -0
  214. data/ext/link_grammar/link-grammar/api-types.h +72 -0
  215. data/ext/link_grammar/link-grammar/api.c +1887 -0
  216. data/ext/link_grammar/link-grammar/api.h +96 -0
  217. data/ext/link_grammar/link-grammar/autoit/.DS_Store +0 -0
  218. data/ext/link_grammar/link-grammar/autoit/README +10 -0
  219. data/ext/link_grammar/link-grammar/autoit/_LGTest.au3 +22 -0
  220. data/ext/link_grammar/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  221. data/ext/link_grammar/link-grammar/build-disjuncts.c +487 -0
  222. data/ext/link_grammar/link-grammar/build-disjuncts.h +21 -0
  223. data/ext/link_grammar/link-grammar/command-line.c +458 -0
  224. data/ext/link_grammar/link-grammar/command-line.h +15 -0
  225. data/ext/link_grammar/link-grammar/constituents.c +1836 -0
  226. data/ext/link_grammar/link-grammar/constituents.h +26 -0
  227. data/ext/link_grammar/link-grammar/corpus/.DS_Store +0 -0
  228. data/ext/link_grammar/link-grammar/corpus/.deps/cluster.Plo +1 -0
  229. data/ext/link_grammar/link-grammar/corpus/.deps/corpus.Plo +1 -0
  230. data/ext/link_grammar/link-grammar/corpus/Makefile +527 -0
  231. data/ext/link_grammar/link-grammar/corpus/Makefile.am +46 -0
  232. data/ext/link_grammar/link-grammar/corpus/Makefile.in +527 -0
  233. data/ext/link_grammar/link-grammar/corpus/README +17 -0
  234. data/ext/link_grammar/link-grammar/corpus/cluster.c +286 -0
  235. data/ext/link_grammar/link-grammar/corpus/cluster.h +32 -0
  236. data/ext/link_grammar/link-grammar/corpus/corpus.c +483 -0
  237. data/ext/link_grammar/link-grammar/corpus/corpus.h +46 -0
  238. data/ext/link_grammar/link-grammar/count.c +828 -0
  239. data/ext/link_grammar/link-grammar/count.h +25 -0
  240. data/ext/link_grammar/link-grammar/disjunct-utils.c +261 -0
  241. data/ext/link_grammar/link-grammar/disjunct-utils.h +27 -0
  242. data/ext/link_grammar/link-grammar/disjuncts.c +138 -0
  243. data/ext/link_grammar/link-grammar/disjuncts.h +13 -0
  244. data/ext/link_grammar/link-grammar/error.c +92 -0
  245. data/ext/link_grammar/link-grammar/error.h +35 -0
  246. data/ext/link_grammar/link-grammar/expand.c +67 -0
  247. data/ext/link_grammar/link-grammar/expand.h +13 -0
  248. data/ext/link_grammar/link-grammar/externs.h +22 -0
  249. data/ext/link_grammar/link-grammar/extract-links.c +625 -0
  250. data/ext/link_grammar/link-grammar/extract-links.h +16 -0
  251. data/ext/link_grammar/link-grammar/fast-match.c +309 -0
  252. data/ext/link_grammar/link-grammar/fast-match.h +17 -0
  253. data/ext/link_grammar/link-grammar/idiom.c +373 -0
  254. data/ext/link_grammar/link-grammar/idiom.h +15 -0
  255. data/ext/link_grammar/link-grammar/jni-client.c +779 -0
  256. data/ext/link_grammar/link-grammar/jni-client.h +236 -0
  257. data/ext/link_grammar/link-grammar/liblink-grammar-java.la +42 -0
  258. data/ext/link_grammar/link-grammar/liblink-grammar.la +41 -0
  259. data/ext/link_grammar/link-grammar/link-features.h +37 -0
  260. data/ext/link_grammar/link-grammar/link-features.h.in +37 -0
  261. data/ext/link_grammar/link-grammar/link-grammar-java.def +31 -0
  262. data/ext/link_grammar/link-grammar/link-grammar.def +194 -0
  263. data/ext/link_grammar/link-grammar/link-includes.h +465 -0
  264. data/ext/link_grammar/link-grammar/link-parser.c +849 -0
  265. data/ext/link_grammar/link-grammar/massage.c +329 -0
  266. data/ext/link_grammar/link-grammar/massage.h +13 -0
  267. data/ext/link_grammar/link-grammar/post-process.c +1113 -0
  268. data/ext/link_grammar/link-grammar/post-process.h +45 -0
  269. data/ext/link_grammar/link-grammar/pp_knowledge.c +376 -0
  270. data/ext/link_grammar/link-grammar/pp_knowledge.h +14 -0
  271. data/ext/link_grammar/link-grammar/pp_lexer.c +1920 -0
  272. data/ext/link_grammar/link-grammar/pp_lexer.h +19 -0
  273. data/ext/link_grammar/link-grammar/pp_linkset.c +158 -0
  274. data/ext/link_grammar/link-grammar/pp_linkset.h +20 -0
  275. data/ext/link_grammar/link-grammar/prefix.c +482 -0
  276. data/ext/link_grammar/link-grammar/prefix.h +139 -0
  277. data/ext/link_grammar/link-grammar/preparation.c +412 -0
  278. data/ext/link_grammar/link-grammar/preparation.h +20 -0
  279. data/ext/link_grammar/link-grammar/print-util.c +87 -0
  280. data/ext/link_grammar/link-grammar/print-util.h +32 -0
  281. data/ext/link_grammar/link-grammar/print.c +1085 -0
  282. data/ext/link_grammar/link-grammar/print.h +16 -0
  283. data/ext/link_grammar/link-grammar/prune.c +1864 -0
  284. data/ext/link_grammar/link-grammar/prune.h +17 -0
  285. data/ext/link_grammar/link-grammar/read-dict.c +1785 -0
  286. data/ext/link_grammar/link-grammar/read-dict.h +29 -0
  287. data/ext/link_grammar/link-grammar/read-regex.c +161 -0
  288. data/ext/link_grammar/link-grammar/read-regex.h +12 -0
  289. data/ext/link_grammar/link-grammar/regex-morph.c +126 -0
  290. data/ext/link_grammar/link-grammar/regex-morph.h +17 -0
  291. data/ext/link_grammar/link-grammar/resources.c +180 -0
  292. data/ext/link_grammar/link-grammar/resources.h +23 -0
  293. data/ext/link_grammar/link-grammar/sat-solver/.DS_Store +0 -0
  294. data/ext/link_grammar/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  295. data/ext/link_grammar/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  296. data/ext/link_grammar/link-grammar/sat-solver/.deps/util.Plo +1 -0
  297. data/ext/link_grammar/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  298. data/ext/link_grammar/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  299. data/ext/link_grammar/link-grammar/sat-solver/Makefile +527 -0
  300. data/ext/link_grammar/link-grammar/sat-solver/Makefile.am +29 -0
  301. data/ext/link_grammar/link-grammar/sat-solver/Makefile.in +527 -0
  302. data/ext/link_grammar/link-grammar/sat-solver/clock.hpp +33 -0
  303. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  304. data/ext/link_grammar/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  305. data/ext/link_grammar/link-grammar/sat-solver/guiding.hpp +244 -0
  306. data/ext/link_grammar/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  307. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  308. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.h +11 -0
  309. data/ext/link_grammar/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  310. data/ext/link_grammar/link-grammar/sat-solver/trie.hpp +118 -0
  311. data/ext/link_grammar/link-grammar/sat-solver/util.cpp +23 -0
  312. data/ext/link_grammar/link-grammar/sat-solver/util.hpp +14 -0
  313. data/ext/link_grammar/link-grammar/sat-solver/variables.cpp +5 -0
  314. data/ext/link_grammar/link-grammar/sat-solver/variables.hpp +829 -0
  315. data/ext/link_grammar/link-grammar/sat-solver/word-tag.cpp +159 -0
  316. data/ext/link_grammar/link-grammar/sat-solver/word-tag.hpp +162 -0
  317. data/ext/link_grammar/link-grammar/spellcheck-aspell.c +148 -0
  318. data/ext/link_grammar/link-grammar/spellcheck-hun.c +136 -0
  319. data/ext/link_grammar/link-grammar/spellcheck.h +34 -0
  320. data/ext/link_grammar/link-grammar/string-set.c +169 -0
  321. data/ext/link_grammar/link-grammar/string-set.h +16 -0
  322. data/ext/link_grammar/link-grammar/structures.h +498 -0
  323. data/ext/link_grammar/link-grammar/tokenize.c +1049 -0
  324. data/ext/link_grammar/link-grammar/tokenize.h +15 -0
  325. data/ext/link_grammar/link-grammar/utilities.c +847 -0
  326. data/ext/link_grammar/link-grammar/utilities.h +281 -0
  327. data/ext/link_grammar/link-grammar/word-file.c +124 -0
  328. data/ext/link_grammar/link-grammar/word-file.h +15 -0
  329. data/ext/link_grammar/link-grammar/word-utils.c +526 -0
  330. data/ext/link_grammar/link-grammar/word-utils.h +152 -0
  331. data/ext/link_grammar/link_grammar.c +202 -0
  332. data/ext/link_grammar/link_grammar.h +99 -0
  333. data/grammar_cop.gemspec +24 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_cop.rb +9 -0
  336. data/lib/grammar_cop/.DS_Store +0 -0
  337. data/lib/grammar_cop/dictionary.rb +19 -0
  338. data/lib/grammar_cop/linkage.rb +30 -0
  339. data/lib/grammar_cop/parse_options.rb +32 -0
  340. data/lib/grammar_cop/sentence.rb +36 -0
  341. data/lib/grammar_cop/version.rb +3 -0
  342. data/test/.DS_Store +0 -0
  343. data/test/grammar_cop_test.rb +27 -0
  344. metadata +407 -0
@@ -0,0 +1,15 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+ int separate_sentence(Sentence, Parse_Options);
14
+ int build_sentence_expressions(Sentence, Parse_Options);
15
+ int sentence_in_dictionary(Sentence);
@@ -0,0 +1,847 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* Copyright 2008, 2009 Linas Vepstas */
5
+ /* All rights reserved */
6
+ /* */
7
+ /* Use of the link grammar parsing system is subject to the terms of the */
8
+ /* license set forth in the LICENSE file included with this software, */
9
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
10
+ /* This license allows free redistribution and use in source and binary */
11
+ /* forms, with or without modification, subject to certain conditions. */
12
+ /* */
13
+ /*************************************************************************/
14
+
15
+ //#ifndef GRAMMAR_UTILITIES_H
16
+ //#define
17
+
18
+ #include "api.h"
19
+ #include <limits.h>
20
+ #include <locale.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <ctype.h>
24
+
25
+ #ifdef USE_PTHREADS
26
+ #include <pthread.h>
27
+ #endif
28
+
29
+
30
+
31
+ #ifdef ENABLE_BINRELOC
32
+ #include "prefix.h"
33
+ #endif /* BINRELOC */
34
+
35
+ #ifdef _WIN32
36
+ # include <windows.h>
37
+ # define DIR_SEPARATOR '\\'
38
+ # define PATH_SEPARATOR ';'
39
+ #else
40
+ # define DIR_SEPARATOR '/'
41
+ # define PATH_SEPARATOR ':'
42
+ #endif
43
+
44
+ #define IS_DIR_SEPARATOR(ch) (DIR_SEPARATOR == (ch))
45
+ //#ifdef _MSC_VER
46
+ //#define DICTIONARY_DIR "."
47
+ //#endif
48
+ #define DICTIONARY_DIR "../data/"
49
+ #define DEFAULTPATH DICTIONARY_DIR
50
+
51
+ /* This file contains certain general utilities. */
52
+ int verbosity;
53
+
54
+ /* ============================================================= */
55
+ /* String utilities */
56
+
57
/**
 * Duplicate a string, tolerating a NULL argument.
 *
 * @param u  String to copy, or NULL.
 * @return   The result of strdup(u) when u is non-NULL (release it with
 *           free()); NULL when u is NULL.
 */
char *safe_strdup(const char *u)
{
    return (u != NULL) ? strdup(u) : NULL;
}
63
+
64
/**
 * Copy as much of v into u as fits, always leaving u NUL-terminated.
 *
 * @param u      Destination buffer.
 * @param v      NUL-terminated source string; may overlap u.
 * @param usize  Total size of u in bytes. When 0, nothing is written.
 *
 * At most usize-1 bytes of v are examined. The unused tail of u is
 * zero-filled, matching what the previous strncpy()-based code did.
 */
void safe_strcpy(char *u, const char * v, size_t usize)
{
    const char *nul;
    size_t len;

    /* With no room at all, usize-1 would wrap around to SIZE_MAX. */
    if (usize == 0) return;

    /* Bounded length of v: never look past the bytes we could copy. */
    nul = memchr(v, '\0', usize - 1);
    len = (nul != NULL) ? (size_t)(nul - v) : usize - 1;

    /* memmove, not strncpy: callers shift a string down within its
     * own buffer, so source and destination may overlap. */
    memmove(u, v, len);
    memset(u + len, 0, usize - len);
}
73
+
74
/**
 * A bounded string copy in the spirit of strlcpy, for systems that
 * lack it.
 *
 * Copies src into dest, writing at most size bytes including the
 * terminating NUL. Unlike BSD strlcpy, the return value is the number
 * of characters actually stored in dest (excluding the NUL), not the
 * length of src. When size is 0 nothing is written and 0 is returned.
 */
size_t lg_strlcpy(char * dest, const char *src, size_t size)
{
    size_t n;

    for (n = 0; n < size && src[n] != '\0'; n++)
        dest[n] = src[n];

    if (n == size)
    {
        /* Ran out of room: give up the last slot for the terminator. */
        if (size == 0) return 0;
        n = size - 1;
    }

    dest[n] = '\0';
    return n;
}
89
+
90
/**
 * Append as much of v to u as fits, always leaving u NUL-terminated.
 *
 * @param u      Destination buffer holding a string.
 * @param v      NUL-terminated string to append.
 * @param usize  Total size of u in bytes. When 0, nothing is written.
 *
 * If u holds no NUL within its first usize bytes (it is already full
 * or over-full for the stated size), nothing is appended and u is
 * truncated to usize-1 characters.
 */
void safe_strcat(char *u, const char *v, size_t usize)
{
    const char *nul;

    if (usize == 0) return;

    /* Bounded search: strlen(u) could run past the buffer, and
     * usize - strlen(u) - 1 would wrap around if u were too long. */
    nul = memchr(u, '\0', usize);
    if (nul != NULL)
        strncat(u, v, usize - (size_t)(nul - u) - 1);

    u[usize-1] = '\0';
}
100
+
101
/**
 * Write exactly strlen(t) characters to fp: position i comes from s
 * while i is inside s, and from t afterwards. The output is thus s
 * followed by the tail of t, or a truncated s when s is the longer
 * string.
 */
void left_print_string(FILE * fp, const char * s, const char * t)
{
    int total = strlen(t);
    int lead = strlen(s);
    int pos;

    for (pos = 0; pos < total; pos++)
    {
        const char *src = (pos < lead) ? s : t;
        fputc(src[pos], fp);
    }
}
118
+
119
+ #ifdef _WIN32 /* should be !defined(HAVE_STRTOK_R) */
120
+
121
/**
 * Re-entrant tokenizer for builds whose C library lacks strtok_r.
 *
 * @param s        String to scan, or NULL to continue from *saveptr.
 * @param delim    Set of delimiter characters.
 * @param saveptr  Scan position carried between calls; set to NULL once
 *                 the input is exhausted.
 * @return         The next token (the input is modified in place), or
 *                 NULL when no token remains.
 */
char *
strtok_r (char *s, const char *delim, char **saveptr)
{
    char *tok = (s != NULL) ? s : *saveptr;
    char *end;

    if (tok == NULL)
        return NULL;

    /* Step over leading delimiters. */
    tok += strspn(tok, delim);
    if (*tok == '\0')
    {
        *saveptr = NULL;
        return NULL;
    }

    /* Cut the token at the first delimiter, if there is one. */
    end = strpbrk(tok, delim);
    if (end != NULL)
    {
        *end = 0x0;
        *saveptr = end + 1;
    }
    else
    {
        *saveptr = NULL;
    }

    return tok;
}
156
+
157
+ #endif /* _WIN32 should be !HAVE_STRTOK_R */
158
+
159
+ /* ============================================================= */
160
+ /* UTF8 utilities */
161
+
162
/**
 * Downcase the first (possibly multi-byte) character of a word.
 *
 * @param to     Destination buffer; may be the same pointer as from
 *               for an in-place conversion.
 * @param from   NUL-terminated source string in the current locale's
 *               multi-byte encoding.
 * @param usize  Size of the destination buffer in bytes.
 *
 * The remainder of the word is copied unchanged. If the first
 * character cannot be decoded, or the converted character does not
 * fit, an error is reported and the word is copied unchanged (or left
 * untouched when converting in place).
 */
void downcase_utf8_str(char *to, const char * from, size_t usize)
{
    wchar_t c;
    int i, nbl, nbh;
    char low[MB_LEN_MAX];
    mbstate_t mbss;

    if (usize == 0) return;

    nbh = mbtowc (&c, from, MB_CUR_MAX);
    if (nbh <= 0)
    {
        /* 0: empty string, nothing to convert.
         * <0: invalid sequence -- c was never written, so it must not
         * be passed on, nor may from be advanced by nbh. */
        if (nbh < 0)
            prt_error("Error: can't downcase invalid multi-byte string!\n");
        if (to != from) safe_strcpy(to, from, usize);
        return;
    }

    c = towlower(c);
    memset(&mbss, 0, sizeof(mbss));
    nbl = wctomb_check(low, c, &mbss);

    /* Check for error on an in-place copy */
    if ((nbh < nbl) && (to == from))
    {
        /* Growing the first character in place would require shifting
         * the rest of the string up; that is not implemented. */
        prt_error("Error: can't downcase multi-byte string!\n");
        return;
    }

    /* NOTE(review): wctomb_check is defined elsewhere; a negative
     * result is treated as a failure here -- confirm. The size test
     * keeps usize-nbl below from reaching zero or wrapping. */
    if ((nbl < 0) || ((size_t) nbl >= usize))
    {
        prt_error("Error: can't downcase multi-byte string!\n");
        if (to != from) safe_strcpy(to, from, usize);
        return;
    }

    /* Downcase */
    for (i=0; i<nbl; i++) { to[i] = low[i]; }

    /* Same byte length, same buffer: the tail is already in place. */
    if ((nbh == nbl) && (to == from)) return;

    from += nbh;
    to += nbl;
    safe_strcpy(to, from, usize-nbl);
}
194
+
195
/**
 * Upcase the first (possibly multi-byte) character of a word.
 *
 * @param to     Destination buffer; may be the same pointer as from
 *               for an in-place conversion.
 * @param from   NUL-terminated source string in the current locale's
 *               multi-byte encoding.
 * @param usize  Size of the destination buffer in bytes.
 *
 * The remainder of the word is copied unchanged. If the first
 * character cannot be decoded, or the converted character does not
 * fit, an error is reported and the word is copied unchanged (or left
 * untouched when converting in place).
 */
void upcase_utf8_str(char *to, const char * from, size_t usize)
{
    wchar_t c;
    int i, nbl, nbh;
    char up[MB_LEN_MAX];
    mbstate_t mbss;

    if (usize == 0) return;

    nbh = mbtowc (&c, from, MB_CUR_MAX);
    if (nbh <= 0)
    {
        /* 0: empty string, nothing to convert.
         * <0: invalid sequence -- c was never written, so it must not
         * be passed on, nor may from be advanced by nbh. */
        if (nbh < 0)
            prt_error("Error: can't upcase invalid multi-byte string!\n");
        if (to != from) safe_strcpy(to, from, usize);
        return;
    }

    c = towupper(c);
    memset(&mbss, 0, sizeof(mbss));
    nbl = wctomb_check(up, c, &mbss);

    /* Check for error on an in-place copy */
    if ((nbh < nbl) && (to == from))
    {
        /* Growing the first character in place would require shifting
         * the rest of the string up; that is not implemented. */
        prt_error("Error: can't upcase multi-byte string!\n");
        return;
    }

    /* NOTE(review): wctomb_check is defined elsewhere; a negative
     * result is treated as a failure here -- confirm. The size test
     * keeps usize-nbl below from reaching zero or wrapping. */
    if ((nbl < 0) || ((size_t) nbl >= usize))
    {
        prt_error("Error: can't upcase multi-byte string!\n");
        if (to != from) safe_strcpy(to, from, usize);
        return;
    }

    /* Upcase */
    for (i=0; i<nbl; i++) { to[i] = up[i]; }

    /* Same byte length, same buffer: the tail is already in place. */
    if ((nbh == nbl) && (to == from)) return;

    from += nbh;
    to += nbl;
    safe_strcpy(to, from, usize-nbl);
}
227
+
228
+ /* ============================================================= */
229
+ /* Memory alloc routines below. These routines attempt to keep
230
+ * track of how much space is getting used during a parse.
231
+ *
232
+ * This code is probably obsolescent, and should probably be dumped.
233
+ * No one (that I know of) looks at the space usage; it's one of the
234
+ * few areas that needs pthreads -- it would be great to just get
235
+ * rid of it (and thus get rid of pthreads).
236
+ */
237
+
238
+ #ifdef TRACK_SPACE_USAGE
239
/* Byte counters maintained by the allocation wrappers in this file. */
typedef struct
{
    size_t max_space_used;          /* largest value space_in_use has reached */
    size_t space_in_use;            /* bytes charged by xalloc/xrealloc, less those released by xfree */
    size_t max_external_space_used; /* largest value external_space_in_use has reached */
    size_t external_space_in_use;   /* bytes charged by exalloc, less those released by exfree */
} space_t;
246
+
247
+ #ifdef USE_PTHREADS
248
+ static pthread_key_t space_key;
249
+ static pthread_once_t space_key_once = PTHREAD_ONCE_INIT;
250
+
251
+ static void fini_memusage(void)
252
+ {
253
+ space_t *s = (space_t *) pthread_getspecific(space_key);
254
+ if (s)
255
+ {
256
+ free(s);
257
+ pthread_setspecific(space_key, NULL);
258
+ }
259
+ pthread_key_delete(space_key);
260
+ space_key = 0;
261
+ }
262
+
263
+ static void space_key_alloc(void)
264
+ {
265
+ int rc = pthread_key_create(&space_key, free);
266
+ if (0 == rc)
267
+ atexit(fini_memusage);
268
+ }
269
+ #else
270
+ static space_t space;
271
+ #endif
272
+
273
+ static space_t * do_init_memusage(void)
274
+ {
275
+ space_t *s;
276
+
277
+ #ifdef USE_PTHREADS
278
+ s = (space_t *) malloc(sizeof(space_t));
279
+ pthread_setspecific(space_key, s);
280
+ #else
281
+ s = &space;
282
+ #endif
283
+
284
+ s->max_space_used = 0;
285
+ s->space_in_use = 0;
286
+ s->max_external_space_used = 0;
287
+ s->external_space_in_use = 0;
288
+
289
+ return s;
290
+ }
291
+
292
+ void init_memusage(void)
293
+ {
294
+ #ifdef USE_PTHREADS
295
+ pthread_once(&space_key_once, space_key_alloc);
296
+ #else
297
+ static int mem_inited = FALSE;
298
+ if (mem_inited) return;
299
+ mem_inited = TRUE;
300
+ #endif
301
+ do_init_memusage();
302
+ }
303
+
304
+ static inline space_t *getspace(void)
305
+ {
306
+ #ifdef USE_PTHREADS
307
+ space_t *s = pthread_getspecific(space_key);
308
+ if (s) return s;
309
+ return do_init_memusage();
310
+ #else
311
+ return &space;
312
+ #endif
313
+ }
314
+
315
+ /**
316
+ * space used but not yet freed during parse
317
+ */
318
+ size_t get_space_in_use(void)
319
+ {
320
+ return getspace()->space_in_use;
321
+ }
322
+
323
+ /**
324
+ * maximum space used during the parse
325
+ */
326
+ size_t get_max_space_used(void)
327
+ {
328
+ return getspace()->max_space_used;
329
+ }
330
+ #else /* TRACK_SPACE_USAGE */
331
/* Tracking is compiled out: initialisation does nothing and both
 * queries report zero. */
void init_memusage(void)
{
}

size_t get_space_in_use(void)
{
    return 0;
}

size_t get_max_space_used(void)
{
    return 0;
}
334
+ #endif /* TRACK_SPACE_USAGE */
335
+
336
/**
 * Allocate size bytes, charging them to the usage record when
 * TRACK_SPACE_USAGE is defined.
 *
 * @return the new block. A NULL result from malloc(0) is passed
 *         through; any other allocation failure aborts the process.
 */
void * xalloc(size_t size)
{
    void * p = malloc(size);

    /* Fail before touching the counters; abort() does not return, so
     * nothing needs to follow it. */
    if ((p == NULL) && (size != 0))
    {
        prt_error("Fatal Error: Ran out of space.\n");
        abort();
    }

#ifdef TRACK_SPACE_USAGE
    {
        space_t *s = getspace();
        s->space_in_use += size;
        if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
    }
#endif /* TRACK_SPACE_USAGE */
    return p;
}
356
+
357
/**
 * Resize a block obtained from xalloc(), moving its charge in the
 * usage record from oldsize to newsize when TRACK_SPACE_USAGE is
 * defined.
 *
 * @param p        Block to resize.
 * @param oldsize  Size the block was previously charged at.
 * @param newsize  Requested size.
 * @return the resized block. A NULL result for newsize 0 is passed
 *         through; any other failure aborts the process.
 */
void * xrealloc(void *p, size_t oldsize, size_t newsize)
{
#ifdef TRACK_SPACE_USAGE
    space_t *s = getspace();
    s->space_in_use -= oldsize;
#endif /* TRACK_SPACE_USAGE */
    p = realloc(p, newsize);
    if ((p == NULL) && (newsize != 0))
    {
        /* abort() does not return, so nothing needs to follow it. */
        prt_error("Fatal Error: Ran out of space on realloc.\n");
        abort();
    }
#ifdef TRACK_SPACE_USAGE
    s->space_in_use += newsize;
    if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
#endif /* TRACK_SPACE_USAGE */
    return p;
}
376
+
377
+ #ifdef TRACK_SPACE_USAGE
378
+ void xfree(void * p, size_t size)
379
+ {
380
+ getspace()->space_in_use -= size;
381
+ free(p);
382
+ }
383
+ #endif /* TRACK_SPACE_USAGE */
384
+
385
/**
 * Allocate size bytes, charging them to the "external" counters of the
 * usage record when TRACK_SPACE_USAGE is defined.
 *
 * @return the new block. A NULL result from malloc(0) is passed
 *         through; any other allocation failure aborts the process.
 */
void * exalloc(size_t size)
{
    void * p = malloc(size);

    /* Fail before touching the counters; abort() does not return, so
     * nothing needs to follow it. */
    if ((p == NULL) && (size != 0))
    {
        prt_error("Fatal Error: Ran out of space.\n");
        abort();
    }

#ifdef TRACK_SPACE_USAGE
    {
        space_t *s = getspace();
        s->external_space_in_use += size;
        if (s->max_external_space_used < s->external_space_in_use)
            s->max_external_space_used = s->external_space_in_use;
    }
#endif /* TRACK_SPACE_USAGE */
    return p;
}
403
+
404
#ifdef TRACK_SPACE_USAGE
/**
 * Release `p` with free() and subtract `size` bytes from the
 * external_space_in_use counter (the counter that exalloc() adds to).
 * Only compiled when TRACK_SPACE_USAGE is defined.
 */
void exfree(void * p, size_t size)
{
	space_t *usage = getspace();

	usage->external_space_in_use -= size;
	free(p);
}
#endif /* TRACK_SPACE_USAGE */
411
+
412
+ /* =========================================================== */
413
+ /* File path and dictionary open routines below */
414
+
415
+ char * join_path(const char * prefix, const char * suffix)
416
+ {
417
+ char * path;
418
+ int path_len;
419
+
420
+ path_len = strlen(prefix) + 1 /* len(DIR_SEPARATOR) */ + strlen(suffix);
421
+ path = (char *) malloc(path_len + 1);
422
+
423
+ strcpy(path, prefix);
424
+ path[strlen(path)+1] = '\0';
425
+ path[strlen(path)] = DIR_SEPARATOR;
426
+ strcat(path, suffix);
427
+
428
+ return path;
429
+ }
430
+
431
#ifdef _WIN32
/**
 * Return the directory part of `file_name` as a newly allocated string
 * that the caller releases with free().
 *
 * Borrowed from glib; used only for Windows builds.
 *
 * - With no separator in the name, the result is "." or, for a name of
 *   the form "X:foo", the string "X:.".
 * - Drive roots and UNC shares get special treatment; see the comment
 *   in the body.
 *
 * If the result buffer cannot be allocated, the error is reported
 * through prt_error() and the process is aborted.
 */
static char*
path_get_dirname (const char *file_name)
{
	const char *base;       /* walks inside file_name; never written through */
	const char *fwd_slash;
	char *dir;
	size_t len;

	/* Locate the last separator, accepting '/' as well as DIR_SEPARATOR. */
	base = strrchr (file_name, DIR_SEPARATOR);
	fwd_slash = strrchr (file_name, '/');
	if (base == NULL || (fwd_slash != NULL && fwd_slash > base))
		base = fwd_slash;

	if (!base)
	{
		if (is_utf8_alpha (file_name) && file_name[1] == ':')
		{
			char drive_colon_dot[4];

			drive_colon_dot[0] = file_name[0];
			drive_colon_dot[1] = ':';
			drive_colon_dot[2] = '.';
			drive_colon_dot[3] = '\0';

			return safe_strdup (drive_colon_dot);
		}
		return safe_strdup (".");
	}

	/* Step back over the run of separators that precedes the last
	 * path component. */
	while (base > file_name && IS_DIR_SEPARATOR (*base))
		base--;

	/* base points to the char before the last slash.
	 *
	 * In case file_name is the root of a drive (X:\) or a child of the
	 * root of a drive (X:\foo), include the slash.
	 *
	 * In case file_name is the root share of an UNC path
	 * (\\server\share), add a slash, returning \\server\share\ .
	 *
	 * In case file_name is a direct child of a share in an UNC path
	 * (\\server\share\foo), include the slash after the share name,
	 * returning \\server\share\ .
	 */
	if (base == file_name + 1 && is_utf8_alpha (file_name) && file_name[1] == ':')
		base++;
	else if (IS_DIR_SEPARATOR (file_name[0]) &&
	         IS_DIR_SEPARATOR (file_name[1]) &&
	         file_name[2] &&
	         !IS_DIR_SEPARATOR (file_name[2]) &&
	         base >= file_name + 2)
	{
		const char *p = file_name + 2;
		while (*p && !IS_DIR_SEPARATOR (*p))
			p++;
		if (p == base + 1)
		{
			/* Root share: copy the whole name and append a separator. */
			len = strlen (file_name) + 1;
			dir = (char *) malloc (len + 1);
			if (dir == NULL)
			{
				prt_error("Fatal Error: Ran out of space.\n");
				abort();  /* does not return */
			}
			strcpy (dir, file_name);
			dir[len-1] = DIR_SEPARATOR;
			dir[len] = 0;
			return dir;
		}
		if (IS_DIR_SEPARATOR (*p))
		{
			p++;
			while (*p && !IS_DIR_SEPARATOR (*p))
				p++;
			if (p == base + 1)
				base++;
		}
	}

	/* base never moves before file_name, so the difference is >= 0. */
	len = (size_t) (base - file_name) + 1;

	dir = (char *) malloc (len + 1);
	if (dir == NULL)
	{
		prt_error("Fatal Error: Ran out of space.\n");
		abort();  /* does not return */
	}
	memcpy (dir, file_name, len);
	dir[len] = 0;

	return dir;
}
#endif /* _WIN32 */
522
+
523
/* File-scope override for the data directory, or NULL when none is set.
 * Original author's note: this is global, but that's OK, since it is set
 * only during initialization and is thenceforth a read-only item, so it
 * doesn't need to be locked.
 */
static char * custom_data_dir = NULL;

/**
 * Replace the data-directory override with a copy of `path` made by
 * safe_strdup().  Any previously stored override is released first.
 */
void dictionary_set_data_dir(const char * path)
{
	free (custom_data_dir);  /* free(NULL) is a no-op */
	custom_data_dir = safe_strdup(path);
}
534
+
535
+ char * dictionary_get_data_dir(void)
536
+ {
537
+ #ifdef _WIN32
538
+ HINSTANCE hInstance;
539
+ #endif
540
+ char * data_dir = NULL;
541
+
542
+ if (custom_data_dir != NULL) {
543
+ data_dir = safe_strdup(custom_data_dir);
544
+ return data_dir;
545
+ }
546
+
547
+ #ifdef ENABLE_BINRELOC
548
+ data_dir = safe_strdup (BR_DATADIR("/link-grammar"));
549
+ #elif defined(_WIN32)
550
+ /* Dynamically locate library and return containing directory */
551
+ hInstance = GetModuleHandle("link-grammar.dll");
552
+ if(hInstance != NULL)
553
+ {
554
+ char dll_path[MAX_PATH];
555
+
556
+ if(GetModuleFileName(hInstance,dll_path,MAX_PATH)) {
557
+ #ifdef _DEBUG
558
+ prt_error("Info: GetModuleFileName=%s\n", (dll_path?dll_path:"NULL"));
559
+ #endif
560
+ data_dir = path_get_dirname(dll_path);
561
+ }
562
+ }
563
+ #endif
564
+
565
+ return data_dir;
566
+ }
567
+
568
+ /**
569
+ * object_open() -- dictopen() - open a dictionary
570
+ *
571
+ * This function is used to open a dictionary file or a word file,
572
+ * or any associated data file (like a post process knowledge file).
573
+ *
574
+ * It works as follows. If the file name begins with a "/", then
575
+ * it's assumed to be an absolute file name and it tries to open
576
+ * that exact file.
577
+ *
578
+ * If the filename does not begin with a "/", then it uses the
579
+ * dictpath mechanism to find the right file to open. This looks
580
+ * for the file in a sequence of directories until it finds it. The
581
+ * sequence of directories is specified in a dictpath string, in
582
+ * which each directory is followed by a ":".
583
+ */
584
+ void * object_open(const char *filename,
585
+ void * (*opencb)(const char *, void *),
586
+ void * user_data)
587
+ {
588
+ char completename[MAX_PATH_NAME+1];
589
+ char fulldictpath[MAX_PATH_NAME+1];
590
+ static char prevpath[MAX_PATH_NAME+1] = "";
591
+ static int first_time_ever = 1;
592
+ char *pos, *oldpos;
593
+ int filenamelen, len;
594
+ void *fp;
595
+
596
+ /* Record the first path ever used, so that we can recycle it */
597
+ if (first_time_ever)
598
+ {
599
+ strncpy (prevpath, filename, MAX_PATH_NAME);
600
+ prevpath[MAX_PATH_NAME] = 0;
601
+ pos = strrchr(prevpath, DIR_SEPARATOR);
602
+ if (pos) *pos = 0;
603
+ pos = strrchr(prevpath, DIR_SEPARATOR);
604
+ if (pos) *(pos+1) = 0;
605
+ first_time_ever = 0;
606
+ }
607
+
608
+ /* Look for absolute filename.
609
+ * Unix: starts with leading slash.
610
+ * Windows: starts with C:\ except that the drive letter may differ.
611
+ */
612
+ if ((filename[0] == '/') || ((filename[1] == ':') && (filename[2] == '\\')))
613
+ {
614
+ /* fopen returns NULL if the file does not exist. */
615
+ fp = opencb(filename, user_data);
616
+ if (fp) return fp;
617
+ }
618
+
619
+ {
620
+ char * data_dir = dictionary_get_data_dir();
621
+ #ifdef _DEBUG
622
+ prt_error("Info: data_dir=%s\n", (data_dir?data_dir:"NULL"));
623
+ #endif
624
+ if (data_dir) {
625
+ snprintf(fulldictpath, MAX_PATH_NAME,
626
+ "%s%c%s%c", data_dir, PATH_SEPARATOR,
627
+ DEFAULTPATH, PATH_SEPARATOR);
628
+ free(data_dir);
629
+ }
630
+ else {
631
+ /* Always make sure that it ends with a path
632
+ * separator char for the below while() loop.
633
+ * For unix, this should look like:
634
+ * /usr/share/link-grammar:.:data:..:../data:
635
+ * For windows:
636
+ * C:\SOMWHERE;.;data;..;..\data;
637
+ */
638
+ snprintf(fulldictpath, MAX_PATH_NAME,
639
+ "%s%c%s%c%s%c%s%c%s%c%s%c%s%c",
640
+ prevpath, PATH_SEPARATOR,
641
+ DEFAULTPATH, PATH_SEPARATOR,
642
+ ".", PATH_SEPARATOR,
643
+ "data", PATH_SEPARATOR,
644
+ "..", PATH_SEPARATOR,
645
+ "..", DIR_SEPARATOR, "data", PATH_SEPARATOR);
646
+ }
647
+ }
648
+
649
+ /* Now fulldictpath is our dictpath, where each entry is
650
+ * followed by a ":" including the last one */
651
+
652
+ filenamelen = strlen(filename);
653
+ len = strlen(fulldictpath)+ filenamelen + 1 + 1;
654
+ oldpos = fulldictpath;
655
+ while ((pos = strchr(oldpos, PATH_SEPARATOR)) != NULL)
656
+ {
657
+ strncpy(completename, oldpos, (pos-oldpos));
658
+ *(completename+(pos-oldpos)) = DIR_SEPARATOR;
659
+ strcpy(completename+(pos-oldpos)+1,filename);
660
+ #ifdef _DEBUG
661
+ prt_error("Info: object_open() trying %s\n", completename);
662
+ #endif
663
+ if ((fp = opencb(completename, user_data)) != NULL) {
664
+ return fp;
665
+ }
666
+ oldpos = pos+1;
667
+ }
668
+ return NULL;
669
+ }
670
+
671
/* XXX static global variable used during dictionary open.
 * Holds a copy of the first path that dict_file_open() opened
 * successfully, or NULL before that has happened. */
static char *path_found = NULL;

/**
 * Open callback handed to object_open() by dictopen().
 *
 * `user_data` carries the fopen() mode string.  Returns the opened
 * stream as a void pointer, or NULL when fopen() fails.  On the first
 * success (while path_found is still NULL) a copy of `fullname` is
 * stored in path_found and the location is reported through prt_error().
 */
static void * dict_file_open(const char * fullname, void * user_data)
{
	FILE * stream = fopen(fullname, (const char *) user_data);

	if (stream == NULL)
		return NULL;

	if (path_found == NULL)
	{
		path_found = strdup (fullname);
		prt_error("Info: Dictionary found at %s\n", fullname);
	}
	return (void *) stream;
}
685
+
686
+ FILE *dictopen(const char *filename, const char *how)
687
+ {
688
+ FILE * fh = NULL;
689
+ void * ud = (void *) how;
690
+
691
+ /* If not the first time through, look for the other dictionaries
692
+ * in the *same* directory in which the first one was found.
693
+ * (The first one is typcailly "en/4.0.dict")
694
+ * The global "path_found" records where the first dict was found.
695
+ * The goal here is to avoid fractured install insanity.
696
+ */
697
+ if (path_found)
698
+ {
699
+ size_t sz = strlen (path_found) + strlen(filename) + 1;
700
+ char * fullname = (char *) malloc (sz);
701
+ strcpy(fullname, path_found);
702
+ strcat(fullname, filename);
703
+ fh = (FILE *) object_open(fullname, dict_file_open, ud);
704
+ free(fullname);
705
+ }
706
+ else
707
+ {
708
+ fh = (FILE *) object_open(filename, dict_file_open, ud);
709
+ if (path_found)
710
+ {
711
+ char * root = strstr(path_found, filename);
712
+ *root = 0;
713
+ }
714
+ }
715
+ return fh;
716
+ }
717
+
718
+ /* ======================================================== */
719
+ /* Locale routines */
720
+
721
#ifdef _WIN32

/**
 * Determine the locale name on Windows and return it as a newly
 * allocated string made by safe_strdup().
 *
 * If LC_ALL, LC_MESSAGES or LANG is set to a non-empty value, the first
 * such value (checked in that order) is returned as-is.  Otherwise the
 * name is built from the thread locale as
 *     <iso639>_<iso3166>[@<script>]
 * where the script suffix ("@Latn" / "@Cyrl") is added only for the
 * Azeri, Serbian and Uzbek sublanguages handled below.  "C" is returned
 * if the language or country name cannot be queried.
 */
static char *
win32_getlocale (void)
{
	LCID lcid;
	LANGID langid;
	char *ev;
	int primary, sub;
	char bfr[64];
	char iso639[10];
	char iso3166[10];
	const char *script = NULL;

	/* Let the user override the system settings through environment
	 * variables, as on POSIX systems. Note that in GTK+ applications
	 * since GTK+ 2.10.7 setting either LC_ALL or LANG also sets the
	 * Win32 locale and C library locale through code in gtkmain.c.
	 */
	if (((ev = getenv ("LC_ALL")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LC_MESSAGES")) != NULL && ev[0] != '\0')
	    || ((ev = getenv ("LANG")) != NULL && ev[0] != '\0'))
		return safe_strdup (ev);

	lcid = GetThreadLocale ();

	if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, sizeof (iso639)) ||
	    !GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, sizeof (iso3166)))
		return safe_strdup ("C");

	/* Strip off the sorting rules, keep only the language part. */
	langid = LANGIDFROMLCID (lcid);

	/* Split into language and territory part. */
	primary = PRIMARYLANGID (langid);
	sub = SUBLANGID (langid);

	/* Handle special cases */
	switch (primary)
	{
		case LANG_AZERI:
			switch (sub)
			{
				case SUBLANG_AZERI_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_AZERI_CYRILLIC:
					script = "@Cyrl";
					break;
			}
			break;
		case LANG_SERBIAN: /* LANG_CROATIAN == LANG_SERBIAN */
			switch (sub)
			{
				case SUBLANG_SERBIAN_LATIN:
				case 0x06: /* Serbian (Latin) - Bosnia and Herzegovina */
					script = "@Latn";
					break;
			}
			break;
		case LANG_UZBEK:
			switch (sub)
			{
				case SUBLANG_UZBEK_LATIN:
					script = "@Latn";
					break;
				case SUBLANG_UZBEK_CYRILLIC:
					script = "@Cyrl";
					break;
			}
			break;
	}

	/* bfr starts out uninitialised, so the first piece must be copied,
	 * not appended.  At most 9 + 1 + 9 + 5 characters plus the NUL are
	 * written, which fits in the 64-byte buffer. */
	strcpy (bfr, iso639);
	strcat (bfr, "_");
	strcat (bfr, iso3166);

	if (script)
		strcat (bfr, script);

	return safe_strdup (bfr);
}

#endif
805
+
806
/**
 * Return the bare language code of the default locale (for example "en"
 * for "en_GB.UTF-8@euro") as a newly allocated string that the caller
 * releases with free().
 *
 * Sources are tried in this order until one yields a value:
 * win32_getlocale() (on _WIN32 only), the LANG environment variable,
 * setlocale(LC_MESSAGES, NULL) (if HAVE_LC_MESSAGES), and
 * setlocale(LC_ALL, NULL).
 *
 * Everything from the first '@', '.' or '_' onward is removed.  If
 * nothing was found, or what remains is empty, "C" or "POSIX", then "en"
 * is returned.  The suffix is stripped BEFORE this test so that values
 * such as "C.UTF-8" also map to "en".
 *
 * Like the code it replaces, this relies on safe_strdup() returning NULL
 * when it is given NULL.
 */
char * get_default_locale(void)
{
	char * locale = NULL;

#ifdef _WIN32
	locale = win32_getlocale ();
#endif

	if(!locale)
		locale = safe_strdup (getenv ("LANG"));

#if defined(HAVE_LC_MESSAGES)
	if(!locale)
		locale = safe_strdup (setlocale (LC_MESSAGES, NULL));
#endif

	if(!locale)
		locale = safe_strdup (setlocale (LC_ALL, NULL));

	/* Strip "@euro", ".UTF-8" and "_GB" style suffixes: keep only the
	 * text before the first of these three delimiters. */
	if (locale)
		locale[strcspn (locale, "@._")] = '\0';

	if (!locale || locale[0] == '\0' ||
	    strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0)
	{
		free(locale);
		locale = safe_strdup("en");
	}

	return locale;
}
847
+ /* ========================== END OF FILE =================== */