grammar_police 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (345) hide show
  1. data/.DS_Store +0 -0
  2. data/.gitignore +4 -0
  3. data/Gemfile +4 -0
  4. data/Rakefile +2 -0
  5. data/c/.DS_Store +0 -0
  6. data/c/link-grammar.c +65 -0
  7. data/c/link-grammar.h +60 -0
  8. data/c/link-grammar.o +0 -0
  9. data/c/link-grammar.so +0 -0
  10. data/c/link-grammar/.DS_Store +0 -0
  11. data/c/link-grammar/.deps/analyze-linkage.Plo +198 -0
  12. data/c/link-grammar/.deps/and.Plo +202 -0
  13. data/c/link-grammar/.deps/api.Plo +244 -0
  14. data/c/link-grammar/.deps/build-disjuncts.Plo +212 -0
  15. data/c/link-grammar/.deps/command-line.Plo +201 -0
  16. data/c/link-grammar/.deps/constituents.Plo +201 -0
  17. data/c/link-grammar/.deps/count.Plo +202 -0
  18. data/c/link-grammar/.deps/disjunct-utils.Plo +126 -0
  19. data/c/link-grammar/.deps/disjuncts.Plo +123 -0
  20. data/c/link-grammar/.deps/error.Plo +121 -0
  21. data/c/link-grammar/.deps/expand.Plo +133 -0
  22. data/c/link-grammar/.deps/extract-links.Plo +198 -0
  23. data/c/link-grammar/.deps/fast-match.Plo +200 -0
  24. data/c/link-grammar/.deps/idiom.Plo +200 -0
  25. data/c/link-grammar/.deps/jni-client.Plo +217 -0
  26. data/c/link-grammar/.deps/link-parser.Po +1 -0
  27. data/c/link-grammar/.deps/massage.Plo +202 -0
  28. data/c/link-grammar/.deps/post-process.Plo +202 -0
  29. data/c/link-grammar/.deps/pp_knowledge.Plo +202 -0
  30. data/c/link-grammar/.deps/pp_lexer.Plo +201 -0
  31. data/c/link-grammar/.deps/pp_linkset.Plo +200 -0
  32. data/c/link-grammar/.deps/prefix.Plo +102 -0
  33. data/c/link-grammar/.deps/preparation.Plo +202 -0
  34. data/c/link-grammar/.deps/print-util.Plo +200 -0
  35. data/c/link-grammar/.deps/print.Plo +201 -0
  36. data/c/link-grammar/.deps/prune.Plo +202 -0
  37. data/c/link-grammar/.deps/read-dict.Plo +223 -0
  38. data/c/link-grammar/.deps/read-regex.Plo +123 -0
  39. data/c/link-grammar/.deps/regex-morph.Plo +131 -0
  40. data/c/link-grammar/.deps/resources.Plo +203 -0
  41. data/c/link-grammar/.deps/spellcheck-aspell.Plo +1 -0
  42. data/c/link-grammar/.deps/spellcheck-hun.Plo +115 -0
  43. data/c/link-grammar/.deps/string-set.Plo +198 -0
  44. data/c/link-grammar/.deps/tokenize.Plo +160 -0
  45. data/c/link-grammar/.deps/utilities.Plo +222 -0
  46. data/c/link-grammar/.deps/word-file.Plo +201 -0
  47. data/c/link-grammar/.deps/word-utils.Plo +212 -0
  48. data/c/link-grammar/.libs/analyze-linkage.o +0 -0
  49. data/c/link-grammar/.libs/and.o +0 -0
  50. data/c/link-grammar/.libs/api.o +0 -0
  51. data/c/link-grammar/.libs/build-disjuncts.o +0 -0
  52. data/c/link-grammar/.libs/command-line.o +0 -0
  53. data/c/link-grammar/.libs/constituents.o +0 -0
  54. data/c/link-grammar/.libs/count.o +0 -0
  55. data/c/link-grammar/.libs/disjunct-utils.o +0 -0
  56. data/c/link-grammar/.libs/disjuncts.o +0 -0
  57. data/c/link-grammar/.libs/error.o +0 -0
  58. data/c/link-grammar/.libs/expand.o +0 -0
  59. data/c/link-grammar/.libs/extract-links.o +0 -0
  60. data/c/link-grammar/.libs/fast-match.o +0 -0
  61. data/c/link-grammar/.libs/idiom.o +0 -0
  62. data/c/link-grammar/.libs/jni-client.o +0 -0
  63. data/c/link-grammar/.libs/liblink-grammar-java-symbols.expsym +31 -0
  64. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib +0 -0
  65. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Info.plist +20 -0
  66. data/c/link-grammar/.libs/liblink-grammar-java.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar-java.4.dylib +0 -0
  67. data/c/link-grammar/.libs/liblink-grammar-java.a +0 -0
  68. data/c/link-grammar/.libs/liblink-grammar-java.dylib +0 -0
  69. data/c/link-grammar/.libs/liblink-grammar-symbols.expsym +194 -0
  70. data/c/link-grammar/.libs/liblink-grammar.4.dylib +0 -0
  71. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Info.plist +20 -0
  72. data/c/link-grammar/.libs/liblink-grammar.4.dylib.dSYM/Contents/Resources/DWARF/liblink-grammar.4.dylib +0 -0
  73. data/c/link-grammar/.libs/liblink-grammar.a +0 -0
  74. data/c/link-grammar/.libs/liblink-grammar.dylib +0 -0
  75. data/c/link-grammar/.libs/liblink-grammar.la +41 -0
  76. data/c/link-grammar/.libs/liblink-grammar.lai +41 -0
  77. data/c/link-grammar/.libs/massage.o +0 -0
  78. data/c/link-grammar/.libs/post-process.o +0 -0
  79. data/c/link-grammar/.libs/pp_knowledge.o +0 -0
  80. data/c/link-grammar/.libs/pp_lexer.o +0 -0
  81. data/c/link-grammar/.libs/pp_linkset.o +0 -0
  82. data/c/link-grammar/.libs/prefix.o +0 -0
  83. data/c/link-grammar/.libs/preparation.o +0 -0
  84. data/c/link-grammar/.libs/print-util.o +0 -0
  85. data/c/link-grammar/.libs/print.o +0 -0
  86. data/c/link-grammar/.libs/prune.o +0 -0
  87. data/c/link-grammar/.libs/read-dict.o +0 -0
  88. data/c/link-grammar/.libs/read-regex.o +0 -0
  89. data/c/link-grammar/.libs/regex-morph.o +0 -0
  90. data/c/link-grammar/.libs/resources.o +0 -0
  91. data/c/link-grammar/.libs/spellcheck-aspell.o +0 -0
  92. data/c/link-grammar/.libs/spellcheck-hun.o +0 -0
  93. data/c/link-grammar/.libs/string-set.o +0 -0
  94. data/c/link-grammar/.libs/tokenize.o +0 -0
  95. data/c/link-grammar/.libs/utilities.o +0 -0
  96. data/c/link-grammar/.libs/word-file.o +0 -0
  97. data/c/link-grammar/.libs/word-utils.o +0 -0
  98. data/c/link-grammar/Makefile +900 -0
  99. data/c/link-grammar/Makefile.am +202 -0
  100. data/c/link-grammar/Makefile.in +900 -0
  101. data/c/link-grammar/analyze-linkage.c +1317 -0
  102. data/c/link-grammar/analyze-linkage.h +24 -0
  103. data/c/link-grammar/and.c +1603 -0
  104. data/c/link-grammar/and.h +27 -0
  105. data/c/link-grammar/api-structures.h +362 -0
  106. data/c/link-grammar/api-types.h +72 -0
  107. data/c/link-grammar/api.c +1887 -0
  108. data/c/link-grammar/api.h +96 -0
  109. data/c/link-grammar/autoit/.DS_Store +0 -0
  110. data/c/link-grammar/autoit/README +10 -0
  111. data/c/link-grammar/autoit/_LGTest.au3 +22 -0
  112. data/c/link-grammar/autoit/_LinkGrammar.au3 +545 -0
  113. data/c/link-grammar/build-disjuncts.c +487 -0
  114. data/c/link-grammar/build-disjuncts.h +21 -0
  115. data/c/link-grammar/command-line.c +458 -0
  116. data/c/link-grammar/command-line.h +15 -0
  117. data/c/link-grammar/constituents.c +1836 -0
  118. data/c/link-grammar/constituents.h +26 -0
  119. data/c/link-grammar/corpus/.DS_Store +0 -0
  120. data/c/link-grammar/corpus/.deps/cluster.Plo +1 -0
  121. data/c/link-grammar/corpus/.deps/corpus.Plo +1 -0
  122. data/c/link-grammar/corpus/Makefile +527 -0
  123. data/c/link-grammar/corpus/Makefile.am +46 -0
  124. data/c/link-grammar/corpus/Makefile.in +527 -0
  125. data/c/link-grammar/corpus/README +17 -0
  126. data/c/link-grammar/corpus/cluster.c +286 -0
  127. data/c/link-grammar/corpus/cluster.h +32 -0
  128. data/c/link-grammar/corpus/corpus.c +483 -0
  129. data/c/link-grammar/corpus/corpus.h +46 -0
  130. data/c/link-grammar/count.c +828 -0
  131. data/c/link-grammar/count.h +25 -0
  132. data/c/link-grammar/disjunct-utils.c +261 -0
  133. data/c/link-grammar/disjunct-utils.h +27 -0
  134. data/c/link-grammar/disjuncts.c +138 -0
  135. data/c/link-grammar/disjuncts.h +13 -0
  136. data/c/link-grammar/error.c +92 -0
  137. data/c/link-grammar/error.h +35 -0
  138. data/c/link-grammar/expand.c +67 -0
  139. data/c/link-grammar/expand.h +13 -0
  140. data/c/link-grammar/externs.h +22 -0
  141. data/c/link-grammar/extract-links.c +625 -0
  142. data/c/link-grammar/extract-links.h +16 -0
  143. data/c/link-grammar/fast-match.c +309 -0
  144. data/c/link-grammar/fast-match.h +17 -0
  145. data/c/link-grammar/idiom.c +373 -0
  146. data/c/link-grammar/idiom.h +15 -0
  147. data/c/link-grammar/jni-client.c +779 -0
  148. data/c/link-grammar/jni-client.h +236 -0
  149. data/c/link-grammar/liblink-grammar-java.la +42 -0
  150. data/c/link-grammar/liblink-grammar.la +41 -0
  151. data/c/link-grammar/link-features.h +37 -0
  152. data/c/link-grammar/link-features.h.in +37 -0
  153. data/c/link-grammar/link-grammar-java.def +31 -0
  154. data/c/link-grammar/link-grammar.def +194 -0
  155. data/c/link-grammar/link-includes.h +465 -0
  156. data/c/link-grammar/link-parser.c +849 -0
  157. data/c/link-grammar/massage.c +329 -0
  158. data/c/link-grammar/massage.h +13 -0
  159. data/c/link-grammar/post-process.c +1113 -0
  160. data/c/link-grammar/post-process.h +45 -0
  161. data/c/link-grammar/pp_knowledge.c +376 -0
  162. data/c/link-grammar/pp_knowledge.h +14 -0
  163. data/c/link-grammar/pp_lexer.c +1920 -0
  164. data/c/link-grammar/pp_lexer.h +19 -0
  165. data/c/link-grammar/pp_linkset.c +158 -0
  166. data/c/link-grammar/pp_linkset.h +20 -0
  167. data/c/link-grammar/prefix.c +482 -0
  168. data/c/link-grammar/prefix.h +139 -0
  169. data/c/link-grammar/preparation.c +412 -0
  170. data/c/link-grammar/preparation.h +20 -0
  171. data/c/link-grammar/print-util.c +87 -0
  172. data/c/link-grammar/print-util.h +32 -0
  173. data/c/link-grammar/print.c +1085 -0
  174. data/c/link-grammar/print.h +16 -0
  175. data/c/link-grammar/prune.c +1864 -0
  176. data/c/link-grammar/prune.h +17 -0
  177. data/c/link-grammar/read-dict.c +1785 -0
  178. data/c/link-grammar/read-dict.h +29 -0
  179. data/c/link-grammar/read-regex.c +161 -0
  180. data/c/link-grammar/read-regex.h +12 -0
  181. data/c/link-grammar/regex-morph.c +126 -0
  182. data/c/link-grammar/regex-morph.h +17 -0
  183. data/c/link-grammar/resources.c +180 -0
  184. data/c/link-grammar/resources.h +23 -0
  185. data/c/link-grammar/sat-solver/.DS_Store +0 -0
  186. data/c/link-grammar/sat-solver/.deps/fast-sprintf.Plo +1 -0
  187. data/c/link-grammar/sat-solver/.deps/sat-encoder.Plo +1 -0
  188. data/c/link-grammar/sat-solver/.deps/util.Plo +1 -0
  189. data/c/link-grammar/sat-solver/.deps/variables.Plo +1 -0
  190. data/c/link-grammar/sat-solver/.deps/word-tag.Plo +1 -0
  191. data/c/link-grammar/sat-solver/Makefile +527 -0
  192. data/c/link-grammar/sat-solver/Makefile.am +29 -0
  193. data/c/link-grammar/sat-solver/Makefile.in +527 -0
  194. data/c/link-grammar/sat-solver/clock.hpp +33 -0
  195. data/c/link-grammar/sat-solver/fast-sprintf.cpp +26 -0
  196. data/c/link-grammar/sat-solver/fast-sprintf.hpp +7 -0
  197. data/c/link-grammar/sat-solver/guiding.hpp +244 -0
  198. data/c/link-grammar/sat-solver/matrix-ut.hpp +79 -0
  199. data/c/link-grammar/sat-solver/sat-encoder.cpp +2811 -0
  200. data/c/link-grammar/sat-solver/sat-encoder.h +11 -0
  201. data/c/link-grammar/sat-solver/sat-encoder.hpp +381 -0
  202. data/c/link-grammar/sat-solver/trie.hpp +118 -0
  203. data/c/link-grammar/sat-solver/util.cpp +23 -0
  204. data/c/link-grammar/sat-solver/util.hpp +14 -0
  205. data/c/link-grammar/sat-solver/variables.cpp +5 -0
  206. data/c/link-grammar/sat-solver/variables.hpp +829 -0
  207. data/c/link-grammar/sat-solver/word-tag.cpp +159 -0
  208. data/c/link-grammar/sat-solver/word-tag.hpp +162 -0
  209. data/c/link-grammar/spellcheck-aspell.c +148 -0
  210. data/c/link-grammar/spellcheck-hun.c +136 -0
  211. data/c/link-grammar/spellcheck.h +34 -0
  212. data/c/link-grammar/string-set.c +169 -0
  213. data/c/link-grammar/string-set.h +16 -0
  214. data/c/link-grammar/structures.h +498 -0
  215. data/c/link-grammar/tokenize.c +1049 -0
  216. data/c/link-grammar/tokenize.h +15 -0
  217. data/c/link-grammar/utilities.c +847 -0
  218. data/c/link-grammar/utilities.h +281 -0
  219. data/c/link-grammar/word-file.c +124 -0
  220. data/c/link-grammar/word-file.h +15 -0
  221. data/c/link-grammar/word-utils.c +526 -0
  222. data/c/link-grammar/word-utils.h +152 -0
  223. data/data/.DS_Store +0 -0
  224. data/data/Makefile +511 -0
  225. data/data/Makefile.am +4 -0
  226. data/data/Makefile.in +511 -0
  227. data/data/de/.DS_Store +0 -0
  228. data/data/de/4.0.affix +7 -0
  229. data/data/de/4.0.dict +474 -0
  230. data/data/de/Makefile +387 -0
  231. data/data/de/Makefile.am +9 -0
  232. data/data/de/Makefile.in +387 -0
  233. data/data/en/.DS_Store +0 -0
  234. data/data/en/4.0.affix +26 -0
  235. data/data/en/4.0.batch +1002 -0
  236. data/data/en/4.0.biolg.batch +411 -0
  237. data/data/en/4.0.constituent-knowledge +127 -0
  238. data/data/en/4.0.dict +8759 -0
  239. data/data/en/4.0.dict.m4 +6928 -0
  240. data/data/en/4.0.enwiki.batch +14 -0
  241. data/data/en/4.0.fixes.batch +2776 -0
  242. data/data/en/4.0.knowledge +306 -0
  243. data/data/en/4.0.regex +225 -0
  244. data/data/en/4.0.voa.batch +114 -0
  245. data/data/en/Makefile +554 -0
  246. data/data/en/Makefile.am +19 -0
  247. data/data/en/Makefile.in +554 -0
  248. data/data/en/README +173 -0
  249. data/data/en/tiny.dict +157 -0
  250. data/data/en/words/.DS_Store +0 -0
  251. data/data/en/words/Makefile +456 -0
  252. data/data/en/words/Makefile.am +78 -0
  253. data/data/en/words/Makefile.in +456 -0
  254. data/data/en/words/currency +205 -0
  255. data/data/en/words/currency.p +28 -0
  256. data/data/en/words/entities.given-bisex.sing +39 -0
  257. data/data/en/words/entities.given-female.sing +4141 -0
  258. data/data/en/words/entities.given-male.sing +1633 -0
  259. data/data/en/words/entities.locations.sing +68 -0
  260. data/data/en/words/entities.national.sing +253 -0
  261. data/data/en/words/entities.organizations.sing +7 -0
  262. data/data/en/words/entities.us-states.sing +11 -0
  263. data/data/en/words/units.1 +45 -0
  264. data/data/en/words/units.1.dot +4 -0
  265. data/data/en/words/units.3 +2 -0
  266. data/data/en/words/units.4 +5 -0
  267. data/data/en/words/units.4.dot +1 -0
  268. data/data/en/words/words-medical.adv.1 +1191 -0
  269. data/data/en/words/words-medical.prep.1 +67 -0
  270. data/data/en/words/words-medical.v.4.1 +2835 -0
  271. data/data/en/words/words-medical.v.4.2 +2848 -0
  272. data/data/en/words/words-medical.v.4.3 +3011 -0
  273. data/data/en/words/words-medical.v.4.4 +3036 -0
  274. data/data/en/words/words-medical.v.4.5 +3050 -0
  275. data/data/en/words/words.adj.1 +6794 -0
  276. data/data/en/words/words.adj.2 +638 -0
  277. data/data/en/words/words.adj.3 +667 -0
  278. data/data/en/words/words.adv.1 +1573 -0
  279. data/data/en/words/words.adv.2 +67 -0
  280. data/data/en/words/words.adv.3 +157 -0
  281. data/data/en/words/words.adv.4 +80 -0
  282. data/data/en/words/words.n.1 +11464 -0
  283. data/data/en/words/words.n.1.wiki +264 -0
  284. data/data/en/words/words.n.2.s +2017 -0
  285. data/data/en/words/words.n.2.s.biolg +1 -0
  286. data/data/en/words/words.n.2.s.wiki +298 -0
  287. data/data/en/words/words.n.2.x +65 -0
  288. data/data/en/words/words.n.2.x.wiki +10 -0
  289. data/data/en/words/words.n.3 +5717 -0
  290. data/data/en/words/words.n.t +23 -0
  291. data/data/en/words/words.v.1.1 +1038 -0
  292. data/data/en/words/words.v.1.2 +1043 -0
  293. data/data/en/words/words.v.1.3 +1052 -0
  294. data/data/en/words/words.v.1.4 +1023 -0
  295. data/data/en/words/words.v.1.p +17 -0
  296. data/data/en/words/words.v.10.1 +14 -0
  297. data/data/en/words/words.v.10.2 +15 -0
  298. data/data/en/words/words.v.10.3 +88 -0
  299. data/data/en/words/words.v.10.4 +17 -0
  300. data/data/en/words/words.v.2.1 +1253 -0
  301. data/data/en/words/words.v.2.2 +1304 -0
  302. data/data/en/words/words.v.2.3 +1280 -0
  303. data/data/en/words/words.v.2.4 +1285 -0
  304. data/data/en/words/words.v.2.5 +1287 -0
  305. data/data/en/words/words.v.4.1 +2472 -0
  306. data/data/en/words/words.v.4.2 +2487 -0
  307. data/data/en/words/words.v.4.3 +2441 -0
  308. data/data/en/words/words.v.4.4 +2478 -0
  309. data/data/en/words/words.v.4.5 +2483 -0
  310. data/data/en/words/words.v.5.1 +98 -0
  311. data/data/en/words/words.v.5.2 +98 -0
  312. data/data/en/words/words.v.5.3 +103 -0
  313. data/data/en/words/words.v.5.4 +102 -0
  314. data/data/en/words/words.v.6.1 +388 -0
  315. data/data/en/words/words.v.6.2 +401 -0
  316. data/data/en/words/words.v.6.3 +397 -0
  317. data/data/en/words/words.v.6.4 +405 -0
  318. data/data/en/words/words.v.6.5 +401 -0
  319. data/data/en/words/words.v.8.1 +117 -0
  320. data/data/en/words/words.v.8.2 +118 -0
  321. data/data/en/words/words.v.8.3 +118 -0
  322. data/data/en/words/words.v.8.4 +119 -0
  323. data/data/en/words/words.v.8.5 +119 -0
  324. data/data/en/words/words.y +104 -0
  325. data/data/lt/.DS_Store +0 -0
  326. data/data/lt/4.0.affix +6 -0
  327. data/data/lt/4.0.constituent-knowledge +24 -0
  328. data/data/lt/4.0.dict +135 -0
  329. data/data/lt/4.0.knowledge +38 -0
  330. data/data/lt/Makefile +389 -0
  331. data/data/lt/Makefile.am +11 -0
  332. data/data/lt/Makefile.in +389 -0
  333. data/grammar_police.gemspec +23 -0
  334. data/lib/.DS_Store +0 -0
  335. data/lib/grammar_police.rb +11 -0
  336. data/lib/grammar_police/.DS_Store +0 -0
  337. data/lib/grammar_police/dictionary.rb +30 -0
  338. data/lib/grammar_police/linkage.rb +26 -0
  339. data/lib/grammar_police/parse_options.rb +32 -0
  340. data/lib/grammar_police/sentence.rb +44 -0
  341. data/lib/grammar_police/version.rb +3 -0
  342. data/tests/.DS_Store +0 -0
  343. data/tests/count_linkages.rb +29 -0
  344. data/tests/sentences.txt +86 -0
  345. metadata +408 -0
@@ -0,0 +1,29 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ int read_dictionary(Dictionary dict);
15
+ void dict_display_word_info(Dictionary dict, const char * s);
16
+ void dict_display_word_expr(Dictionary dict, const char * s);
17
+ void print_dictionary_data(Dictionary dict);
18
+ void print_dictionary_words(Dictionary dict);
19
+ void print_expression(Exp *);
20
+ int boolean_dictionary_lookup(Dictionary dict, const char *);
21
+ int delete_dictionary_words(Dictionary dict, const char *);
22
+
23
+ Dict_node * dictionary_lookup_list(Dictionary dict, const char *);
24
+ Dict_node * abridged_lookup_list(Dictionary dict, const char *);
25
+ void free_lookup_list(Dict_node *);
26
+
27
+ Dict_node * insert_dict(Dictionary dict, Dict_node * n, Dict_node * newnode);
28
+ void free_dictionary(Dictionary dict);
29
+ Exp * Exp_create(Dictionary dict);
@@ -0,0 +1,161 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2005 Sampo Pyysalo */
3
+ /* */
4
+ /* Use of the link grammar parsing system is subject to the terms of the */
5
+ /* license set forth in the LICENSE file included with this software, */
6
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
7
+ /* This license allows free redistribution and use in source and binary */
8
+ /* forms, with or without modification, subject to certain conditions. */
9
+ /* */
10
+ /*************************************************************************/
11
+
12
+ #include <string.h>
13
+ #include "link-includes.h"
14
+ #include "api-structures.h"
15
+ #include "structures.h"
16
+ #include "read-regex.h"
17
+
18
+ /*
19
+ Function for reading regular expression name:pattern combinations
20
+ into the Dictionary from a given file.
21
+
22
+ The format of the regex file is as follows:
23
+
24
+ Lines starting with "%" are comments and are ignored.
25
+ All other nonempty lines must follow the following format:
26
+
27
+ REGEX_NAME: /pattern/
28
+
29
+ here REGEX_NAME is an identifying unique name for the regex.
30
+ This name is used to determine the disjuncts that will be assigned to
31
+ tokens matching the pattern, so in the dictionary file (e.g. 4.0.dict)
32
+ you must have something like
33
+
34
+ REGEX_NAME: (({@MX+} & (JG- or <noun-main-s>)) or YS+)) or AN+ or G+);
35
+
36
+ using the same name. The pattern itself must be surrounded by slashes.
37
+ Extra whitespace is ignored.
38
+ */
39
+
40
+ #define MAX_REGEX_NAME_LENGTH 50
41
+ #define MAX_REGEX_LENGTH 255
42
+
43
+ int read_regex_file(Dictionary dict, const char *file_name)
44
+ {
45
+ Regex_node **tail = &dict->regex_root; /* Last Regex_node * in list */
46
+ Regex_node *new_re;
47
+ char name[MAX_REGEX_NAME_LENGTH];
48
+ char regex[MAX_REGEX_LENGTH];
49
+ int c,prev,i,line=1;
50
+ FILE *fp;
51
+
52
+ fp = dictopen(file_name, "r");
53
+ if (fp == NULL)
54
+ {
55
+ prt_error("Error: cannot open regex file %s\n", file_name);
56
+ return 1;
57
+ }
58
+
59
+ /* read in regexs. loop broken on EOF. */
60
+ while (1)
61
+ {
62
+ /* skip whitespace and comments. */
63
+ do
64
+ {
65
+ do
66
+ {
67
+ c = fgetc(fp);
68
+ if (c == '\n') { line++; }
69
+ }
70
+ while(isspace(c));
71
+
72
+ if (c == '%')
73
+ {
74
+ while ((c != EOF) && (c != '\n')) { c = fgetc(fp); }
75
+ line++;
76
+ }
77
+ }
78
+ while(isspace(c));
79
+
80
+ if (c == EOF) { break; } /* done. */
81
+
82
+ /* read in the name of the regex. */
83
+ i = 0;
84
+ do
85
+ {
86
+ if (i > MAX_REGEX_NAME_LENGTH-1)
87
+ {
88
+ prt_error("Error: Regex name too long on line %d\n", line);
89
+ goto failure;
90
+ }
91
+ name[i++] = c;
92
+ c = fgetc(fp);
93
+ }
94
+ while ((!isspace(c)) && (c != ':') && (c != EOF));
95
+ name[i] = '\0';
96
+
97
+ /* Skip possible whitespace after name, expect colon. */
98
+ while (isspace(c))
99
+ {
100
+ if (c == '\n') { line++; }
101
+ c = fgetc(fp);
102
+ }
103
+ if (c != ':')
104
+ {
105
+ prt_error("Error: Regex missing colon on line %d\n", line);
106
+ goto failure;
107
+ }
108
+
109
+ /* Skip whitespace after colon, expect slash. */
110
+ do
111
+ {
112
+ if (c == '\n') { line++; }
113
+ c = fgetc(fp);
114
+ }
115
+ while (isspace(c));
116
+ if (c != '/') {
117
+ prt_error("Error: Regex missing leading slash on line %d\n", line);
118
+ goto failure;
119
+ }
120
+
121
+ /* Read in the regex. */
122
+ prev = 0;
123
+ i = 0;
124
+ do
125
+ {
126
+ if (i > MAX_REGEX_LENGTH-1)
127
+ {
128
+ prt_error("Error: Regex too long on line %d\n", line);
129
+ goto failure;
130
+ }
131
+ prev = c;
132
+ c = fgetc(fp);
133
+ regex[i++] = c;
134
+ }
135
+ while ((c != '/' || prev == '\\') && (c != EOF));
136
+ regex[i-1] = '\0';
137
+
138
+ /* Expect termination by a slash. */
139
+ if (c != '/')
140
+ {
141
+ prt_error("Error: Regex missing trailing slash on line %d\n", line);
142
+ goto failure;
143
+ }
144
+
145
+ /* Create new Regex_node and add to dict list. */
146
+ new_re = (Regex_node *) malloc(sizeof(Regex_node));
147
+ new_re->name = strdup(name);
148
+ new_re->pattern = strdup(regex);
149
+ new_re->re = NULL;
150
+ new_re->next = NULL;
151
+ *tail = new_re;
152
+ tail = &new_re->next;
153
+ }
154
+
155
+ fclose(fp);
156
+ return 0;
157
+ failure:
158
+ fclose(fp);
159
+ return 1;
160
+ }
161
+
@@ -0,0 +1,12 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2005 Sampo Pyysalo */
3
+ /* */
4
+ /* Use of the link grammar parsing system is subject to the terms of the */
5
+ /* license set forth in the LICENSE file included with this software, */
6
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
7
+ /* This license allows free redistribution and use in source and binary */
8
+ /* forms, with or without modification, subject to certain conditions. */
9
+ /* */
10
+ /*************************************************************************/
11
+
12
+ int read_regex_file(Dictionary dict, const char *file_name);
@@ -0,0 +1,126 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2005 Sampo Pyysalo */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ /* On MS Windows, regex.h fails to pull in size_t, so work around this by
14
+ * including <stddef.h> before <regex.h> (<sys/types.h> is not enough) */
15
+ #include <stddef.h>
16
+ #include <regex.h>
17
+ #include "api-structures.h"
18
+ #include "link-includes.h"
19
+ #include "read-dict.h"
20
+ #include "regex-morph.h"
21
+ #include "structures.h"
22
+
23
+ /**
24
+ * Support for the regular-expression based token matching system
25
+ * using standard POSIX regex.
26
+ */
27
+
28
+ /* Compiles all the regexs in the Dictionary. Returns 0 on success,
29
+ * else an error code.
30
+ */
31
+ int compile_regexs(Dictionary dict)
32
+ {
33
+ regex_t *preg;
34
+ int rc;
35
+
36
+ Regex_node *re = dict->regex_root;
37
+ while (re != NULL)
38
+ {
39
+ /* If re->re non-null, assume compiled already. */
40
+ if(re->re == NULL)
41
+ {
42
+ /* Compile with default options (0) and default character
43
+ * tables (NULL). */
44
+ /* re->re = pcre_compile(re->pattern, 0, &error, &erroroffset, NULL); */
45
+ preg = (regex_t *) malloc (sizeof(regex_t));
46
+ re->re = preg;
47
+ rc = regcomp(preg, re->pattern, REG_EXTENDED);
48
+ if (rc)
49
+ {
50
+ /*
51
+ prt_error("Error: Failed to compile regex '%s' (%s) at %d: %s\n",
52
+ re->pattern, re->name, erroroffset, error);
53
+ */
54
+ prt_error("Error: Failed to compile regex '%s' (%s)\n",
55
+ re->pattern, re->name);
56
+ return rc;
57
+ }
58
+
59
+ /* Check that the regex name is defined in the dictionary. */
60
+ if (!boolean_dictionary_lookup(dict, re->name))
61
+ {
62
+ /* TODO: better error handing. Maybe remove the regex? */
63
+ prt_error("Error: Regex name %s not found in dictionary!\n",
64
+ re->name);
65
+ }
66
+ }
67
+ re = re->next;
68
+ }
69
+ return 0;
70
+ }
71
+
72
+ /**
73
+ * Tries to match each regex in turn to word s.
74
+ * On match, returns the name of the first matching regex.
75
+ * If no match is found, returns NULL.
76
+ */
77
+ const char *match_regex(Dictionary dict, const char *s)
78
+ {
79
+ int rc;
80
+
81
+ Regex_node *re = dict->regex_root;
82
+ while (re != NULL)
83
+ {
84
+ if (re->re == NULL)
85
+ {
86
+ /* Re not compiled; if this happens, it's likely an
87
+ * internal error, but nevermind for now. */
88
+ continue;
89
+ }
90
+ /* Try to match with no extra data (NULL), whole str (0 to strlen(s)),
91
+ * and default options (second 0). */
92
+ /* int rc = pcre_exec(re->re, NULL, s, strlen(s), 0,
93
+ * 0, ovector, PCRE_OVEC_SIZE); */
94
+
95
+ rc = regexec((regex_t*) re->re, s, 0, NULL, 0);
96
+ if (0 == rc)
97
+ {
98
+ return re->name; /* match found. just return--no multiple matches. */
99
+ }
100
+ else if (rc != REG_NOMATCH)
101
+ {
102
+ /* We have an error. TODO: more appropriate error handling.*/
103
+ fprintf(stderr,"Regex matching error %d occurred!\n", rc);
104
+ }
105
+ re = re->next;
106
+ }
107
+ return NULL; /* no matches. */
108
+ }
109
+
110
+ /**
111
+ * Delete associated storage
112
+ */
113
+ void free_regexs(Dictionary dict)
114
+ {
115
+ Regex_node *re = dict->regex_root;
116
+ while (re != NULL)
117
+ {
118
+ Regex_node *next = re->next;
119
+ regfree((regex_t *)re->re);
120
+ free(re->re);
121
+ free(re->name);
122
+ free(re->pattern);
123
+ free(re);
124
+ re = next;
125
+ }
126
+ }
@@ -0,0 +1,17 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2005 Sampo Pyysalo */
3
+ /* All rights reserved */
4
+ /* */
5
+ /* Use of the link grammar parsing system is subject to the terms of the */
6
+ /* license set forth in the LICENSE file included with this software, */
7
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
8
+ /* This license allows free redistribution and use in source and binary */
9
+ /* forms, with or without modification, subject to certain conditions. */
10
+ /* */
11
+ /*************************************************************************/
12
+
13
+ #include "api-structures.h"
14
+
15
+ int compile_regexs(Dictionary);
16
+ const char *match_regex(Dictionary, const char *);
17
+ void free_regexs(Dictionary dict);
@@ -0,0 +1,180 @@
1
+ /*************************************************************************/
2
+ /* Copyright (c) 2004 */
3
+ /* Daniel Sleator, David Temperley, and John Lafferty */
4
+ /* All rights reserved */
5
+ /* */
6
+ /* Use of the link grammar parsing system is subject to the terms of the */
7
+ /* license set forth in the LICENSE file included with this software, */
8
+ /* and also available at http://www.link.cs.cmu.edu/link/license.html */
9
+ /* This license allows free redistribution and use in source and binary */
10
+ /* forms, with or without modification, subject to certain conditions. */
11
+ /* */
12
+ /*************************************************************************/
13
+
14
+ #include "api.h"
15
+ #include "api.c"
16
+
17
+ #include <time.h>
18
+
19
+ #if !defined(_WIN32)
20
+ #include <sys/time.h>
21
+ #include <sys/resource.h>
22
+ #endif
23
+
24
+ #if defined(__linux__)
25
+ /* based on reading the man page for getrusage on linux, I inferred that
26
+ I needed to include this. However it doesn't seem to be necessary */
27
+ #include <unistd.h>
28
+ #endif
29
+
30
+ #if defined(__hpux__)
31
+ #include <sys/syscall.h>
32
+ int syscall(int, int, struct rusage *rusage); /* can't find
33
+ the prototype for this */
34
+ #define getrusage(a, b) syscall(SYS_GETRUSAGE, (a), (b))
35
+ #endif /* __hpux__ */
36
+
37
+ #if defined(__sun__)
38
+ int getrusage(int who, struct rusage *rusage);
39
+ /* Declaration missing from sys/resource.h in sun operating systems (?) */
40
+ #endif /* __sun__ */
41
+
42
+ #define MAX_PARSE_TIME_UNLIMITED -1
43
+ #define MAX_MEMORY_UNLIMITED ((size_t) -1)
44
+
45
/**
 * Returns the current usage-time clock in seconds.
 *
 * On Windows this is clock() scaled by CLOCKS_PER_SEC; everywhere else
 * it is the user CPU time (ru_utime) that getrusage() reports for the
 * calling process.
 */
static double current_usage_time(void)
{
#if defined(_WIN32)
	return ((double) clock())/CLOCKS_PER_SEC;
#else
	struct rusage usage;
	double micros;

	getrusage(RUSAGE_SELF, &usage);
	micros = (double) usage.ru_utime.tv_usec;
	return (usage.ru_utime.tv_sec + micros / 1000000.0);
#endif
}
56
+
57
+ Resources resources_create(void)
58
+ {
59
+ Resources r;
60
+
61
+ r = (Resources) xalloc(sizeof(struct Resources_s));
62
+ r->max_parse_time = MAX_PARSE_TIME_UNLIMITED;
63
+ r->when_created = current_usage_time();
64
+ r->when_last_called = current_usage_time();
65
+ r->time_when_parse_started = current_usage_time();
66
+ r->space_when_parse_started = get_space_in_use();
67
+ r->max_memory = MAX_MEMORY_UNLIMITED;
68
+ r->cumulative_time = 0;
69
+ r->memory_exhausted = FALSE;
70
+ r->timer_expired = FALSE;
71
+
72
+ return r;
73
+ }
74
+
75
+ void resources_delete(Resources r)
76
+ {
77
+ xfree(r, sizeof(struct Resources_s));
78
+ }
79
+
80
+ void resources_reset(Resources r)
81
+ {
82
+ r->when_last_called = r->time_when_parse_started = current_usage_time();
83
+ r->space_when_parse_started = get_space_in_use();
84
+ r->timer_expired = FALSE;
85
+ r->memory_exhausted = FALSE;
86
+ }
87
+
88
#if 0
/* Disabled: would restart only the time baselines, from one clock sample. */
static void resources_reset_time(Resources r)
{
	r->time_when_parse_started = current_usage_time();
	r->when_last_called = r->time_when_parse_started;
}
#endif
94
+
95
+ void resources_reset_space(Resources r)
96
+ {
97
+ r->space_when_parse_started = get_space_in_use();
98
+ }
99
+
100
+ int resources_exhausted(Resources r)
101
+ {
102
+ if (resources_timer_expired(r)) {
103
+ r->timer_expired = TRUE;
104
+ }
105
+ if (resources_memory_exhausted(r)) {
106
+ r->memory_exhausted = TRUE;
107
+ }
108
+ return (r->timer_expired || r->memory_exhausted);
109
+ }
110
+
111
+ int resources_timer_expired(Resources r)
112
+ {
113
+ if (r->max_parse_time == MAX_PARSE_TIME_UNLIMITED) return 0;
114
+ else return (r->timer_expired ||
115
+ (current_usage_time() - r->time_when_parse_started > r->max_parse_time));
116
+ }
117
+
118
+ int resources_memory_exhausted(Resources r)
119
+ {
120
+ if (r->max_memory == MAX_MEMORY_UNLIMITED) return 0;
121
+ else return (r->memory_exhausted || (get_space_in_use() > r->max_memory));
122
+ }
123
+
124
+ /** print out the cpu ticks since this was last called */
125
+ static void resources_print_time(int verbosity, Resources r, const char * s)
126
+ {
127
+ double new_t;
128
+ new_t = current_usage_time();
129
+ if (verbosity > 1) {
130
+ printf("++++");
131
+ left_print_string(stdout, s,
132
+ " ");
133
+ printf("%7.2f seconds\n", new_t - r->when_last_called);
134
+ }
135
+ r->when_last_called = new_t;
136
+ }
137
+
138
+ /** print out the cpu ticks since this was last called */
139
+ static void resources_print_total_time(int verbosity, Resources r)
140
+ {
141
+ double new_t;
142
+ new_t = current_usage_time();
143
+ r->cumulative_time += (new_t - r->time_when_parse_started) ;
144
+ if (verbosity > 0) {
145
+ printf("++++");
146
+ left_print_string(stdout, "Time",
147
+ " ");
148
+ printf("%7.2f seconds (%.2f total)\n",
149
+ new_t - r->time_when_parse_started, r->cumulative_time);
150
+ }
151
+ r->time_when_parse_started = new_t;
152
+ }
153
+
154
+ static void resources_print_total_space(int verbosity, Resources r)
155
+ {
156
+ if (verbosity > 1) {
157
+ printf("++++");
158
+ left_print_string(stdout, "Total space",
159
+ " ");
160
+ printf("%lu bytes (%lu max)\n",
161
+ (long unsigned int) get_space_in_use(),
162
+ (long unsigned int) get_max_space_used());
163
+ }
164
+ }
165
+
166
+ void print_time(Parse_Options opts, const char * s)
167
+ {
168
+ resources_print_time(opts->verbosity, opts->resources, s);
169
+ }
170
+
171
+ void parse_options_print_total_time(Parse_Options opts)
172
+ {
173
+ resources_print_total_time(opts->verbosity, opts->resources);
174
+ }
175
+
176
+ void print_total_space(Parse_Options opts)
177
+ {
178
+ resources_print_total_space(opts->verbosity, opts->resources);
179
+ }
180
+