gtl-parsley-ruby 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. data/CHANGELOG +3 -0
  2. data/README +32 -0
  3. data/Rakefile +57 -0
  4. data/VERSION +1 -0
  5. data/ext/cparsley.c +152 -0
  6. data/ext/extconf.rb +82 -0
  7. data/ext/parsley/.gitignore +32 -0
  8. data/ext/parsley/AUTHORS +1 -0
  9. data/ext/parsley/ChangeLog +0 -0
  10. data/ext/parsley/HACKING +4 -0
  11. data/ext/parsley/INSTALL +73 -0
  12. data/ext/parsley/INTRO +84 -0
  13. data/ext/parsley/Makefile.am +80 -0
  14. data/ext/parsley/Makefile.in +1009 -0
  15. data/ext/parsley/NEWS +0 -0
  16. data/ext/parsley/PAPER +36 -0
  17. data/ext/parsley/Portfile +18 -0
  18. data/ext/parsley/Portfile.in +17 -0
  19. data/ext/parsley/README.C-LANG +92 -0
  20. data/ext/parsley/README.markdown +1 -0
  21. data/ext/parsley/TODO +39 -0
  22. data/ext/parsley/VERSION +1 -0
  23. data/ext/parsley/aclocal.m4 +8918 -0
  24. data/ext/parsley/bootstrap.sh +6 -0
  25. data/ext/parsley/config.guess +1561 -0
  26. data/ext/parsley/config.sub +1686 -0
  27. data/ext/parsley/configure +13437 -0
  28. data/ext/parsley/configure.ac +46 -0
  29. data/ext/parsley/depcomp +630 -0
  30. data/ext/parsley/functions.c +368 -0
  31. data/ext/parsley/functions.h +19 -0
  32. data/ext/parsley/generate_bisect.sh +12 -0
  33. data/ext/parsley/hooks/prepare-commit-msg +16 -0
  34. data/ext/parsley/install-sh +520 -0
  35. data/ext/parsley/json-c-0.9/AUTHORS +2 -0
  36. data/ext/parsley/json-c-0.9/COPYING +19 -0
  37. data/ext/parsley/json-c-0.9/ChangeLog +103 -0
  38. data/ext/parsley/json-c-0.9/INSTALL +302 -0
  39. data/ext/parsley/json-c-0.9/Makefile.am +43 -0
  40. data/ext/parsley/json-c-0.9/Makefile.in +800 -0
  41. data/ext/parsley/json-c-0.9/NEWS +1 -0
  42. data/ext/parsley/json-c-0.9/README +20 -0
  43. data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
  44. data/ext/parsley/json-c-0.9/README.html +32 -0
  45. data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
  46. data/ext/parsley/json-c-0.9/arraylist.c +94 -0
  47. data/ext/parsley/json-c-0.9/arraylist.h +53 -0
  48. data/ext/parsley/json-c-0.9/bits.h +27 -0
  49. data/ext/parsley/json-c-0.9/config.guess +1561 -0
  50. data/ext/parsley/json-c-0.9/config.h +125 -0
  51. data/ext/parsley/json-c-0.9/config.h.in +124 -0
  52. data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
  53. data/ext/parsley/json-c-0.9/config.sub +1686 -0
  54. data/ext/parsley/json-c-0.9/configure +13084 -0
  55. data/ext/parsley/json-c-0.9/configure.in +33 -0
  56. data/ext/parsley/json-c-0.9/debug.c +98 -0
  57. data/ext/parsley/json-c-0.9/debug.h +50 -0
  58. data/ext/parsley/json-c-0.9/depcomp +630 -0
  59. data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
  60. data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
  61. data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
  62. data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
  63. data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
  64. data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
  65. data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
  66. data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
  67. data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
  68. data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
  69. data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
  70. data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
  71. data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
  72. data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
  73. data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
  74. data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
  75. data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
  76. data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
  77. data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
  78. data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
  79. data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
  80. data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
  81. data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
  82. data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
  83. data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
  84. data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
  85. data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
  86. data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
  87. data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
  88. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
  89. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
  90. data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
  91. data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
  92. data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
  93. data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
  94. data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
  95. data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
  96. data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
  97. data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
  98. data/ext/parsley/json-c-0.9/install-sh +520 -0
  99. data/ext/parsley/json-c-0.9/json.h +31 -0
  100. data/ext/parsley/json-c-0.9/json.pc +11 -0
  101. data/ext/parsley/json-c-0.9/json.pc.in +11 -0
  102. data/ext/parsley/json-c-0.9/json_object.c +512 -0
  103. data/ext/parsley/json-c-0.9/json_object.h +319 -0
  104. data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
  105. data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
  106. data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
  107. data/ext/parsley/json-c-0.9/json_util.c +122 -0
  108. data/ext/parsley/json-c-0.9/json_util.h +31 -0
  109. data/ext/parsley/json-c-0.9/libjson.la +41 -0
  110. data/ext/parsley/json-c-0.9/libtool +8890 -0
  111. data/ext/parsley/json-c-0.9/linkhash.c +216 -0
  112. data/ext/parsley/json-c-0.9/linkhash.h +272 -0
  113. data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
  114. data/ext/parsley/json-c-0.9/missing +376 -0
  115. data/ext/parsley/json-c-0.9/printbuf.c +149 -0
  116. data/ext/parsley/json-c-0.9/printbuf.h +64 -0
  117. data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
  118. data/ext/parsley/json-c-0.9/test1 +130 -0
  119. data/ext/parsley/json-c-0.9/test1.c +164 -0
  120. data/ext/parsley/json-c-0.9/test2 +130 -0
  121. data/ext/parsley/json-c-0.9/test2.c +20 -0
  122. data/ext/parsley/json-c-0.9/test3 +130 -0
  123. data/ext/parsley/json-c-0.9/test3.c +23 -0
  124. data/ext/parsley/libtool +8890 -0
  125. data/ext/parsley/ltmain.sh +8406 -0
  126. data/ext/parsley/missing +376 -0
  127. data/ext/parsley/parsed_xpath.c +168 -0
  128. data/ext/parsley/parsed_xpath.h +34 -0
  129. data/ext/parsley/parser.y +631 -0
  130. data/ext/parsley/parsley.c +793 -0
  131. data/ext/parsley/parsley.h +87 -0
  132. data/ext/parsley/parsley_main.c +185 -0
  133. data/ext/parsley/parsleyc_main.c +108 -0
  134. data/ext/parsley/regexp.c +359 -0
  135. data/ext/parsley/regexp.h +36 -0
  136. data/ext/parsley/scanner.l +221 -0
  137. data/ext/parsley/test/ambiguous.html +207 -0
  138. data/ext/parsley/test/ambiguous.json +1 -0
  139. data/ext/parsley/test/ambiguous.let +6 -0
  140. data/ext/parsley/test/array-regression.html +5 -0
  141. data/ext/parsley/test/array-regression.json +1 -0
  142. data/ext/parsley/test/array-regression.let +10 -0
  143. data/ext/parsley/test/backslash.html +5 -0
  144. data/ext/parsley/test/backslash.json +1 -0
  145. data/ext/parsley/test/backslash.let +3 -0
  146. data/ext/parsley/test/bang.html +17 -0
  147. data/ext/parsley/test/bang.json +1 -0
  148. data/ext/parsley/test/bang.let +6 -0
  149. data/ext/parsley/test/collate_regression.html +324 -0
  150. data/ext/parsley/test/collate_regression.json +1 -0
  151. data/ext/parsley/test/collate_regression.let +9 -0
  152. data/ext/parsley/test/contains.html +3 -0
  153. data/ext/parsley/test/contains.json +1 -0
  154. data/ext/parsley/test/contains.let +3 -0
  155. data/ext/parsley/test/content.html +13 -0
  156. data/ext/parsley/test/content.json +1 -0
  157. data/ext/parsley/test/content.let +7 -0
  158. data/ext/parsley/test/cool.html +575 -0
  159. data/ext/parsley/test/cool.json +1 -0
  160. data/ext/parsley/test/cool.let +9 -0
  161. data/ext/parsley/test/craigs-simple.html +207 -0
  162. data/ext/parsley/test/craigs-simple.json +1 -0
  163. data/ext/parsley/test/craigs-simple.let +6 -0
  164. data/ext/parsley/test/craigs.html +207 -0
  165. data/ext/parsley/test/craigs.json +1 -0
  166. data/ext/parsley/test/craigs.let +9 -0
  167. data/ext/parsley/test/crash.html +157 -0
  168. data/ext/parsley/test/crash.json +1 -0
  169. data/ext/parsley/test/crash.let +1 -0
  170. data/ext/parsley/test/css_attr.html +3 -0
  171. data/ext/parsley/test/css_attr.json +1 -0
  172. data/ext/parsley/test/css_attr.let +3 -0
  173. data/ext/parsley/test/default-namespace.json +1 -0
  174. data/ext/parsley/test/default-namespace.let +3 -0
  175. data/ext/parsley/test/default-namespace.xml +1493 -0
  176. data/ext/parsley/test/div.html +8 -0
  177. data/ext/parsley/test/div.json +1 -0
  178. data/ext/parsley/test/div.let +10 -0
  179. data/ext/parsley/test/empty.html +3 -0
  180. data/ext/parsley/test/empty.json +1 -0
  181. data/ext/parsley/test/empty.let +1 -0
  182. data/ext/parsley/test/emptyish.html +207 -0
  183. data/ext/parsley/test/emptyish.let +3 -0
  184. data/ext/parsley/test/fictional-opt.html +43 -0
  185. data/ext/parsley/test/fictional-opt.json +1 -0
  186. data/ext/parsley/test/fictional-opt.let +14 -0
  187. data/ext/parsley/test/fictional.html +43 -0
  188. data/ext/parsley/test/fictional.json +1 -0
  189. data/ext/parsley/test/fictional.let +14 -0
  190. data/ext/parsley/test/function-magic.html +9 -0
  191. data/ext/parsley/test/function-magic.json +1 -0
  192. data/ext/parsley/test/function-magic.let +8 -0
  193. data/ext/parsley/test/hn.html +32 -0
  194. data/ext/parsley/test/hn.json +1 -0
  195. data/ext/parsley/test/hn.let +8 -0
  196. data/ext/parsley/test/malformed-array.html +2329 -0
  197. data/ext/parsley/test/malformed-array.json +1 -0
  198. data/ext/parsley/test/malformed-array.let +22 -0
  199. data/ext/parsley/test/malformed-expr.html +2329 -0
  200. data/ext/parsley/test/malformed-expr.json +1 -0
  201. data/ext/parsley/test/malformed-expr.let +16 -0
  202. data/ext/parsley/test/malformed-function.html +845 -0
  203. data/ext/parsley/test/malformed-function.json +197 -0
  204. data/ext/parsley/test/malformed-function.let +8 -0
  205. data/ext/parsley/test/malformed-json.html +2329 -0
  206. data/ext/parsley/test/malformed-json.json +1 -0
  207. data/ext/parsley/test/malformed-json.let +6 -0
  208. data/ext/parsley/test/malformed-xpath.html +8 -0
  209. data/ext/parsley/test/malformed-xpath.json +1 -0
  210. data/ext/parsley/test/malformed-xpath.let +7 -0
  211. data/ext/parsley/test/match.json +1 -0
  212. data/ext/parsley/test/match.let +9 -0
  213. data/ext/parsley/test/match.xml +11 -0
  214. data/ext/parsley/test/math_ambiguity.html +9 -0
  215. data/ext/parsley/test/math_ambiguity.json +1 -0
  216. data/ext/parsley/test/math_ambiguity.let +5 -0
  217. data/ext/parsley/test/nth-regression.html +13 -0
  218. data/ext/parsley/test/nth-regression.json +1 -0
  219. data/ext/parsley/test/nth-regression.let +3 -0
  220. data/ext/parsley/test/optional.html +2328 -0
  221. data/ext/parsley/test/optional.json +1 -0
  222. data/ext/parsley/test/optional.let +8 -0
  223. data/ext/parsley/test/outer-xml.html +6 -0
  224. data/ext/parsley/test/outer-xml.json +1 -0
  225. data/ext/parsley/test/outer-xml.let +5 -0
  226. data/ext/parsley/test/position.html +8 -0
  227. data/ext/parsley/test/position.json +1 -0
  228. data/ext/parsley/test/position.let +6 -0
  229. data/ext/parsley/test/question_regressions.html +443 -0
  230. data/ext/parsley/test/question_regressions.json +1 -0
  231. data/ext/parsley/test/question_regressions.let +6 -0
  232. data/ext/parsley/test/quote.json +1 -0
  233. data/ext/parsley/test/quote.let +8 -0
  234. data/ext/parsley/test/quote.xml +11 -0
  235. data/ext/parsley/test/reddit.html +1 -0
  236. data/ext/parsley/test/reddit.json +1 -0
  237. data/ext/parsley/test/reddit.let +12 -0
  238. data/ext/parsley/test/remote-fail.json +1 -0
  239. data/ext/parsley/test/remote.html +3 -0
  240. data/ext/parsley/test/remote.json +1 -0
  241. data/ext/parsley/test/remote.let +4 -0
  242. data/ext/parsley/test/replace.json +1 -0
  243. data/ext/parsley/test/replace.let +9 -0
  244. data/ext/parsley/test/replace.xml +11 -0
  245. data/ext/parsley/test/scope.html +10 -0
  246. data/ext/parsley/test/scope.json +1 -0
  247. data/ext/parsley/test/scope.let +6 -0
  248. data/ext/parsley/test/segfault.html +5 -0
  249. data/ext/parsley/test/segfault.json +1 -0
  250. data/ext/parsley/test/segfault.let +9 -0
  251. data/ext/parsley/test/sg-wrap.html +5 -0
  252. data/ext/parsley/test/sg-wrap.json +1 -0
  253. data/ext/parsley/test/sg-wrap.let +3 -0
  254. data/ext/parsley/test/sg_off.html +5 -0
  255. data/ext/parsley/test/sg_off.json +1 -0
  256. data/ext/parsley/test/sg_off.let +3 -0
  257. data/ext/parsley/test/test.json +1 -0
  258. data/ext/parsley/test/test.let +6 -0
  259. data/ext/parsley/test/test.xml +11 -0
  260. data/ext/parsley/test/trivial.html +2329 -0
  261. data/ext/parsley/test/trivial.json +1 -0
  262. data/ext/parsley/test/trivial.let +4 -0
  263. data/ext/parsley/test/trivial2.html +2329 -0
  264. data/ext/parsley/test/trivial2.json +1 -0
  265. data/ext/parsley/test/trivial2.let +7 -0
  266. data/ext/parsley/test/unbang.html +17 -0
  267. data/ext/parsley/test/unbang.json +1 -0
  268. data/ext/parsley/test/unbang.let +6 -0
  269. data/ext/parsley/test/unicode.html +3 -0
  270. data/ext/parsley/test/unicode.json +1 -0
  271. data/ext/parsley/test/unicode.let +1 -0
  272. data/ext/parsley/test/whitespace.html +8 -0
  273. data/ext/parsley/test/whitespace.json +1 -0
  274. data/ext/parsley/test/whitespace.let +3 -0
  275. data/ext/parsley/test/whitespace_regression.html +4 -0
  276. data/ext/parsley/test/whitespace_regression.json +1 -0
  277. data/ext/parsley/test/whitespace_regression.let +3 -0
  278. data/ext/parsley/test/yelp-benchmark.rb +53 -0
  279. data/ext/parsley/test/yelp-home.html +1004 -0
  280. data/ext/parsley/test/yelp-home.json +1 -0
  281. data/ext/parsley/test/yelp-home.let +6 -0
  282. data/ext/parsley/test/yelp.html +2329 -0
  283. data/ext/parsley/test/yelp.json +1 -0
  284. data/ext/parsley/test/yelp.let +12 -0
  285. data/ext/parsley/test/youtube.html +1940 -0
  286. data/ext/parsley/test/youtube.let +11 -0
  287. data/ext/parsley/util.c +237 -0
  288. data/ext/parsley/util.h +34 -0
  289. data/ext/parsley/xml2json.c +47 -0
  290. data/ext/parsley/xml2json.h +14 -0
  291. data/ext/parsley/y.tab.h +222 -0
  292. data/ext/parsley/ylwrap +222 -0
  293. data/lib/parsley.rb +84 -0
  294. data/test/test_parsley.rb +120 -0
  295. data/test/yelp-benchmark.rb +53 -0
  296. data/test/yelp-home.html +1004 -0
  297. data/test/yelp-home.let +6 -0
  298. data/test/yelp.html +2329 -0
  299. metadata +366 -0
@@ -0,0 +1,222 @@
1
+ #! /bin/sh
2
+ # ylwrap - wrapper for lex/yacc invocations.
3
+
4
+ scriptversion=2009-04-28.21; # UTC
5
+
6
+ # Copyright (C) 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005,
7
+ # 2007, 2009 Free Software Foundation, Inc.
8
+ #
9
+ # Written by Tom Tromey <tromey@cygnus.com>.
10
+ #
11
+ # This program is free software; you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation; either version 2, or (at your option)
14
+ # any later version.
15
+ #
16
+ # This program is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
23
+
24
+ # As a special exception to the GNU General Public License, if you
25
+ # distribute this file as part of a program that contains a
26
+ # configuration script generated by Autoconf, you may include it under
27
+ # the same distribution terms that you use for the rest of that program.
28
+
29
+ # This file is maintained in Automake, please report
30
+ # bugs to <bug-automake@gnu.org> or send patches to
31
+ # <automake-patches@gnu.org>.
32
+
33
+ case "$1" in
34
+ '')
35
+ echo "$0: No files given. Try \`$0 --help' for more information." 1>&2
36
+ exit 1
37
+ ;;
38
+ --basedir)
39
+ basedir=$2
40
+ shift 2
41
+ ;;
42
+ -h|--h*)
43
+ cat <<\EOF
44
+ Usage: ylwrap [--help|--version] INPUT [OUTPUT DESIRED]... -- PROGRAM [ARGS]...
45
+
46
+ Wrapper for lex/yacc invocations, renaming files as desired.
47
+
48
+ INPUT is the input file
49
+ OUTPUT is one file PROG generates
50
+ DESIRED is the file we actually want instead of OUTPUT
51
+ PROGRAM is program to run
52
+ ARGS are passed to PROG
53
+
54
+ Any number of OUTPUT,DESIRED pairs may be used.
55
+
56
+ Report bugs to <bug-automake@gnu.org>.
57
+ EOF
58
+ exit $?
59
+ ;;
60
+ -v|--v*)
61
+ echo "ylwrap $scriptversion"
62
+ exit $?
63
+ ;;
64
+ esac
65
+
66
+
67
+ # The input.
68
+ input="$1"
69
+ shift
70
+ case "$input" in
71
+ [\\/]* | ?:[\\/]*)
72
+ # Absolute path; do nothing.
73
+ ;;
74
+ *)
75
+ # Relative path. Make it absolute.
76
+ input="`pwd`/$input"
77
+ ;;
78
+ esac
79
+
80
+ pairlist=
81
+ while test "$#" -ne 0; do
82
+ if test "$1" = "--"; then
83
+ shift
84
+ break
85
+ fi
86
+ pairlist="$pairlist $1"
87
+ shift
88
+ done
89
+
90
+ # The program to run.
91
+ prog="$1"
92
+ shift
93
+ # Make any relative path in $prog absolute.
94
+ case "$prog" in
95
+ [\\/]* | ?:[\\/]*) ;;
96
+ *[\\/]*) prog="`pwd`/$prog" ;;
97
+ esac
98
+
99
# FIXME: add hostname here for parallel makes that run commands on
# other machines. But that might take us over the 14-char limit.
dirname=ylwrap$$

# Clean-up shared by every signal trap: go back to the directory we were
# started from, remove the scratch directory, and then terminate with the
# status stored in $ret.  Without the explicit exit the shell would resume
# the script after the handler returned, with the scratch directory already
# gone and the working directory changed, so the relative `../' targets
# used further down would resolve against the wrong directory.
do_exit="cd '`pwd`' && rm -rf $dirname > /dev/null 2>&1;"' (exit $ret); exit $ret'
# Exit statuses follow the usual 128 + signal-number convention.
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=131; $do_exit" 3
trap "ret=143; $do_exit" 15
mkdir $dirname || exit 1

cd $dirname
106
+
107
+ case $# in
108
+ 0) "$prog" "$input" ;;
109
+ *) "$prog" "$@" "$input" ;;
110
+ esac
111
+ ret=$?
112
+
113
+ if test $ret -eq 0; then
114
+ set X $pairlist
115
+ shift
116
+ first=yes
117
+ # Since DOS filename conventions don't allow two dots,
118
+ # the DOS version of Bison writes out y_tab.c instead of y.tab.c
119
+ # and y_tab.h instead of y.tab.h. Test to see if this is the case.
120
+ y_tab_nodot="no"
121
+ if test -f y_tab.c || test -f y_tab.h; then
122
+ y_tab_nodot="yes"
123
+ fi
124
+
125
+ # The directory holding the input.
126
+ input_dir=`echo "$input" | sed -e 's,\([\\/]\)[^\\/]*$,\1,'`
127
+ # Quote $INPUT_DIR so we can use it in a regexp.
128
+ # FIXME: really we should care about more than `.' and `\'.
129
+ input_rx=`echo "$input_dir" | sed 's,\\\\,\\\\\\\\,g;s,\\.,\\\\.,g'`
130
+
131
+ while test "$#" -ne 0; do
132
+ from="$1"
133
+ # Handle y_tab.c and y_tab.h output by DOS
134
+ if test $y_tab_nodot = "yes"; then
135
+ if test $from = "y.tab.c"; then
136
+ from="y_tab.c"
137
+ else
138
+ if test $from = "y.tab.h"; then
139
+ from="y_tab.h"
140
+ fi
141
+ fi
142
+ fi
143
+ if test -f "$from"; then
144
+ # If $2 is an absolute path name, then just use that,
145
+ # otherwise prepend `../'.
146
+ case "$2" in
147
+ [\\/]* | ?:[\\/]*) target="$2";;
148
+ *) target="../$2";;
149
+ esac
150
+
151
+ # We do not want to overwrite a header file if it hasn't
152
+ # changed. This avoids useless recompilations. However the
153
+ # parser itself (the first file) should always be updated,
154
+ # because it is the destination of the .y.c rule in the
155
+ # Makefile. Divert the output of all other files to a temporary
156
+ # file so we can compare them to existing versions.
157
+ if test $first = no; then
158
+ realtarget="$target"
159
+ target="tmp-`echo $target | sed s/.*[\\/]//g`"
160
+ fi
161
+ # Edit out `#line' or `#' directives.
162
+ #
163
+ # We don't want the resulting debug information to point at
164
+ # an absolute srcdir; it is better for it to just mention the
165
+ # .y file with no path.
166
+ #
167
+ # We want to use the real output file name, not yy.lex.c for
168
+ # instance.
169
+ #
170
+ # We want the include guards to be adjusted too.
171
+ FROM=`echo "$from" | sed \
172
+ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
173
+ -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`
174
+ TARGET=`echo "$2" | sed \
175
+ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
176
+ -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`
177
+
178
+ sed -e "/^#/!b" -e "s,$input_rx,," -e "s,$from,$2," \
179
+ -e "s,$FROM,$TARGET," "$from" >"$target" || ret=$?
180
+
181
+ # Check whether header files must be updated.
182
+ if test $first = no; then
183
+ if test -f "$realtarget" && cmp -s "$realtarget" "$target"; then
184
+ echo "$2" is unchanged
185
+ rm -f "$target"
186
+ else
187
+ echo updating "$2"
188
+ mv -f "$target" "$realtarget"
189
+ fi
190
+ fi
191
+ else
192
+ # A missing file is only an error for the first file. This
193
+ # is a blatant hack to let us support using "yacc -d". If -d
194
+ # is not specified, we don't want an error when the header
195
+ # file is "missing".
196
+ if test $first = yes; then
197
+ ret=1
198
+ fi
199
+ fi
200
+ shift
201
+ shift
202
+ first=no
203
+ done
204
+ else
205
+ ret=$?
206
+ fi
207
+
208
+ # Remove the directory.
209
+ cd ..
210
+ rm -rf $dirname
211
+
212
+ exit $ret
213
+
214
+ # Local Variables:
215
+ # mode: shell-script
216
+ # sh-indentation: 2
217
+ # eval: (add-hook 'write-file-hooks 'time-stamp)
218
+ # time-stamp-start: "scriptversion="
219
+ # time-stamp-format: "%:y-%02m-%02d.%02H"
220
+ # time-stamp-time-zone: "UTC"
221
+ # time-stamp-end: "; # UTC"
222
+ # End:
data/lib/parsley.rb ADDED
@@ -0,0 +1,84 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../ext/cparsley")
2
+ require "rubygems"
3
+ require "json"
4
+ require "thread"
5
+
6
# Ruby front end for the CParsley native extension.
#
# A Parsley instance wraps one CParsley object built from a parselet, and
# #parse forwards a normalised option hash to it.
class Parsley
  # Serialises construction of the underlying CParsley objects.
  #
  # The lock is created once, when the class body is evaluated, rather than
  # lazily inside #initialize: a lazy `||=` is not atomic, so two threads
  # constructing their first Parsley at the same time could each create (and
  # then lock) a different mutex.
  CONSTRUCTOR_LOCK = Mutex.new

  # Remembers +agent+ on the class and hands its string form to CParsley.
  def self.user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end

  # Returns the value last assigned through Parsley.user_agent= (nil if none).
  def self.user_agent
    @user_agent
  end

  # parsley -- the parselet; a Hash is deep-stringified and serialised to
  #            JSON first, anything else is passed to CParsley unchanged.
  # incl    -- second argument forwarded verbatim to CParsley.new.
  def initialize(parsley, incl = "")
    parsley = recursive_stringify(parsley).to_json if parsley.is_a?(Hash)
    CONSTRUCTOR_LOCK.synchronize do
      @parsley = CParsley.new(parsley, incl)
    end
  end

  # Valid options:
  #
  # Requires one of:
  # :file -- the input file path or url
  # :string -- the input string
  #
  # And optionally (default is the first listed value):
  # :input => [:html, :xml]
  # :output => [:ruby, :json, :xml]
  # :prune => [true, false]
  # :sgwrap => [false, true]
  # :collate => [true, false]
  # :base => "http://some/base/href"
  # :allow_net => [true, false]
  # :allow_local => [true, false]
  #
  # Raises ParsleyError when neither :file nor :string is given.
  # Returns whatever the underlying CParsley#parse returns.
  def parse(options = {})
    options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))

    # Normalise a private copy so the caller's hash is left exactly as it
    # was passed in (it can then be reused, or be frozen).
    options = options.dup

    # Derived presence flags must be computed before the values they
    # describe are coerced to strings below (nil.to_s is "" and truthy).
    options[:sgwrap]   = !!options[:sgwrap]
    options[:is_file]  = !!options[:file]
    options[:has_base] = !!options[:base]

    options[:base]   = options[:base].to_s
    options[:file]   = options[:file].to_s
    options[:string] = options[:string].to_s

    options[:input]  ||= :html
    options[:output] ||= :ruby

    # These flags default to true only when the key is absent; an explicit
    # nil or false is respected and coerced to false.
    [:collate, :prune, :allow_net, :allow_local].each do |flag|
      options[flag] = true unless options.has_key?(flag)
      options[flag] = !!options[flag]
    end

    @parsley.parse(options)
  end

  private

  # Returns a deep copy of +obj+ in which every Hash key and every leaf
  # value has been converted with #to_s; Hash and Array nesting is kept.
  def recursive_stringify(obj)
    case obj
    when Hash
      obj.inject({}) do |memo, (k, v)|
        memo[k.to_s] = recursive_stringify(v)
        memo
      end
    when Array
      obj.map { |e| recursive_stringify(e) }
    else
      obj.to_s
    end
  end
end
@@ -0,0 +1,120 @@
1
+ #encoding: UTF-8
2
+ require "test/unit"
3
+ require File.expand_path(File.dirname(__FILE__) + "/../lib/parsley")
4
+
5
# Tests for the Parsley Ruby binding.
#
# Only the segfault regression is active; every other case is kept
# commented out below.
class TestParsley < Test::Unit::TestCase
  # Resolves absolute paths to the fixture files that sit next to this test.
  def setup
    @page = File.expand_path(File.dirname(__FILE__) + "/yelp.html")
    @home = File.expand_path(File.dirname(__FILE__) + "/yelp-home.html")
    @let = File.expand_path(File.dirname(__FILE__) + "/yelp-home.let")
  end

  # Parses a small document (including a non-ASCII character) with a parselet
  # whose "?"-suffixed keys use a selector that matches nothing, and checks
  # that the matched values come back and the unmatched key is nil.
  def test_segfault_regression
    simple_html = <<-HTML
    <html>
    <body>
    <h1 class="iCIMS_Header_JobTitle">CEO</h1>
    <h2 class="temperature">28ºF</h2>
    </body>
    </html>
    HTML

    struct = {
      'jobs' => [{
        'title' => ".iCIMS_Header_JobTitle",
        'temperature' => ".temperature",
        'description?' => "blah",
        'location?' => "blah",
        'experience?' => "blah",
        'education?' => "blah"
      }]
    }
    parselet = Parsley.new(struct)
    result = parselet.parse(:string => simple_html)
    assert_equal "CEO", result['jobs'].first['title']
    assert_equal "28ºF", result['jobs'].first['temperature']
    # assert_nil reports the unexpected value on failure, unlike assert(x.nil?).
    assert_nil result['jobs'].first['description']
  end
  #
  # def test_yelp
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home)
  #   assert_equal "/c/sf/shopping", out["categories"][0]["href"]
  # end
  #
  # def test_parsley_should_raise_if_value_syntax_error
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => nil})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => ""})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => "<<<<<<<<<<<"})
  #   end
  # end
  #
  # def test_yelp_xml
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home, :output => :xml)
  # end
  #
  # def test_broken
  #   @parsley = Parsley.new("hi" => "no-ns:match(h1)")
  #   assert_raises(ParsleyError) {
  #     @parsley.parse(:file => @page)
  #   }
  # end
  #
  # def test_simple
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
  # end
  #
  # def test_simple_string
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:string => "<html><body><h1>Nick's Crispy Tacos</h1></body></html>"))
  # end
  #
  # def test_xml
  #   @parsley = Parsley.new("hi" => "h1")
  #   xml = "<?xml version=\"1.0\"?>\n<parsley:root xmlns:parsley=\"http://parselets.com/json\"><hi position=\"63\">Nick's Crispy Tacos</hi></parsley:root>\n"
  #   assert_equal(xml, @parsley.parse(:file => @page, :output => :xml))
  # end
  #
  # def test_sgwrap
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_equal({"hi" => "world"}, @parsley.parse(:string => html, :sgwrap => true))
  # end
  #
  # def test_sgwrap_off
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_raises(ParsleyError) do
  #     @parsley.parse(:string => html, :sgwrap => false)
  #   end
  # end
  #
  #
  # def test_json
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal('{ "hi": "Nick\'s Crispy Tacos" }', @parsley.parse(:file => @page, :output => :json))
  # end
  #
  # def test_rescuable_file_error
  #   @parsley = Parsley.new("hi" => "h1")
  #   @nonexistant_file = File.dirname(__FILE__) + "/../fixtures/yelp.html"
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @nonexistant_file)) rescue nil
  # end
  #
  # def test_array_string
  #   @parsley = Parsley.new({"foo" => ["li"]})
  #   out = @parsley.parse(:file => @page)
  #   assert_kind_of Hash, out
  #   assert_kind_of Array, out["foo"], out.inspect
  #   assert out["foo"].length > 1
  # end
end
@@ -0,0 +1,53 @@
1
+ require "rubygems"
2
+ require "nokogiri"
3
+ require "hpricot"
4
+ require "parsley"
5
+ require "benchmark"
6
+ require "pp"
7
+
8
+ YELP_HTML = File.dirname(__FILE__) + "/yelp.html"
9
+
10
# Benchmark subject: builds the document with Nokogiri.Hpricot and runs the
# shared #parse extraction over it.
#
# The fixture is opened in block form so the file handle is closed on every
# call; the bare File.open used before left one open handle per iteration.
def noko
  File.open(YELP_HTML) { |io| parse Nokogiri.Hpricot(io) }
end
13
+
14
# Benchmark subject: builds the document with Hpricot and runs the shared
# #parse extraction over it.
#
# The fixture is opened in block form so the file handle is closed on every
# call; the bare File.open used before left one open handle per iteration.
def hpri
  File.open(YELP_HTML) { |io| parse Hpricot(io) }
end
17
+
18
# Extracts the business fields and its reviews from +doc+.
#
# doc -- any object whose `/` operator takes a selector string and returns a
#        list of nodes responding to #inner_text (and to `/` themselves).
#
# Returns a Hash with the keys "name", "phone", "address" and "reviews",
# where "reviews" is an Array of Hashes keyed "date", "user_name" and
# "comment". Raises NoMethodError if a selector matches nothing, because
# the first match is used unconditionally.
def parse(doc)
  out = {}
  out["name"] = (doc / "h1").first.inner_text
  out["phone"] = (doc / "#bizPhone").first.inner_text
  out["address"] = (doc / "address").first.inner_text
  out["reviews"] = (doc / ".nonfavoriteReview").map do |node|
    {
      "date" => (node / ".ieSucks .smaller").first.inner_text,
      "user_name" => (node / ".reviewer_info a").first.inner_text,
      "comment" => (node / ".review_comment").first.inner_text
    }
  end
  # Return the assembled record; without this line the method's value was
  # only the reviews array produced by the assignment above.
  out
end
31
+
32
# Benchmark subject: compiles a parselet for the business fields and the
# per-review fields, applies it to the Yelp fixture file and pretty-prints
# the parsed result.
def pars
  review_fields = {
    "date" => ".ieSucks .smaller",
    "user_name" => ".reviewer_info a",
    "comment" => ".review_comment"
  }
  page_fields = {
    "name" => "h1",
    "phone" => "#bizPhone",
    "address" => "address",
    "reviews(.nonfavoriteReview)" => [review_fields]
  }
  parselet = Parsley.new(page_fields)
  pp parselet.parse(:file => YELP_HTML)
end
47
+
48
# Report, for each extractor in turn, the time taken by three consecutive
# runs.
Benchmark.bm do |bench|
  [["nokogiri: ", :noko], ["hpricot: ", :hpri], ["parsley: ", :pars]].each do |label, runner|
    bench.report(label) { 3.times { send(runner) } }
  end
end
53
+