gtl-parsley-ruby 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (299) hide show
  1. data/CHANGELOG +3 -0
  2. data/README +32 -0
  3. data/Rakefile +57 -0
  4. data/VERSION +1 -0
  5. data/ext/cparsley.c +152 -0
  6. data/ext/extconf.rb +82 -0
  7. data/ext/parsley/.gitignore +32 -0
  8. data/ext/parsley/AUTHORS +1 -0
  9. data/ext/parsley/ChangeLog +0 -0
  10. data/ext/parsley/HACKING +4 -0
  11. data/ext/parsley/INSTALL +73 -0
  12. data/ext/parsley/INTRO +84 -0
  13. data/ext/parsley/Makefile.am +80 -0
  14. data/ext/parsley/Makefile.in +1009 -0
  15. data/ext/parsley/NEWS +0 -0
  16. data/ext/parsley/PAPER +36 -0
  17. data/ext/parsley/Portfile +18 -0
  18. data/ext/parsley/Portfile.in +17 -0
  19. data/ext/parsley/README.C-LANG +92 -0
  20. data/ext/parsley/README.markdown +1 -0
  21. data/ext/parsley/TODO +39 -0
  22. data/ext/parsley/VERSION +1 -0
  23. data/ext/parsley/aclocal.m4 +8918 -0
  24. data/ext/parsley/bootstrap.sh +6 -0
  25. data/ext/parsley/config.guess +1561 -0
  26. data/ext/parsley/config.sub +1686 -0
  27. data/ext/parsley/configure +13437 -0
  28. data/ext/parsley/configure.ac +46 -0
  29. data/ext/parsley/depcomp +630 -0
  30. data/ext/parsley/functions.c +368 -0
  31. data/ext/parsley/functions.h +19 -0
  32. data/ext/parsley/generate_bisect.sh +12 -0
  33. data/ext/parsley/hooks/prepare-commit-msg +16 -0
  34. data/ext/parsley/install-sh +520 -0
  35. data/ext/parsley/json-c-0.9/AUTHORS +2 -0
  36. data/ext/parsley/json-c-0.9/COPYING +19 -0
  37. data/ext/parsley/json-c-0.9/ChangeLog +103 -0
  38. data/ext/parsley/json-c-0.9/INSTALL +302 -0
  39. data/ext/parsley/json-c-0.9/Makefile.am +43 -0
  40. data/ext/parsley/json-c-0.9/Makefile.in +800 -0
  41. data/ext/parsley/json-c-0.9/NEWS +1 -0
  42. data/ext/parsley/json-c-0.9/README +20 -0
  43. data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
  44. data/ext/parsley/json-c-0.9/README.html +32 -0
  45. data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
  46. data/ext/parsley/json-c-0.9/arraylist.c +94 -0
  47. data/ext/parsley/json-c-0.9/arraylist.h +53 -0
  48. data/ext/parsley/json-c-0.9/bits.h +27 -0
  49. data/ext/parsley/json-c-0.9/config.guess +1561 -0
  50. data/ext/parsley/json-c-0.9/config.h +125 -0
  51. data/ext/parsley/json-c-0.9/config.h.in +124 -0
  52. data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
  53. data/ext/parsley/json-c-0.9/config.sub +1686 -0
  54. data/ext/parsley/json-c-0.9/configure +13084 -0
  55. data/ext/parsley/json-c-0.9/configure.in +33 -0
  56. data/ext/parsley/json-c-0.9/debug.c +98 -0
  57. data/ext/parsley/json-c-0.9/debug.h +50 -0
  58. data/ext/parsley/json-c-0.9/depcomp +630 -0
  59. data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
  60. data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
  61. data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
  62. data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
  63. data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
  64. data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
  65. data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
  66. data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
  67. data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
  68. data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
  69. data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
  70. data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
  71. data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
  72. data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
  73. data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
  74. data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
  75. data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
  76. data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
  77. data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
  78. data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
  79. data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
  80. data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
  81. data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
  82. data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
  83. data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
  84. data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
  85. data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
  86. data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
  87. data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
  88. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
  89. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
  90. data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
  91. data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
  92. data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
  93. data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
  94. data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
  95. data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
  96. data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
  97. data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
  98. data/ext/parsley/json-c-0.9/install-sh +520 -0
  99. data/ext/parsley/json-c-0.9/json.h +31 -0
  100. data/ext/parsley/json-c-0.9/json.pc +11 -0
  101. data/ext/parsley/json-c-0.9/json.pc.in +11 -0
  102. data/ext/parsley/json-c-0.9/json_object.c +512 -0
  103. data/ext/parsley/json-c-0.9/json_object.h +319 -0
  104. data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
  105. data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
  106. data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
  107. data/ext/parsley/json-c-0.9/json_util.c +122 -0
  108. data/ext/parsley/json-c-0.9/json_util.h +31 -0
  109. data/ext/parsley/json-c-0.9/libjson.la +41 -0
  110. data/ext/parsley/json-c-0.9/libtool +8890 -0
  111. data/ext/parsley/json-c-0.9/linkhash.c +216 -0
  112. data/ext/parsley/json-c-0.9/linkhash.h +272 -0
  113. data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
  114. data/ext/parsley/json-c-0.9/missing +376 -0
  115. data/ext/parsley/json-c-0.9/printbuf.c +149 -0
  116. data/ext/parsley/json-c-0.9/printbuf.h +64 -0
  117. data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
  118. data/ext/parsley/json-c-0.9/test1 +130 -0
  119. data/ext/parsley/json-c-0.9/test1.c +164 -0
  120. data/ext/parsley/json-c-0.9/test2 +130 -0
  121. data/ext/parsley/json-c-0.9/test2.c +20 -0
  122. data/ext/parsley/json-c-0.9/test3 +130 -0
  123. data/ext/parsley/json-c-0.9/test3.c +23 -0
  124. data/ext/parsley/libtool +8890 -0
  125. data/ext/parsley/ltmain.sh +8406 -0
  126. data/ext/parsley/missing +376 -0
  127. data/ext/parsley/parsed_xpath.c +168 -0
  128. data/ext/parsley/parsed_xpath.h +34 -0
  129. data/ext/parsley/parser.y +631 -0
  130. data/ext/parsley/parsley.c +793 -0
  131. data/ext/parsley/parsley.h +87 -0
  132. data/ext/parsley/parsley_main.c +185 -0
  133. data/ext/parsley/parsleyc_main.c +108 -0
  134. data/ext/parsley/regexp.c +359 -0
  135. data/ext/parsley/regexp.h +36 -0
  136. data/ext/parsley/scanner.l +221 -0
  137. data/ext/parsley/test/ambiguous.html +207 -0
  138. data/ext/parsley/test/ambiguous.json +1 -0
  139. data/ext/parsley/test/ambiguous.let +6 -0
  140. data/ext/parsley/test/array-regression.html +5 -0
  141. data/ext/parsley/test/array-regression.json +1 -0
  142. data/ext/parsley/test/array-regression.let +10 -0
  143. data/ext/parsley/test/backslash.html +5 -0
  144. data/ext/parsley/test/backslash.json +1 -0
  145. data/ext/parsley/test/backslash.let +3 -0
  146. data/ext/parsley/test/bang.html +17 -0
  147. data/ext/parsley/test/bang.json +1 -0
  148. data/ext/parsley/test/bang.let +6 -0
  149. data/ext/parsley/test/collate_regression.html +324 -0
  150. data/ext/parsley/test/collate_regression.json +1 -0
  151. data/ext/parsley/test/collate_regression.let +9 -0
  152. data/ext/parsley/test/contains.html +3 -0
  153. data/ext/parsley/test/contains.json +1 -0
  154. data/ext/parsley/test/contains.let +3 -0
  155. data/ext/parsley/test/content.html +13 -0
  156. data/ext/parsley/test/content.json +1 -0
  157. data/ext/parsley/test/content.let +7 -0
  158. data/ext/parsley/test/cool.html +575 -0
  159. data/ext/parsley/test/cool.json +1 -0
  160. data/ext/parsley/test/cool.let +9 -0
  161. data/ext/parsley/test/craigs-simple.html +207 -0
  162. data/ext/parsley/test/craigs-simple.json +1 -0
  163. data/ext/parsley/test/craigs-simple.let +6 -0
  164. data/ext/parsley/test/craigs.html +207 -0
  165. data/ext/parsley/test/craigs.json +1 -0
  166. data/ext/parsley/test/craigs.let +9 -0
  167. data/ext/parsley/test/crash.html +157 -0
  168. data/ext/parsley/test/crash.json +1 -0
  169. data/ext/parsley/test/crash.let +1 -0
  170. data/ext/parsley/test/css_attr.html +3 -0
  171. data/ext/parsley/test/css_attr.json +1 -0
  172. data/ext/parsley/test/css_attr.let +3 -0
  173. data/ext/parsley/test/default-namespace.json +1 -0
  174. data/ext/parsley/test/default-namespace.let +3 -0
  175. data/ext/parsley/test/default-namespace.xml +1493 -0
  176. data/ext/parsley/test/div.html +8 -0
  177. data/ext/parsley/test/div.json +1 -0
  178. data/ext/parsley/test/div.let +10 -0
  179. data/ext/parsley/test/empty.html +3 -0
  180. data/ext/parsley/test/empty.json +1 -0
  181. data/ext/parsley/test/empty.let +1 -0
  182. data/ext/parsley/test/emptyish.html +207 -0
  183. data/ext/parsley/test/emptyish.let +3 -0
  184. data/ext/parsley/test/fictional-opt.html +43 -0
  185. data/ext/parsley/test/fictional-opt.json +1 -0
  186. data/ext/parsley/test/fictional-opt.let +14 -0
  187. data/ext/parsley/test/fictional.html +43 -0
  188. data/ext/parsley/test/fictional.json +1 -0
  189. data/ext/parsley/test/fictional.let +14 -0
  190. data/ext/parsley/test/function-magic.html +9 -0
  191. data/ext/parsley/test/function-magic.json +1 -0
  192. data/ext/parsley/test/function-magic.let +8 -0
  193. data/ext/parsley/test/hn.html +32 -0
  194. data/ext/parsley/test/hn.json +1 -0
  195. data/ext/parsley/test/hn.let +8 -0
  196. data/ext/parsley/test/malformed-array.html +2329 -0
  197. data/ext/parsley/test/malformed-array.json +1 -0
  198. data/ext/parsley/test/malformed-array.let +22 -0
  199. data/ext/parsley/test/malformed-expr.html +2329 -0
  200. data/ext/parsley/test/malformed-expr.json +1 -0
  201. data/ext/parsley/test/malformed-expr.let +16 -0
  202. data/ext/parsley/test/malformed-function.html +845 -0
  203. data/ext/parsley/test/malformed-function.json +197 -0
  204. data/ext/parsley/test/malformed-function.let +8 -0
  205. data/ext/parsley/test/malformed-json.html +2329 -0
  206. data/ext/parsley/test/malformed-json.json +1 -0
  207. data/ext/parsley/test/malformed-json.let +6 -0
  208. data/ext/parsley/test/malformed-xpath.html +8 -0
  209. data/ext/parsley/test/malformed-xpath.json +1 -0
  210. data/ext/parsley/test/malformed-xpath.let +7 -0
  211. data/ext/parsley/test/match.json +1 -0
  212. data/ext/parsley/test/match.let +9 -0
  213. data/ext/parsley/test/match.xml +11 -0
  214. data/ext/parsley/test/math_ambiguity.html +9 -0
  215. data/ext/parsley/test/math_ambiguity.json +1 -0
  216. data/ext/parsley/test/math_ambiguity.let +5 -0
  217. data/ext/parsley/test/nth-regression.html +13 -0
  218. data/ext/parsley/test/nth-regression.json +1 -0
  219. data/ext/parsley/test/nth-regression.let +3 -0
  220. data/ext/parsley/test/optional.html +2328 -0
  221. data/ext/parsley/test/optional.json +1 -0
  222. data/ext/parsley/test/optional.let +8 -0
  223. data/ext/parsley/test/outer-xml.html +6 -0
  224. data/ext/parsley/test/outer-xml.json +1 -0
  225. data/ext/parsley/test/outer-xml.let +5 -0
  226. data/ext/parsley/test/position.html +8 -0
  227. data/ext/parsley/test/position.json +1 -0
  228. data/ext/parsley/test/position.let +6 -0
  229. data/ext/parsley/test/question_regressions.html +443 -0
  230. data/ext/parsley/test/question_regressions.json +1 -0
  231. data/ext/parsley/test/question_regressions.let +6 -0
  232. data/ext/parsley/test/quote.json +1 -0
  233. data/ext/parsley/test/quote.let +8 -0
  234. data/ext/parsley/test/quote.xml +11 -0
  235. data/ext/parsley/test/reddit.html +1 -0
  236. data/ext/parsley/test/reddit.json +1 -0
  237. data/ext/parsley/test/reddit.let +12 -0
  238. data/ext/parsley/test/remote-fail.json +1 -0
  239. data/ext/parsley/test/remote.html +3 -0
  240. data/ext/parsley/test/remote.json +1 -0
  241. data/ext/parsley/test/remote.let +4 -0
  242. data/ext/parsley/test/replace.json +1 -0
  243. data/ext/parsley/test/replace.let +9 -0
  244. data/ext/parsley/test/replace.xml +11 -0
  245. data/ext/parsley/test/scope.html +10 -0
  246. data/ext/parsley/test/scope.json +1 -0
  247. data/ext/parsley/test/scope.let +6 -0
  248. data/ext/parsley/test/segfault.html +5 -0
  249. data/ext/parsley/test/segfault.json +1 -0
  250. data/ext/parsley/test/segfault.let +9 -0
  251. data/ext/parsley/test/sg-wrap.html +5 -0
  252. data/ext/parsley/test/sg-wrap.json +1 -0
  253. data/ext/parsley/test/sg-wrap.let +3 -0
  254. data/ext/parsley/test/sg_off.html +5 -0
  255. data/ext/parsley/test/sg_off.json +1 -0
  256. data/ext/parsley/test/sg_off.let +3 -0
  257. data/ext/parsley/test/test.json +1 -0
  258. data/ext/parsley/test/test.let +6 -0
  259. data/ext/parsley/test/test.xml +11 -0
  260. data/ext/parsley/test/trivial.html +2329 -0
  261. data/ext/parsley/test/trivial.json +1 -0
  262. data/ext/parsley/test/trivial.let +4 -0
  263. data/ext/parsley/test/trivial2.html +2329 -0
  264. data/ext/parsley/test/trivial2.json +1 -0
  265. data/ext/parsley/test/trivial2.let +7 -0
  266. data/ext/parsley/test/unbang.html +17 -0
  267. data/ext/parsley/test/unbang.json +1 -0
  268. data/ext/parsley/test/unbang.let +6 -0
  269. data/ext/parsley/test/unicode.html +3 -0
  270. data/ext/parsley/test/unicode.json +1 -0
  271. data/ext/parsley/test/unicode.let +1 -0
  272. data/ext/parsley/test/whitespace.html +8 -0
  273. data/ext/parsley/test/whitespace.json +1 -0
  274. data/ext/parsley/test/whitespace.let +3 -0
  275. data/ext/parsley/test/whitespace_regression.html +4 -0
  276. data/ext/parsley/test/whitespace_regression.json +1 -0
  277. data/ext/parsley/test/whitespace_regression.let +3 -0
  278. data/ext/parsley/test/yelp-benchmark.rb +53 -0
  279. data/ext/parsley/test/yelp-home.html +1004 -0
  280. data/ext/parsley/test/yelp-home.json +1 -0
  281. data/ext/parsley/test/yelp-home.let +6 -0
  282. data/ext/parsley/test/yelp.html +2329 -0
  283. data/ext/parsley/test/yelp.json +1 -0
  284. data/ext/parsley/test/yelp.let +12 -0
  285. data/ext/parsley/test/youtube.html +1940 -0
  286. data/ext/parsley/test/youtube.let +11 -0
  287. data/ext/parsley/util.c +237 -0
  288. data/ext/parsley/util.h +34 -0
  289. data/ext/parsley/xml2json.c +47 -0
  290. data/ext/parsley/xml2json.h +14 -0
  291. data/ext/parsley/y.tab.h +222 -0
  292. data/ext/parsley/ylwrap +222 -0
  293. data/lib/parsley.rb +84 -0
  294. data/test/test_parsley.rb +120 -0
  295. data/test/yelp-benchmark.rb +53 -0
  296. data/test/yelp-home.html +1004 -0
  297. data/test/yelp-home.let +6 -0
  298. data/test/yelp.html +2329 -0
  299. metadata +366 -0
@@ -0,0 +1,11 @@
1
+ {
2
+ "video(.video-cell)": [ {
3
+ "thumbnail": ".vimg120 @src",
4
+ "title": ".video-short-title a",
5
+ "link": ".video-short-title a @href",
6
+ "posted": ".video-date-added",
7
+ "views": ".video-view-count",
8
+ "length": ".video-time span",
9
+ "rating": ".ratingVS @title"
10
+ } ]
11
+ }
@@ -0,0 +1,237 @@
1
+ #include "util.h"
2
+
3
+ static bool parsley_exslt_registered = false;
4
+
5
+ #define BUF 128
6
+
7
/**
 * Open `name` with stdio mode `mode`, treating "-" as a standard stream.
 *
 * When `name` is "-" no file is opened: stdout is returned for write and
 * append modes ("w", "wb", "w+", "a", ...) and stdin for every other mode.
 * Any other name is passed straight to fopen().
 *
 * This function never returns NULL. If fopen() fails, a diagnostic holding
 * the file name, errno and its description is printed to stderr and the
 * process exits with status 1.
 */
FILE* parsley_fopen(char* name, char* mode) {
  FILE* fo;
  if(strcmp("-", name) == 0) {
    /* Decide on the leading mode letter so that "wb", "w+" and "a" also
     * select stdout instead of silently falling through to stdin. */
    fo = (mode[0] == 'w' || mode[0] == 'a') ? stdout : stdin;
  } else {
    fo = fopen(name, mode);
  }
  if(fo == NULL) {
    /* Capture errno first so both uses in the message agree. */
    int saved_errno = errno;
    fprintf(stderr, "Cannot open file %s, error %d, %s\n", name, saved_errno, strerror(saved_errno));
    exit(1);
  }
  return fo;
}
24
+
25
+ static int parsley_io_mode = 0;
26
+ static char *parsley_user_agent_header = NULL;
27
+
28
+ int
29
+ parsley_io_get_mode() {
30
+ return parsley_io_mode;
31
+ }
32
+
33
+ static xsltStylesheetPtr span_wrap_sheet = NULL;
34
+
35
+ xmlDocPtr
36
+ parsley_apply_span_wrap(xmlDocPtr doc) {
37
+ if(span_wrap_sheet == NULL) {
38
+ char * sheet = "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:sg=\"http://selectorgadget.com/\"> \
39
+ <xsl:template match=\"text()[(following-sibling::* or preceding-sibling::*) and normalize-space(.) != '']\"> \
40
+ <sg_wrap><xsl:value-of select=\".\" /></sg_wrap> \
41
+ </xsl:template> \
42
+ <xsl:template match=\"@*|node()\"> \
43
+ <xsl:copy> \
44
+ <xsl:apply-templates select=\"@*|node()\"/> \
45
+ </xsl:copy> \
46
+ </xsl:template> \
47
+ </xsl:stylesheet>";
48
+
49
+ xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
50
+ xmlDocPtr xml = xmlCtxtReadMemory(ctxt, sheet, strlen(sheet), NULL, NULL, 0);
51
+ span_wrap_sheet = xsltParseStylesheetDoc(xml);
52
+ }
53
+ xsltTransformContextPtr ctxt = xsltNewTransformContext(span_wrap_sheet, doc);
54
+ xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
55
+ xmlDocPtr out = xsltApplyStylesheetUser(span_wrap_sheet, doc, NULL, NULL, NULL, ctxt);
56
+ xsltFreeTransformContext(ctxt);
57
+ return out;
58
+ }
59
+
60
+ void
61
+ _parsley_set_user_agent(char * agent) {
62
+ if(parsley_user_agent_header != NULL) free(parsley_user_agent_header);
63
+ if(agent == NULL) {
64
+ parsley_user_agent_header = NULL;
65
+ } else {
66
+ asprintf(&parsley_user_agent_header, "User-Agent: %s\n", agent);
67
+ }
68
+ }
69
+
70
+ static void *
71
+ xmlUserAgentIOHTTPOpen(const char * file_name) {
72
+ return (void *)(xmlNanoHTTPMethod(file_name, NULL, NULL, NULL, parsley_user_agent_header, 0));
73
+ }
74
+
75
+ void
76
+ parsley_io_set_mode(int mode) {
77
+ if(mode == parsley_io_mode) return;
78
+ parsley_io_mode = mode;
79
+
80
+ xmlCleanupInputCallbacks();
81
+
82
+ if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_LOCAL) {
83
+
84
+ xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
85
+ xmlFileRead, xmlFileClose);
86
+ #ifdef HAVE_ZLIB_H
87
+ xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen,
88
+ xmlGzfileRead, xmlGzfileClose);
89
+ #endif /* HAVE_ZLIB_H */
90
+ }
91
+ if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_NET) {
92
+ #ifdef LIBXML_HTTP_ENABLED
93
+ xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlUserAgentIOHTTPOpen,
94
+ xmlIOHTTPRead, xmlIOHTTPClose);
95
+ #endif /* LIBXML_HTTP_ENABLED */
96
+
97
+ #ifdef LIBXML_FTP_ENABLED
98
+ xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen,
99
+ xmlIOFTPRead, xmlIOFTPClose);
100
+ #endif /* LIBXML_FTP_ENABLED */
101
+ }
102
+ }
103
+
104
+ void
105
+ printbuf_file_read(FILE *f, struct printbuf *buf) {
106
+ char chars[BUF];
107
+ while(fgets(chars, BUF, f) != NULL){
108
+ sprintbuf(buf, "%s", chars);
109
+ }
110
+ }
111
+
112
+ void registerEXSLT() {
113
+ if(!parsley_exslt_registered) {
114
+ exsltRegisterAll();
115
+ parsley_register_all();
116
+ init_xpath_alias();
117
+ exslt_org_regular_expressions_init();
118
+ parsley_exslt_registered = true;
119
+ }
120
+ }
121
+
122
+ int parsley_key_flags(char* key) {
123
+ char* ptr = key;
124
+ char* last_alnum = key;
125
+ char* last_paren = key;
126
+ while(*ptr++ != '\0'){
127
+ if(isalnum(*ptr)) {
128
+ last_alnum = ptr;
129
+ } else if (*ptr == ')') {
130
+ last_paren = ptr;
131
+ }
132
+ }
133
+ ptr = (last_alnum > last_paren ? last_alnum : last_paren);
134
+ int flags = 0;
135
+ while(*ptr++ != '\0'){
136
+ switch(*ptr){
137
+ case '?':
138
+ flags |= PARSLEY_OPTIONAL;
139
+ break;
140
+ case '!':
141
+ flags |= PARSLEY_BANG;
142
+ break;
143
+ }
144
+ }
145
+ return flags;
146
+ }
147
+
148
/**
 * Return a newly allocated copy of the tag-name part of a parselet key.
 *
 * The copy is cut at the first character -- starting from the SECOND
 * character -- that is not alphanumeric, '_' or '-'. The first character is
 * always kept as-is, matching the historical behavior. For example
 * "video(.video-cell)" yields "video" and "title?" yields "title".
 *
 * The caller owns the result and must free() it. Returns NULL if memory
 * cannot be allocated.
 */
char* parsley_key_tag(char* key) {
  size_t len = strlen(key);
  /* Plain ISO C duplicate; the result is checked before it is written to. */
  char *tag = malloc(len + 1);
  if(tag == NULL) return NULL;
  memcpy(tag, key, len + 1);

  for(size_t i = 1; i < len; i++) {
    /* <ctype.h> functions are undefined for negative values, so plain
     * char must be widened through unsigned char (non-ASCII keys). */
    unsigned char c = (unsigned char) tag[i];
    if(!isalnum(c) && c != '_' && c != '-') {
      tag[i] = '\0';
      break;
    }
  }
  return tag;
}
159
+
160
+ pxpathPtr parsley_key_filter(char* key) {
161
+ char *expr = strdup(key);
162
+ char *ptr = expr;
163
+ char *orig = expr;
164
+ char *last_paren;
165
+
166
+ int offset = 0;
167
+ bool has_expr = false;
168
+
169
+ while(*ptr++ != '\0'){
170
+ if(!has_expr) offset++;
171
+ if(*ptr == '(') has_expr = true;
172
+ if(*ptr == ')') last_paren = ptr;
173
+ }
174
+ if(!has_expr) return NULL;
175
+ *last_paren = 0; // clip ")"
176
+ expr += offset + 1; // clip "("
177
+
178
+ pxpathPtr out = strlen(expr) == 0 ? NULL : myparse(expr);
179
+ free(orig);
180
+ // free(expr);
181
+ return out;
182
+ }
183
+
184
+ static xmlNodePtr
185
+ _xmlLastElementChild(xmlNodePtr node) {
186
+ xmlNodePtr child = node->children;
187
+ xmlNodePtr elem = NULL;
188
+ while(child != NULL) {
189
+ if(child->type == XML_ELEMENT_NODE) elem = child;
190
+ child = child->next;
191
+ }
192
+ return elem;
193
+ }
194
+
195
+ xmlNodePtr new_stylesheet_skeleton(char *incl) {
196
+ struct printbuf *buf = printbuf_new();
197
+ sprintbuf(buf, "%s", "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
198
+ sprintbuf(buf, "%s", " xmlns:lib=\"http://parselets.com/stdlib\"");
199
+ sprintbuf(buf, "%s", " xmlns:parsley=\"http://parselets.com/json\"");
200
+ sprintbuf(buf, "%s", " xmlns:str=\"http://exslt.org/strings\"");
201
+ sprintbuf(buf, "%s", " xmlns:set=\"http://exslt.org/sets\"");
202
+ sprintbuf(buf, "%s", " xmlns:math=\"http://exslt.org/math\"");
203
+ sprintbuf(buf, "%s", " xmlns:func=\"http://exslt.org/functions\"");
204
+ sprintbuf(buf, "%s", " xmlns:user=\"http://parselets.com/usre\"");
205
+ sprintbuf(buf, "%s", " xmlns:dyn=\"http://exslt.org/dynamic\"");
206
+ sprintbuf(buf, "%s", " xmlns:date=\"http://exslt.org/dates-and-times\"");
207
+ sprintbuf(buf, "%s", " xmlns:exsl=\"http://exslt.org/common\"");
208
+ sprintbuf(buf, "%s", " xmlns:saxon=\"http://icl.com/saxon\"");
209
+ sprintbuf(buf, "%s", " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
210
+ sprintbuf(buf, "%s", " xmlns:regex=\"http://exslt.org/regular-expressions\"");
211
+ sprintbuf(buf, "%s", " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
212
+ sprintbuf(buf, "%s", ">\n");
213
+ sprintbuf(buf, "%s", "<xsl:variable name=\"nbsp\">&#160;</xsl:variable>\n");
214
+ sprintbuf(buf, "%s", "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
215
+ sprintbuf(buf, "%s", "<xsl:strip-space elements=\"*\"/>\n");
216
+ sprintbuf(buf, "%s", "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
217
+ sprintbuf(buf, "%s", "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
218
+ sprintbuf(buf, "%s", "<func:result select=\"$out\" /></func:function>");
219
+ sprintbuf(buf, "%s", "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
220
+ sprintbuf(buf, "%s", "<xsl:template match=\"script|style\" mode=\"innertext\"/>");
221
+ sprintbuf(buf, "%s", "<xsl:template match=\"br|address|blockquote|center|dir|div|form|h1|h2|h3|h4|h5|h6|hr|menu|noframes|noscript|p|pre|li|td|th|p\" mode=\"innertext\"><xsl:apply-templates mode=\"innertext\" /><xsl:text>\n</xsl:text></xsl:template>");
222
+ sprintbuf(buf, "%s\n", incl);
223
+ sprintbuf(buf, "%s\n", "<xsl:template match=\"/\">\n");
224
+ sprintbuf(buf, "%s\n", "<parsley:root />\n");
225
+ sprintbuf(buf, "%s\n", "</xsl:template>\n");
226
+ sprintbuf(buf, "%s\n", "</xsl:stylesheet>\n");
227
+ xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
228
+ xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://parselets.com/compiled", NULL, 3);
229
+ xmlFreeParserCtxt(ctxt);
230
+ printbuf_free(buf);
231
+
232
+ xmlNodePtr node = xmlDocGetRootElement(doc);
233
+ while(_xmlLastElementChild(node) != NULL) {
234
+ node = _xmlLastElementChild(node);
235
+ }
236
+ return node;
237
+ }
@@ -0,0 +1,34 @@
1
+ #ifndef UTIL_H_INCLUDED
2
+ #define UTIL_H_INCLUDED
3
+
4
+ #include <stdio.h>
5
+ #include <json/json.h>
6
+ #include "parsed_xpath.h"
7
+ #include "parsley.h"
8
+ #include "parser.h"
9
+ #include "regexp.h"
10
+ #include <json/printbuf.h>
11
+ #include "functions.h"
12
+ #include <stdio.h>
13
+ #include <string.h>
14
+ #include <errno.h>
15
+ #include <stdbool.h>
16
+ #include <ctype.h>
17
+ #include <libexslt/exslt.h>
18
+ #include <libxml/xmlIO.h>
19
+
20
+ FILE* parsley_fopen(char*, char*);
21
+ xmlNodePtr new_stylesheet_skeleton(char *incl);
22
+ void registerEXSLT();
23
+ void printbuf_file_read(FILE *f, struct printbuf *buf);
24
+
25
+ int parsley_key_flags(char*);
26
+ char* parsley_key_tag(char*);
27
+ pxpathPtr parsley_key_filter(char*);
28
+ int parsley_io_get_mode();
29
+ void parsley_io_set_mode(int mode);
30
+ void _parsley_set_user_agent(char *agent);
31
+
32
+ xmlDocPtr parsley_apply_span_wrap(xmlDocPtr ptr);
33
+
34
+ #endif
@@ -0,0 +1,47 @@
1
+ #include "xml2json.h"
2
+
3
+ static struct json_object * _xml2json(xmlNodePtr xml) {
4
+ if(xml == NULL) return NULL;
5
+
6
+ xmlNodePtr child;
7
+ struct json_object * json = NULL;
8
+
9
+ switch(xml->type) {
10
+ case XML_ELEMENT_NODE:
11
+ child = xml->children;
12
+ if(xml->ns == NULL) {
13
+ child = xml;
14
+ // json_object_put(json);
15
+ json = json_object_new_object();
16
+ while(child != NULL) {
17
+ json_object_object_add(json, child->name, xml2json(child->children));
18
+ child = child->next;
19
+ }
20
+ } else if(!strcmp(xml->ns->prefix, "parsley")) {
21
+ if(!strcmp(xml->name, "groups")) {
22
+ // json_object_put(json);
23
+ json = json_object_new_array();
24
+ while(child != NULL) {
25
+ json_object_array_add(json, xml2json(child->children));
26
+ child = child->next;
27
+ }
28
+ } else if(!strcmp(xml->name, "group")) {
29
+ // Implicitly handled by parsley:groups handler
30
+ }
31
+ }
32
+ break;
33
+ case XML_TEXT_NODE:
34
+ json = json_object_new_string(xml->content);
35
+ break;
36
+ }
37
+ return json;
38
+ }
39
+
40
+ /**
41
+ * Handles a simplified xml
42
+ */
43
+ struct json_object * xml2json(xmlNodePtr xml) {
44
+ struct json_object * json = _xml2json(xml);
45
+ if(json == NULL) json = json_object_new_object();
46
+ return json;
47
+ }
@@ -0,0 +1,14 @@
1
+ #ifndef XML2JSON_H_INCLUDED
2
+ #define XML2JSON_H_INCLUDED
3
+
4
+ #include <string.h>
5
+ #include <stdbool.h>
6
+ #include <stdio.h>
7
+ #include <libxml/parser.h>
8
+ #include <libxml/tree.h>
9
+ #include <libxml/debugXML.h>
10
+ #include <json/json.h>
11
+
12
+ struct json_object * xml2json(xmlNodePtr);
13
+
14
+ #endif
@@ -0,0 +1,222 @@
1
+ /* A Bison parser, made by GNU Bison 2.3. */
2
+
3
+ /* Skeleton interface for Bison GLR parsers in C
4
+
5
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2, or (at your option)
10
+ any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor,
20
+ Boston, MA 02110-1301, USA. */
21
+
22
+ /* As a special exception, you may create a larger work that contains
23
+ part or all of the Bison parser skeleton and distribute that work
24
+ under terms of your choice, so long as that work isn't itself a
25
+ parser generator using the skeleton or a modified version thereof
26
+ as a parser skeleton. Alternatively, if you modify or redistribute
27
+ the parser skeleton itself, you may (at your option) remove this
28
+ special exception, which will cause the skeleton and the resulting
29
+ Bison output files to be licensed under the GNU General Public
30
+ License without this special exception.
31
+
32
+ This special exception was added by the Free Software Foundation in
33
+ version 2.2 of Bison. */
34
+
35
+ /* Tokens. */
36
+ #ifndef YYTOKENTYPE
37
+ # define YYTOKENTYPE
38
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
39
+ know about them. */
40
+ enum yytokentype {
41
+ NUMBER = 258,
42
+ S = 259,
43
+ AT = 260,
44
+ LPAREN = 261,
45
+ RPAREN = 262,
46
+ PIPE = 263,
47
+ LT = 264,
48
+ SLASH = 265,
49
+ DBLSLASH = 266,
50
+ BANG = 267,
51
+ COLON = 268,
52
+ DBLCOLON = 269,
53
+ QUERY = 270,
54
+ HASH = 271,
55
+ COMMA = 272,
56
+ DOT = 273,
57
+ DBLDOT = 274,
58
+ GT = 275,
59
+ LBRA = 276,
60
+ RBRA = 277,
61
+ TILDE = 278,
62
+ SPLAT = 279,
63
+ PLUS = 280,
64
+ DASH = 281,
65
+ EQ = 282,
66
+ LTE = 283,
67
+ GTE = 284,
68
+ DOLLAR = 285,
69
+ BSLASHLIT = 286,
70
+ OTHER = 287,
71
+ XANCESTOR = 288,
72
+ XANCESTORSELF = 289,
73
+ XATTR = 290,
74
+ XCHILD = 291,
75
+ XDESC = 292,
76
+ XDESCSELF = 293,
77
+ XFOLLOW = 294,
78
+ XFOLLOWSIB = 295,
79
+ XNS = 296,
80
+ XPARENT = 297,
81
+ XPRE = 298,
82
+ XPRESIB = 299,
83
+ XSELF = 300,
84
+ XOR = 301,
85
+ XAND = 302,
86
+ XDIV = 303,
87
+ XMOD = 304,
88
+ XCOMMENT = 305,
89
+ XTEXT = 306,
90
+ XPI = 307,
91
+ XNODE = 308,
92
+ CXEQUATION = 309,
93
+ CXOPHE = 310,
94
+ CXOPNE = 311,
95
+ CXOPSTARTEQ = 312,
96
+ CXOPENDEQ = 313,
97
+ CXOPCONTAINS = 314,
98
+ CXOPCONTAINS2 = 315,
99
+ CXFIRST = 316,
100
+ CXLAST = 317,
101
+ CXNOT = 318,
102
+ CXEVEN = 319,
103
+ CXODD = 320,
104
+ CXEQ = 321,
105
+ CXGT = 322,
106
+ CXLT = 323,
107
+ CXHEADER = 324,
108
+ CXCONTAINS = 325,
109
+ CXEMPTY = 326,
110
+ CXHAS = 327,
111
+ CXPARENT = 328,
112
+ CXNTHCH = 329,
113
+ CXNTHLASTCH = 330,
114
+ CXNTHTYPE = 331,
115
+ CXNTHLASTTYPE = 332,
116
+ CXFIRSTCH = 333,
117
+ CXLASTCH = 334,
118
+ CXFIRSTTYPE = 335,
119
+ CXLASTTYPE = 336,
120
+ CXONLYCH = 337,
121
+ CXONLYTYPE = 338,
122
+ CXINPUT = 339,
123
+ CXTEXT = 340,
124
+ CXPASSWORD = 341,
125
+ CXRADIO = 342,
126
+ CXCHECKBOX = 343,
127
+ CXSUBMIT = 344,
128
+ CXIMAGE = 345,
129
+ CXRESET = 346,
130
+ CXBUTTON = 347,
131
+ CXFILE = 348,
132
+ CXENABLED = 349,
133
+ CXDISABLED = 350,
134
+ CXCHECKED = 351,
135
+ CXSELECTED = 352,
136
+ NAME = 353,
137
+ STRING = 354
138
+ };
139
+ #endif
140
+
141
+
142
+ /* Copy the first part of user declarations. */
143
+ #line 1 "parser.y"
144
+
145
+ #include <math.h>
146
+ #include <stdio.h>
147
+ #include <stdlib.h>
148
+ #include <string.h>
149
+ #include "parsed_xpath.h"
150
+ #include <libxml/hash.h>
151
+
152
+ #ifndef PARSER_Y_H_INCLUDED
153
+ #define PARSER_Y_H_INCLUDED
154
+
155
+ static pxpathPtr parsed_answer;
156
+
157
+ int yylex (void);
158
+ void yyerror (char const *);
159
+
160
+ void prepare_parse(char*);
161
+ void cleanup_parse(void);
162
+ void start_debugging(void);
163
+
164
+ static xmlHashTablePtr alias_hash;
165
+
166
+ char* xpath_alias(char*);
167
+ void init_xpath_alias();
168
+
169
+ int yyparse(void);
170
+ pxpathPtr myparse(char*);
171
+ void answer(pxpathPtr);
172
+
173
+ #define LIT_BIN_OP(A, B, C) pxpath_cat_literals(3, A, LIT(B), C)
174
+ #define BIN_OP(A, B, C) pxpath_cat_paths(3, A, OP(B), C)
175
+ #define PREP_OP(A, B) pxpath_cat_paths(2, OP(A), B)
176
+ #define PXP(A) pxpath_new_path(1, A)
177
+ #define LIT(A) pxpath_new_literal(1, A)
178
+ #define OP(A) pxpath_new_operator(1, A)
179
+ #define APPEND(A, S) pxpath_cat_paths(2, A, PXP(S));
180
+ #define PREPEND(A, S) pxpath_cat_paths(2, PXP(S), A);
181
+ #define PXPWRAP(A, B, C) pxpath_cat_paths(3, PXP(A), B, PXP(C))
182
+ #define P4E(A, B, C, D) pxpath_cat_paths(4, A, PXP(B), C, PXP(D))
183
+ #define P4O(A, B, C, D) pxpath_cat_paths(4, PXP(A), B, PXP(C), D)
184
+ #define P6E(A, B, C, D, E, F) pxpath_cat_paths(6, A, PXP(B), C, PXP(D), E, PXP(F));
185
+ #define INPUT_TYPE(A, S) APPEND(A, "[lower-case(name())='input' and lower-case(@type)='" #S "']")
186
+ #define TRACE(A, B) fprintf(stderr, "trace(%s): ", A); fprintf(stderr, "%s\n", pxpath_to_string(B));
187
+
188
+ #endif
189
+
190
+
191
+
192
+ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
193
+ typedef union YYSTYPE
194
+ #line 53 "parser.y"
195
+ {
196
+ int empty;
197
+ char* string;
198
+ pxpathPtr node;
199
+ }
200
+ /* Line 2616 of glr.c. */
201
+ #line 202 "y.tab.h"
202
+ YYSTYPE;
203
+ # define YYSTYPE_IS_DECLARED 1
204
+ # define YYSTYPE_IS_TRIVIAL 1
205
+ #endif
206
+
207
+ #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
208
+ typedef struct YYLTYPE
209
+ {
210
+
211
+ char yydummy;
212
+
213
+ } YYLTYPE;
214
+ # define YYLTYPE_IS_DECLARED 1
215
+ # define YYLTYPE_IS_TRIVIAL 1
216
+ #endif
217
+
218
+
219
+ extern YYSTYPE yylval;
220
+
221
+
222
+