gtl-parsley-ruby 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. data/CHANGELOG +3 -0
  2. data/README +32 -0
  3. data/Rakefile +57 -0
  4. data/VERSION +1 -0
  5. data/ext/cparsley.c +152 -0
  6. data/ext/extconf.rb +82 -0
  7. data/ext/parsley/.gitignore +32 -0
  8. data/ext/parsley/AUTHORS +1 -0
  9. data/ext/parsley/ChangeLog +0 -0
  10. data/ext/parsley/HACKING +4 -0
  11. data/ext/parsley/INSTALL +73 -0
  12. data/ext/parsley/INTRO +84 -0
  13. data/ext/parsley/Makefile.am +80 -0
  14. data/ext/parsley/Makefile.in +1009 -0
  15. data/ext/parsley/NEWS +0 -0
  16. data/ext/parsley/PAPER +36 -0
  17. data/ext/parsley/Portfile +18 -0
  18. data/ext/parsley/Portfile.in +17 -0
  19. data/ext/parsley/README.C-LANG +92 -0
  20. data/ext/parsley/README.markdown +1 -0
  21. data/ext/parsley/TODO +39 -0
  22. data/ext/parsley/VERSION +1 -0
  23. data/ext/parsley/aclocal.m4 +8918 -0
  24. data/ext/parsley/bootstrap.sh +6 -0
  25. data/ext/parsley/config.guess +1561 -0
  26. data/ext/parsley/config.sub +1686 -0
  27. data/ext/parsley/configure +13437 -0
  28. data/ext/parsley/configure.ac +46 -0
  29. data/ext/parsley/depcomp +630 -0
  30. data/ext/parsley/functions.c +368 -0
  31. data/ext/parsley/functions.h +19 -0
  32. data/ext/parsley/generate_bisect.sh +12 -0
  33. data/ext/parsley/hooks/prepare-commit-msg +16 -0
  34. data/ext/parsley/install-sh +520 -0
  35. data/ext/parsley/json-c-0.9/AUTHORS +2 -0
  36. data/ext/parsley/json-c-0.9/COPYING +19 -0
  37. data/ext/parsley/json-c-0.9/ChangeLog +103 -0
  38. data/ext/parsley/json-c-0.9/INSTALL +302 -0
  39. data/ext/parsley/json-c-0.9/Makefile.am +43 -0
  40. data/ext/parsley/json-c-0.9/Makefile.in +800 -0
  41. data/ext/parsley/json-c-0.9/NEWS +1 -0
  42. data/ext/parsley/json-c-0.9/README +20 -0
  43. data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
  44. data/ext/parsley/json-c-0.9/README.html +32 -0
  45. data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
  46. data/ext/parsley/json-c-0.9/arraylist.c +94 -0
  47. data/ext/parsley/json-c-0.9/arraylist.h +53 -0
  48. data/ext/parsley/json-c-0.9/bits.h +27 -0
  49. data/ext/parsley/json-c-0.9/config.guess +1561 -0
  50. data/ext/parsley/json-c-0.9/config.h +125 -0
  51. data/ext/parsley/json-c-0.9/config.h.in +124 -0
  52. data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
  53. data/ext/parsley/json-c-0.9/config.sub +1686 -0
  54. data/ext/parsley/json-c-0.9/configure +13084 -0
  55. data/ext/parsley/json-c-0.9/configure.in +33 -0
  56. data/ext/parsley/json-c-0.9/debug.c +98 -0
  57. data/ext/parsley/json-c-0.9/debug.h +50 -0
  58. data/ext/parsley/json-c-0.9/depcomp +630 -0
  59. data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
  60. data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
  61. data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
  62. data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
  63. data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
  64. data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
  65. data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
  66. data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
  67. data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
  68. data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
  69. data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
  70. data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
  71. data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
  72. data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
  73. data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
  74. data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
  75. data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
  76. data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
  77. data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
  78. data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
  79. data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
  80. data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
  81. data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
  82. data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
  83. data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
  84. data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
  85. data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
  86. data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
  87. data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
  88. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
  89. data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
  90. data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
  91. data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
  92. data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
  93. data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
  94. data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
  95. data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
  96. data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
  97. data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
  98. data/ext/parsley/json-c-0.9/install-sh +520 -0
  99. data/ext/parsley/json-c-0.9/json.h +31 -0
  100. data/ext/parsley/json-c-0.9/json.pc +11 -0
  101. data/ext/parsley/json-c-0.9/json.pc.in +11 -0
  102. data/ext/parsley/json-c-0.9/json_object.c +512 -0
  103. data/ext/parsley/json-c-0.9/json_object.h +319 -0
  104. data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
  105. data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
  106. data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
  107. data/ext/parsley/json-c-0.9/json_util.c +122 -0
  108. data/ext/parsley/json-c-0.9/json_util.h +31 -0
  109. data/ext/parsley/json-c-0.9/libjson.la +41 -0
  110. data/ext/parsley/json-c-0.9/libtool +8890 -0
  111. data/ext/parsley/json-c-0.9/linkhash.c +216 -0
  112. data/ext/parsley/json-c-0.9/linkhash.h +272 -0
  113. data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
  114. data/ext/parsley/json-c-0.9/missing +376 -0
  115. data/ext/parsley/json-c-0.9/printbuf.c +149 -0
  116. data/ext/parsley/json-c-0.9/printbuf.h +64 -0
  117. data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
  118. data/ext/parsley/json-c-0.9/test1 +130 -0
  119. data/ext/parsley/json-c-0.9/test1.c +164 -0
  120. data/ext/parsley/json-c-0.9/test2 +130 -0
  121. data/ext/parsley/json-c-0.9/test2.c +20 -0
  122. data/ext/parsley/json-c-0.9/test3 +130 -0
  123. data/ext/parsley/json-c-0.9/test3.c +23 -0
  124. data/ext/parsley/libtool +8890 -0
  125. data/ext/parsley/ltmain.sh +8406 -0
  126. data/ext/parsley/missing +376 -0
  127. data/ext/parsley/parsed_xpath.c +168 -0
  128. data/ext/parsley/parsed_xpath.h +34 -0
  129. data/ext/parsley/parser.y +631 -0
  130. data/ext/parsley/parsley.c +793 -0
  131. data/ext/parsley/parsley.h +87 -0
  132. data/ext/parsley/parsley_main.c +185 -0
  133. data/ext/parsley/parsleyc_main.c +108 -0
  134. data/ext/parsley/regexp.c +359 -0
  135. data/ext/parsley/regexp.h +36 -0
  136. data/ext/parsley/scanner.l +221 -0
  137. data/ext/parsley/test/ambiguous.html +207 -0
  138. data/ext/parsley/test/ambiguous.json +1 -0
  139. data/ext/parsley/test/ambiguous.let +6 -0
  140. data/ext/parsley/test/array-regression.html +5 -0
  141. data/ext/parsley/test/array-regression.json +1 -0
  142. data/ext/parsley/test/array-regression.let +10 -0
  143. data/ext/parsley/test/backslash.html +5 -0
  144. data/ext/parsley/test/backslash.json +1 -0
  145. data/ext/parsley/test/backslash.let +3 -0
  146. data/ext/parsley/test/bang.html +17 -0
  147. data/ext/parsley/test/bang.json +1 -0
  148. data/ext/parsley/test/bang.let +6 -0
  149. data/ext/parsley/test/collate_regression.html +324 -0
  150. data/ext/parsley/test/collate_regression.json +1 -0
  151. data/ext/parsley/test/collate_regression.let +9 -0
  152. data/ext/parsley/test/contains.html +3 -0
  153. data/ext/parsley/test/contains.json +1 -0
  154. data/ext/parsley/test/contains.let +3 -0
  155. data/ext/parsley/test/content.html +13 -0
  156. data/ext/parsley/test/content.json +1 -0
  157. data/ext/parsley/test/content.let +7 -0
  158. data/ext/parsley/test/cool.html +575 -0
  159. data/ext/parsley/test/cool.json +1 -0
  160. data/ext/parsley/test/cool.let +9 -0
  161. data/ext/parsley/test/craigs-simple.html +207 -0
  162. data/ext/parsley/test/craigs-simple.json +1 -0
  163. data/ext/parsley/test/craigs-simple.let +6 -0
  164. data/ext/parsley/test/craigs.html +207 -0
  165. data/ext/parsley/test/craigs.json +1 -0
  166. data/ext/parsley/test/craigs.let +9 -0
  167. data/ext/parsley/test/crash.html +157 -0
  168. data/ext/parsley/test/crash.json +1 -0
  169. data/ext/parsley/test/crash.let +1 -0
  170. data/ext/parsley/test/css_attr.html +3 -0
  171. data/ext/parsley/test/css_attr.json +1 -0
  172. data/ext/parsley/test/css_attr.let +3 -0
  173. data/ext/parsley/test/default-namespace.json +1 -0
  174. data/ext/parsley/test/default-namespace.let +3 -0
  175. data/ext/parsley/test/default-namespace.xml +1493 -0
  176. data/ext/parsley/test/div.html +8 -0
  177. data/ext/parsley/test/div.json +1 -0
  178. data/ext/parsley/test/div.let +10 -0
  179. data/ext/parsley/test/empty.html +3 -0
  180. data/ext/parsley/test/empty.json +1 -0
  181. data/ext/parsley/test/empty.let +1 -0
  182. data/ext/parsley/test/emptyish.html +207 -0
  183. data/ext/parsley/test/emptyish.let +3 -0
  184. data/ext/parsley/test/fictional-opt.html +43 -0
  185. data/ext/parsley/test/fictional-opt.json +1 -0
  186. data/ext/parsley/test/fictional-opt.let +14 -0
  187. data/ext/parsley/test/fictional.html +43 -0
  188. data/ext/parsley/test/fictional.json +1 -0
  189. data/ext/parsley/test/fictional.let +14 -0
  190. data/ext/parsley/test/function-magic.html +9 -0
  191. data/ext/parsley/test/function-magic.json +1 -0
  192. data/ext/parsley/test/function-magic.let +8 -0
  193. data/ext/parsley/test/hn.html +32 -0
  194. data/ext/parsley/test/hn.json +1 -0
  195. data/ext/parsley/test/hn.let +8 -0
  196. data/ext/parsley/test/malformed-array.html +2329 -0
  197. data/ext/parsley/test/malformed-array.json +1 -0
  198. data/ext/parsley/test/malformed-array.let +22 -0
  199. data/ext/parsley/test/malformed-expr.html +2329 -0
  200. data/ext/parsley/test/malformed-expr.json +1 -0
  201. data/ext/parsley/test/malformed-expr.let +16 -0
  202. data/ext/parsley/test/malformed-function.html +845 -0
  203. data/ext/parsley/test/malformed-function.json +197 -0
  204. data/ext/parsley/test/malformed-function.let +8 -0
  205. data/ext/parsley/test/malformed-json.html +2329 -0
  206. data/ext/parsley/test/malformed-json.json +1 -0
  207. data/ext/parsley/test/malformed-json.let +6 -0
  208. data/ext/parsley/test/malformed-xpath.html +8 -0
  209. data/ext/parsley/test/malformed-xpath.json +1 -0
  210. data/ext/parsley/test/malformed-xpath.let +7 -0
  211. data/ext/parsley/test/match.json +1 -0
  212. data/ext/parsley/test/match.let +9 -0
  213. data/ext/parsley/test/match.xml +11 -0
  214. data/ext/parsley/test/math_ambiguity.html +9 -0
  215. data/ext/parsley/test/math_ambiguity.json +1 -0
  216. data/ext/parsley/test/math_ambiguity.let +5 -0
  217. data/ext/parsley/test/nth-regression.html +13 -0
  218. data/ext/parsley/test/nth-regression.json +1 -0
  219. data/ext/parsley/test/nth-regression.let +3 -0
  220. data/ext/parsley/test/optional.html +2328 -0
  221. data/ext/parsley/test/optional.json +1 -0
  222. data/ext/parsley/test/optional.let +8 -0
  223. data/ext/parsley/test/outer-xml.html +6 -0
  224. data/ext/parsley/test/outer-xml.json +1 -0
  225. data/ext/parsley/test/outer-xml.let +5 -0
  226. data/ext/parsley/test/position.html +8 -0
  227. data/ext/parsley/test/position.json +1 -0
  228. data/ext/parsley/test/position.let +6 -0
  229. data/ext/parsley/test/question_regressions.html +443 -0
  230. data/ext/parsley/test/question_regressions.json +1 -0
  231. data/ext/parsley/test/question_regressions.let +6 -0
  232. data/ext/parsley/test/quote.json +1 -0
  233. data/ext/parsley/test/quote.let +8 -0
  234. data/ext/parsley/test/quote.xml +11 -0
  235. data/ext/parsley/test/reddit.html +1 -0
  236. data/ext/parsley/test/reddit.json +1 -0
  237. data/ext/parsley/test/reddit.let +12 -0
  238. data/ext/parsley/test/remote-fail.json +1 -0
  239. data/ext/parsley/test/remote.html +3 -0
  240. data/ext/parsley/test/remote.json +1 -0
  241. data/ext/parsley/test/remote.let +4 -0
  242. data/ext/parsley/test/replace.json +1 -0
  243. data/ext/parsley/test/replace.let +9 -0
  244. data/ext/parsley/test/replace.xml +11 -0
  245. data/ext/parsley/test/scope.html +10 -0
  246. data/ext/parsley/test/scope.json +1 -0
  247. data/ext/parsley/test/scope.let +6 -0
  248. data/ext/parsley/test/segfault.html +5 -0
  249. data/ext/parsley/test/segfault.json +1 -0
  250. data/ext/parsley/test/segfault.let +9 -0
  251. data/ext/parsley/test/sg-wrap.html +5 -0
  252. data/ext/parsley/test/sg-wrap.json +1 -0
  253. data/ext/parsley/test/sg-wrap.let +3 -0
  254. data/ext/parsley/test/sg_off.html +5 -0
  255. data/ext/parsley/test/sg_off.json +1 -0
  256. data/ext/parsley/test/sg_off.let +3 -0
  257. data/ext/parsley/test/test.json +1 -0
  258. data/ext/parsley/test/test.let +6 -0
  259. data/ext/parsley/test/test.xml +11 -0
  260. data/ext/parsley/test/trivial.html +2329 -0
  261. data/ext/parsley/test/trivial.json +1 -0
  262. data/ext/parsley/test/trivial.let +4 -0
  263. data/ext/parsley/test/trivial2.html +2329 -0
  264. data/ext/parsley/test/trivial2.json +1 -0
  265. data/ext/parsley/test/trivial2.let +7 -0
  266. data/ext/parsley/test/unbang.html +17 -0
  267. data/ext/parsley/test/unbang.json +1 -0
  268. data/ext/parsley/test/unbang.let +6 -0
  269. data/ext/parsley/test/unicode.html +3 -0
  270. data/ext/parsley/test/unicode.json +1 -0
  271. data/ext/parsley/test/unicode.let +1 -0
  272. data/ext/parsley/test/whitespace.html +8 -0
  273. data/ext/parsley/test/whitespace.json +1 -0
  274. data/ext/parsley/test/whitespace.let +3 -0
  275. data/ext/parsley/test/whitespace_regression.html +4 -0
  276. data/ext/parsley/test/whitespace_regression.json +1 -0
  277. data/ext/parsley/test/whitespace_regression.let +3 -0
  278. data/ext/parsley/test/yelp-benchmark.rb +53 -0
  279. data/ext/parsley/test/yelp-home.html +1004 -0
  280. data/ext/parsley/test/yelp-home.json +1 -0
  281. data/ext/parsley/test/yelp-home.let +6 -0
  282. data/ext/parsley/test/yelp.html +2329 -0
  283. data/ext/parsley/test/yelp.json +1 -0
  284. data/ext/parsley/test/yelp.let +12 -0
  285. data/ext/parsley/test/youtube.html +1940 -0
  286. data/ext/parsley/test/youtube.let +11 -0
  287. data/ext/parsley/util.c +237 -0
  288. data/ext/parsley/util.h +34 -0
  289. data/ext/parsley/xml2json.c +47 -0
  290. data/ext/parsley/xml2json.h +14 -0
  291. data/ext/parsley/y.tab.h +222 -0
  292. data/ext/parsley/ylwrap +222 -0
  293. data/lib/parsley.rb +84 -0
  294. data/test/test_parsley.rb +120 -0
  295. data/test/yelp-benchmark.rb +53 -0
  296. data/test/yelp-home.html +1004 -0
  297. data/test/yelp-home.let +6 -0
  298. data/test/yelp.html +2329 -0
  299. metadata +366 -0
@@ -0,0 +1,11 @@
1
+ {
2
+ "video(.video-cell)": [ {
3
+ "thumbnail": ".vimg120 @src",
4
+ "title": ".video-short-title a",
5
+ "link": ".video-short-title a @href",
6
+ "posted": ".video-date-added",
7
+ "views": ".video-view-count",
8
+ "length": ".video-time span",
9
+ "rating": ".ratingVS @title"
10
+ } ]
11
+ }
@@ -0,0 +1,237 @@
1
+ #include "util.h"
2
+
3
+ static bool parsley_exslt_registered = false; /* true once registerEXSLT() has performed its registrations */
4
+
5
+ #define BUF 128 /* size in bytes of the read buffer used by printbuf_file_read() */
6
+
7
/**
 * Open a file, treating the name "-" as a standard stream.
 *
 * @param name  Path to open, or "-" to use stdin/stdout.
 * @param mode  fopen()-style mode string.
 * @return      An open stream; this function never returns NULL.
 *
 * For "-", a write or append mode ("w", "wb", "a", ...) selects stdout and
 * every other mode selects stdin.  If the stream cannot be opened, a message
 * is written to stderr and the process exits with status 1.
 */
FILE* parsley_fopen(char* name, char* mode) {
  FILE *stream;

  if (strcmp(name, "-") == 0) {
    /* Decide on the leading mode character: comparing against the exact
     * string "w" sent "wb" and "a" to stdin. */
    stream = (mode[0] == 'w' || mode[0] == 'a') ? stdout : stdin;
  } else {
    stream = fopen(name, mode);
  }

  if (stream == NULL) {
    int err = errno;  /* capture before any further library call */
    fprintf(stderr, "Cannot open file %s, error %d, %s\n", name, err, strerror(err));
    exit(1);
  }
  return stream;
}
24
+
25
/* Mode bits most recently applied by parsley_io_set_mode(); 0 until then. */
static int parsley_io_mode = 0;

/* Heap-allocated "User-Agent: ..." header line, or NULL when none is set. */
static char *parsley_user_agent_header = NULL;

/** Report the I/O mode currently in effect. */
int parsley_io_get_mode() {
  return parsley_io_mode;
}
32
+
33
+ static xsltStylesheetPtr span_wrap_sheet = NULL;
34
+
35
+ xmlDocPtr
36
+ parsley_apply_span_wrap(xmlDocPtr doc) {
37
+ if(span_wrap_sheet == NULL) {
38
+ char * sheet = "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:sg=\"http://selectorgadget.com/\"> \
39
+ <xsl:template match=\"text()[(following-sibling::* or preceding-sibling::*) and normalize-space(.) != '']\"> \
40
+ <sg_wrap><xsl:value-of select=\".\" /></sg_wrap> \
41
+ </xsl:template> \
42
+ <xsl:template match=\"@*|node()\"> \
43
+ <xsl:copy> \
44
+ <xsl:apply-templates select=\"@*|node()\"/> \
45
+ </xsl:copy> \
46
+ </xsl:template> \
47
+ </xsl:stylesheet>";
48
+
49
+ xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
50
+ xmlDocPtr xml = xmlCtxtReadMemory(ctxt, sheet, strlen(sheet), NULL, NULL, 0);
51
+ span_wrap_sheet = xsltParseStylesheetDoc(xml);
52
+ }
53
+ xsltTransformContextPtr ctxt = xsltNewTransformContext(span_wrap_sheet, doc);
54
+ xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
55
+ xmlDocPtr out = xsltApplyStylesheetUser(span_wrap_sheet, doc, NULL, NULL, NULL, ctxt);
56
+ xsltFreeTransformContext(ctxt);
57
+ return out;
58
+ }
59
+
60
+ void
61
+ _parsley_set_user_agent(char * agent) {
62
+ if(parsley_user_agent_header != NULL) free(parsley_user_agent_header);
63
+ if(agent == NULL) {
64
+ parsley_user_agent_header = NULL;
65
+ } else {
66
+ asprintf(&parsley_user_agent_header, "User-Agent: %s\n", agent);
67
+ }
68
+ }
69
+
70
+ static void *
71
+ xmlUserAgentIOHTTPOpen(const char * file_name) {
72
+ return (void *)(xmlNanoHTTPMethod(file_name, NULL, NULL, NULL, parsley_user_agent_header, 0));
73
+ }
74
+
75
+ void
76
+ parsley_io_set_mode(int mode) {
77
+ if(mode == parsley_io_mode) return;
78
+ parsley_io_mode = mode;
79
+
80
+ xmlCleanupInputCallbacks();
81
+
82
+ if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_LOCAL) {
83
+
84
+ xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
85
+ xmlFileRead, xmlFileClose);
86
+ #ifdef HAVE_ZLIB_H
87
+ xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen,
88
+ xmlGzfileRead, xmlGzfileClose);
89
+ #endif /* HAVE_ZLIB_H */
90
+ }
91
+ if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_NET) {
92
+ #ifdef LIBXML_HTTP_ENABLED
93
+ xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlUserAgentIOHTTPOpen,
94
+ xmlIOHTTPRead, xmlIOHTTPClose);
95
+ #endif /* LIBXML_HTTP_ENABLED */
96
+
97
+ #ifdef LIBXML_FTP_ENABLED
98
+ xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen,
99
+ xmlIOFTPRead, xmlIOFTPClose);
100
+ #endif /* LIBXML_FTP_ENABLED */
101
+ }
102
+ }
103
+
104
+ void
105
+ printbuf_file_read(FILE *f, struct printbuf *buf) {
106
+ char chars[BUF];
107
+ while(fgets(chars, BUF, f) != NULL){
108
+ sprintbuf(buf, "%s", chars);
109
+ }
110
+ }
111
+
112
+ void registerEXSLT() {
113
+ if(!parsley_exslt_registered) {
114
+ exsltRegisterAll();
115
+ parsley_register_all();
116
+ init_xpath_alias();
117
+ exslt_org_regular_expressions_init();
118
+ parsley_exslt_registered = true;
119
+ }
120
+ }
121
+
122
+ int parsley_key_flags(char* key) {
123
+ char* ptr = key;
124
+ char* last_alnum = key;
125
+ char* last_paren = key;
126
+ while(*ptr++ != '\0'){
127
+ if(isalnum(*ptr)) {
128
+ last_alnum = ptr;
129
+ } else if (*ptr == ')') {
130
+ last_paren = ptr;
131
+ }
132
+ }
133
+ ptr = (last_alnum > last_paren ? last_alnum : last_paren);
134
+ int flags = 0;
135
+ while(*ptr++ != '\0'){
136
+ switch(*ptr){
137
+ case '?':
138
+ flags |= PARSLEY_OPTIONAL;
139
+ break;
140
+ case '!':
141
+ flags |= PARSLEY_BANG;
142
+ break;
143
+ }
144
+ }
145
+ return flags;
146
+ }
147
+
148
/**
 * Extract the tag name at the start of a parselet key.
 *
 * The tag is the first character of the key followed by the longest run of
 * alphanumerics, '_' and '-'; e.g. "video(.video-cell)" gives "video".
 *
 * @param key  NUL-terminated key.
 * @return     Newly allocated string the caller must free(), or NULL if the
 *             copy could not be allocated.
 */
char* parsley_key_tag(char* key) {
  char *tag = strdup(key);
  if(tag == NULL) {
    return NULL;
  }
  if(tag[0] == '\0') {
    return tag;
  }

  /* The first character is kept unconditionally; scanning starts after it. */
  char *end = tag + 1;
  while(*end != '\0') {
    /* unsigned char: isalnum is undefined for negative (non-ASCII) values */
    unsigned char c = (unsigned char) *end;
    if(!isalnum(c) && c != '_' && c != '-') {
      break;
    }
    end++;
  }
  *end = '\0';
  return tag;
}
159
+
160
+ pxpathPtr parsley_key_filter(char* key) {
161
+ char *expr = strdup(key);
162
+ char *ptr = expr;
163
+ char *orig = expr;
164
+ char *last_paren;
165
+
166
+ int offset = 0;
167
+ bool has_expr = false;
168
+
169
+ while(*ptr++ != '\0'){
170
+ if(!has_expr) offset++;
171
+ if(*ptr == '(') has_expr = true;
172
+ if(*ptr == ')') last_paren = ptr;
173
+ }
174
+ if(!has_expr) return NULL;
175
+ *last_paren = 0; // clip ")"
176
+ expr += offset + 1; // clip "("
177
+
178
+ pxpathPtr out = strlen(expr) == 0 ? NULL : myparse(expr);
179
+ free(orig);
180
+ // free(expr);
181
+ return out;
182
+ }
183
+
184
+ static xmlNodePtr
185
+ _xmlLastElementChild(xmlNodePtr node) {
186
+ xmlNodePtr child = node->children;
187
+ xmlNodePtr elem = NULL;
188
+ while(child != NULL) {
189
+ if(child->type == XML_ELEMENT_NODE) elem = child;
190
+ child = child->next;
191
+ }
192
+ return elem;
193
+ }
194
+
195
+ xmlNodePtr new_stylesheet_skeleton(char *incl) {
196
+ struct printbuf *buf = printbuf_new();
197
+ sprintbuf(buf, "%s", "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
198
+ sprintbuf(buf, "%s", " xmlns:lib=\"http://parselets.com/stdlib\"");
199
+ sprintbuf(buf, "%s", " xmlns:parsley=\"http://parselets.com/json\"");
200
+ sprintbuf(buf, "%s", " xmlns:str=\"http://exslt.org/strings\"");
201
+ sprintbuf(buf, "%s", " xmlns:set=\"http://exslt.org/sets\"");
202
+ sprintbuf(buf, "%s", " xmlns:math=\"http://exslt.org/math\"");
203
+ sprintbuf(buf, "%s", " xmlns:func=\"http://exslt.org/functions\"");
204
+ sprintbuf(buf, "%s", " xmlns:user=\"http://parselets.com/usre\"");
205
+ sprintbuf(buf, "%s", " xmlns:dyn=\"http://exslt.org/dynamic\"");
206
+ sprintbuf(buf, "%s", " xmlns:date=\"http://exslt.org/dates-and-times\"");
207
+ sprintbuf(buf, "%s", " xmlns:exsl=\"http://exslt.org/common\"");
208
+ sprintbuf(buf, "%s", " xmlns:saxon=\"http://icl.com/saxon\"");
209
+ sprintbuf(buf, "%s", " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
210
+ sprintbuf(buf, "%s", " xmlns:regex=\"http://exslt.org/regular-expressions\"");
211
+ sprintbuf(buf, "%s", " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
212
+ sprintbuf(buf, "%s", ">\n");
213
+ sprintbuf(buf, "%s", "<xsl:variable name=\"nbsp\">&#160;</xsl:variable>\n");
214
+ sprintbuf(buf, "%s", "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
215
+ sprintbuf(buf, "%s", "<xsl:strip-space elements=\"*\"/>\n");
216
+ sprintbuf(buf, "%s", "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
217
+ sprintbuf(buf, "%s", "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
218
+ sprintbuf(buf, "%s", "<func:result select=\"$out\" /></func:function>");
219
+ sprintbuf(buf, "%s", "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
220
+ sprintbuf(buf, "%s", "<xsl:template match=\"script|style\" mode=\"innertext\"/>");
221
+ sprintbuf(buf, "%s", "<xsl:template match=\"br|address|blockquote|center|dir|div|form|h1|h2|h3|h4|h5|h6|hr|menu|noframes|noscript|p|pre|li|td|th|p\" mode=\"innertext\"><xsl:apply-templates mode=\"innertext\" /><xsl:text>\n</xsl:text></xsl:template>");
222
+ sprintbuf(buf, "%s\n", incl);
223
+ sprintbuf(buf, "%s\n", "<xsl:template match=\"/\">\n");
224
+ sprintbuf(buf, "%s\n", "<parsley:root />\n");
225
+ sprintbuf(buf, "%s\n", "</xsl:template>\n");
226
+ sprintbuf(buf, "%s\n", "</xsl:stylesheet>\n");
227
+ xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
228
+ xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://parselets.com/compiled", NULL, 3);
229
+ xmlFreeParserCtxt(ctxt);
230
+ printbuf_free(buf);
231
+
232
+ xmlNodePtr node = xmlDocGetRootElement(doc);
233
+ while(_xmlLastElementChild(node) != NULL) {
234
+ node = _xmlLastElementChild(node);
235
+ }
236
+ return node;
237
+ }
@@ -0,0 +1,34 @@
1
+ #ifndef UTIL_H_INCLUDED
2
+ #define UTIL_H_INCLUDED
3
+
4
+ #include <stdio.h>
5
+ #include <json/json.h>
6
+ #include "parsed_xpath.h"
7
+ #include "parsley.h"
8
+ #include "parser.h"
9
+ #include "regexp.h"
10
+ #include <json/printbuf.h>
11
+ #include "functions.h"
12
+ #include <stdio.h>
13
+ #include <string.h>
14
+ #include <errno.h>
15
+ #include <stdbool.h>
16
+ #include <ctype.h>
17
+ #include <libexslt/exslt.h>
18
+ #include <libxml/xmlIO.h>
19
+
20
+ FILE* parsley_fopen(char*, char*); /* (name, mode): fopen wrapper; name "-" yields stdout or stdin; exits the process on failure */
21
+ xmlNodePtr new_stylesheet_skeleton(char *incl); /* builds the XSLT skeleton around incl; returns its deepest last element */
22
+ void registerEXSLT(); /* one-time EXSLT / parsley extension registration */
23
+ void printbuf_file_read(FILE *f, struct printbuf *buf); /* appends the rest of f to buf */
24
+
25
+ int parsley_key_flags(char*); /* PARSLEY_OPTIONAL / PARSLEY_BANG bits taken from the key's trailing '?' / '!' */
26
+ char* parsley_key_tag(char*); /* newly allocated copy of the key's leading name; caller frees */
27
+ pxpathPtr parsley_key_filter(char*); /* parsed "(...)" part of the key, or NULL if absent or empty */
28
+ int parsley_io_get_mode(); /* mode last stored by parsley_io_set_mode() */
29
+ void parsley_io_set_mode(int mode); /* re-registers libxml2 input callbacks according to the PARSLEY_OPTIONS_ALLOW_* bits */
30
+ void _parsley_set_user_agent(char *agent); /* sets, or with NULL clears, the User-Agent header used for HTTP input */
31
+
32
+ xmlDocPtr parsley_apply_span_wrap(xmlDocPtr ptr); /* applies the built-in sg_wrap stylesheet; returns the result document */
33
+
34
+ #endif
@@ -0,0 +1,47 @@
1
+ #include "xml2json.h"
2
+
3
+ static struct json_object * _xml2json(xmlNodePtr xml) {
4
+ if(xml == NULL) return NULL;
5
+
6
+ xmlNodePtr child;
7
+ struct json_object * json = NULL;
8
+
9
+ switch(xml->type) {
10
+ case XML_ELEMENT_NODE:
11
+ child = xml->children;
12
+ if(xml->ns == NULL) {
13
+ child = xml;
14
+ // json_object_put(json);
15
+ json = json_object_new_object();
16
+ while(child != NULL) {
17
+ json_object_object_add(json, child->name, xml2json(child->children));
18
+ child = child->next;
19
+ }
20
+ } else if(!strcmp(xml->ns->prefix, "parsley")) {
21
+ if(!strcmp(xml->name, "groups")) {
22
+ // json_object_put(json);
23
+ json = json_object_new_array();
24
+ while(child != NULL) {
25
+ json_object_array_add(json, xml2json(child->children));
26
+ child = child->next;
27
+ }
28
+ } else if(!strcmp(xml->name, "group")) {
29
+ // Implicitly handled by parsley:groups handler
30
+ }
31
+ }
32
+ break;
33
+ case XML_TEXT_NODE:
34
+ json = json_object_new_string(xml->content);
35
+ break;
36
+ }
37
+ return json;
38
+ }
39
+
40
+ /**
41
+ * Handles a simplified xml
42
+ */
43
+ struct json_object * xml2json(xmlNodePtr xml) {
44
+ struct json_object * json = _xml2json(xml);
45
+ if(json == NULL) json = json_object_new_object();
46
+ return json;
47
+ }
@@ -0,0 +1,14 @@
1
+ #ifndef XML2JSON_H_INCLUDED
2
+ #define XML2JSON_H_INCLUDED
3
+
4
+ #include <string.h>
5
+ #include <stdbool.h>
6
+ #include <stdio.h>
7
+ #include <libxml/parser.h>
8
+ #include <libxml/tree.h>
9
+ #include <libxml/debugXML.h>
10
+ #include <json/json.h>
11
+
12
+ struct json_object * xml2json(xmlNodePtr); /* converts simplified XML to JSON; yields an empty object when nothing converts */
13
+
14
+ #endif
@@ -0,0 +1,222 @@
1
+ /* A Bison parser, made by GNU Bison 2.3. */
2
+
3
+ /* Skeleton interface for Bison GLR parsers in C
4
+
5
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2, or (at your option)
10
+ any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor,
20
+ Boston, MA 02110-1301, USA. */
21
+
22
+ /* As a special exception, you may create a larger work that contains
23
+ part or all of the Bison parser skeleton and distribute that work
24
+ under terms of your choice, so long as that work isn't itself a
25
+ parser generator using the skeleton or a modified version thereof
26
+ as a parser skeleton. Alternatively, if you modify or redistribute
27
+ the parser skeleton itself, you may (at your option) remove this
28
+ special exception, which will cause the skeleton and the resulting
29
+ Bison output files to be licensed under the GNU General Public
30
+ License without this special exception.
31
+
32
+ This special exception was added by the Free Software Foundation in
33
+ version 2.2 of Bison. */
34
+
35
+ /* Tokens. */
36
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
/* Token codes.  They are declared as an enum so that GDB and other debuggers
   can show them by name.  Codes run consecutively from NUMBER (258) to
   STRING (354). */
enum yytokentype {
  NUMBER        = 258,
  S             = 259,
  AT            = 260,
  LPAREN        = 261,
  RPAREN        = 262,
  PIPE          = 263,
  LT            = 264,
  SLASH         = 265,
  DBLSLASH      = 266,
  BANG          = 267,
  COLON         = 268,
  DBLCOLON      = 269,
  QUERY         = 270,
  HASH          = 271,
  COMMA         = 272,
  DOT           = 273,
  DBLDOT        = 274,
  GT            = 275,
  LBRA          = 276,
  RBRA          = 277,
  TILDE         = 278,
  SPLAT         = 279,
  PLUS          = 280,
  DASH          = 281,
  EQ            = 282,
  LTE           = 283,
  GTE           = 284,
  DOLLAR        = 285,
  BSLASHLIT     = 286,
  OTHER         = 287,
  XANCESTOR     = 288,
  XANCESTORSELF = 289,
  XATTR         = 290,
  XCHILD        = 291,
  XDESC         = 292,
  XDESCSELF     = 293,
  XFOLLOW       = 294,
  XFOLLOWSIB    = 295,
  XNS           = 296,
  XPARENT       = 297,
  XPRE          = 298,
  XPRESIB       = 299,
  XSELF         = 300,
  XOR           = 301,
  XAND          = 302,
  XDIV          = 303,
  XMOD          = 304,
  XCOMMENT      = 305,
  XTEXT         = 306,
  XPI           = 307,
  XNODE         = 308,
  CXEQUATION    = 309,
  CXOPHE        = 310,
  CXOPNE        = 311,
  CXOPSTARTEQ   = 312,
  CXOPENDEQ     = 313,
  CXOPCONTAINS  = 314,
  CXOPCONTAINS2 = 315,
  CXFIRST       = 316,
  CXLAST        = 317,
  CXNOT         = 318,
  CXEVEN        = 319,
  CXODD         = 320,
  CXEQ          = 321,
  CXGT          = 322,
  CXLT          = 323,
  CXHEADER      = 324,
  CXCONTAINS    = 325,
  CXEMPTY       = 326,
  CXHAS         = 327,
  CXPARENT      = 328,
  CXNTHCH       = 329,
  CXNTHLASTCH   = 330,
  CXNTHTYPE     = 331,
  CXNTHLASTTYPE = 332,
  CXFIRSTCH     = 333,
  CXLASTCH      = 334,
  CXFIRSTTYPE   = 335,
  CXLASTTYPE    = 336,
  CXONLYCH      = 337,
  CXONLYTYPE    = 338,
  CXINPUT       = 339,
  CXTEXT        = 340,
  CXPASSWORD    = 341,
  CXRADIO       = 342,
  CXCHECKBOX    = 343,
  CXSUBMIT      = 344,
  CXIMAGE       = 345,
  CXRESET       = 346,
  CXBUTTON      = 347,
  CXFILE        = 348,
  CXENABLED     = 349,
  CXDISABLED    = 350,
  CXCHECKED     = 351,
  CXSELECTED    = 352,
  NAME          = 353,
  STRING        = 354
};
#endif
140
+
141
+
142
+ /* Copy the first part of user declarations. */
143
+ #line 1 "parser.y"
144
+
145
+ #include <math.h>
146
+ #include <stdio.h>
147
+ #include <stdlib.h>
148
+ #include <string.h>
149
+ #include "parsed_xpath.h"
150
+ #include <libxml/hash.h>
151
+
152
+ #ifndef PARSER_Y_H_INCLUDED /* guards the user declarations copied in from parser.y */
153
+ #define PARSER_Y_H_INCLUDED
154
+
155
+ static pxpathPtr parsed_answer; /* NOTE(review): static object in a header - each including file gets its own copy */
156
+
157
+ int yylex (void);
158
+ void yyerror (char const *);
159
+
160
+ void prepare_parse(char*);
161
+ void cleanup_parse(void);
162
+ void start_debugging(void);
163
+
164
+ static xmlHashTablePtr alias_hash; /* NOTE(review): also one copy per including file; presumably filled by init_xpath_alias() - confirm */
165
+
166
+ char* xpath_alias(char*);
167
+ void init_xpath_alias();
168
+
169
+ int yyparse(void);
170
+ pxpathPtr myparse(char*);
171
+ void answer(pxpathPtr);
172
+
173
+ #define LIT_BIN_OP(A, B, C) pxpath_cat_literals(3, A, LIT(B), C) /* A, literal B, C */
174
+ #define BIN_OP(A, B, C) pxpath_cat_paths(3, A, OP(B), C) /* A, operator B, C */
175
+ #define PREP_OP(A, B) pxpath_cat_paths(2, OP(A), B) /* operator A, then B */
176
+ #define PXP(A) pxpath_new_path(1, A)
177
+ #define LIT(A) pxpath_new_literal(1, A)
178
+ #define OP(A) pxpath_new_operator(1, A)
179
+ #define APPEND(A, S) pxpath_cat_paths(2, A, PXP(S)); /* NOTE: expansion already ends in ';', so it cannot be nested in an expression */
180
+ #define PREPEND(A, S) pxpath_cat_paths(2, PXP(S), A); /* NOTE: expansion already ends in ';' */
181
+ #define PXPWRAP(A, B, C) pxpath_cat_paths(3, PXP(A), B, PXP(C)) /* B wrapped between strings A and C */
182
+ #define P4E(A, B, C, D) pxpath_cat_paths(4, A, PXP(B), C, PXP(D)) /* strings in the even positions (B, D) */
183
+ #define P4O(A, B, C, D) pxpath_cat_paths(4, PXP(A), B, PXP(C), D) /* strings in the odd positions (A, C) */
184
+ #define P6E(A, B, C, D, E, F) pxpath_cat_paths(6, A, PXP(B), C, PXP(D), E, PXP(F)); /* NOTE: expansion already ends in ';' */
185
+ #define INPUT_TYPE(A, S) APPEND(A, "[lower-case(name())='input' and lower-case(@type)='" #S "']") /* #S stringifies S; inherits APPEND's trailing ';' */
186
+ #define TRACE(A, B) fprintf(stderr, "trace(%s): ", A); fprintf(stderr, "%s\n", pxpath_to_string(B)); /* NOTE: two statements - unsafe as the body of an unbraced if/else */
187
+
188
+ #endif
189
+
190
+
191
+
192
+ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED /* skipped when YYSTYPE was already supplied */
193
+ typedef union YYSTYPE /* value type behind yylval: an int, a C string or a pxpathPtr */
194
+ #line 53 "parser.y"
195
+ {
196
+ int empty;
197
+ char* string;
198
+ pxpathPtr node;
199
+ }
200
+ /* Line 2616 of glr.c. */
201
+ #line 202 "y.tab.h"
202
+ YYSTYPE;
203
+ # define YYSTYPE_IS_DECLARED 1
204
+ # define YYSTYPE_IS_TRIVIAL 1
205
+ #endif
206
+
207
+ #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED /* skipped when YYLTYPE was already supplied */
208
+ typedef struct YYLTYPE /* placeholder type: its only member is a dummy char */
209
+ {
210
+
211
+ char yydummy;
212
+
213
+ } YYLTYPE;
214
+ # define YYLTYPE_IS_DECLARED 1
215
+ # define YYLTYPE_IS_TRIVIAL 1
216
+ #endif
217
+
218
+
219
+ extern YYSTYPE yylval;
220
+
221
+
222
+