gtl-parsley-ruby 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +152 -0
- data/ext/extconf.rb +82 -0
- data/ext/parsley/.gitignore +32 -0
- data/ext/parsley/AUTHORS +1 -0
- data/ext/parsley/ChangeLog +0 -0
- data/ext/parsley/HACKING +4 -0
- data/ext/parsley/INSTALL +73 -0
- data/ext/parsley/INTRO +84 -0
- data/ext/parsley/Makefile.am +80 -0
- data/ext/parsley/Makefile.in +1009 -0
- data/ext/parsley/NEWS +0 -0
- data/ext/parsley/PAPER +36 -0
- data/ext/parsley/Portfile +18 -0
- data/ext/parsley/Portfile.in +17 -0
- data/ext/parsley/README.C-LANG +92 -0
- data/ext/parsley/README.markdown +1 -0
- data/ext/parsley/TODO +39 -0
- data/ext/parsley/VERSION +1 -0
- data/ext/parsley/aclocal.m4 +8918 -0
- data/ext/parsley/bootstrap.sh +6 -0
- data/ext/parsley/config.guess +1561 -0
- data/ext/parsley/config.sub +1686 -0
- data/ext/parsley/configure +13437 -0
- data/ext/parsley/configure.ac +46 -0
- data/ext/parsley/depcomp +630 -0
- data/ext/parsley/functions.c +368 -0
- data/ext/parsley/functions.h +19 -0
- data/ext/parsley/generate_bisect.sh +12 -0
- data/ext/parsley/hooks/prepare-commit-msg +16 -0
- data/ext/parsley/install-sh +520 -0
- data/ext/parsley/json-c-0.9/AUTHORS +2 -0
- data/ext/parsley/json-c-0.9/COPYING +19 -0
- data/ext/parsley/json-c-0.9/ChangeLog +103 -0
- data/ext/parsley/json-c-0.9/INSTALL +302 -0
- data/ext/parsley/json-c-0.9/Makefile.am +43 -0
- data/ext/parsley/json-c-0.9/Makefile.in +800 -0
- data/ext/parsley/json-c-0.9/NEWS +1 -0
- data/ext/parsley/json-c-0.9/README +20 -0
- data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
- data/ext/parsley/json-c-0.9/README.html +32 -0
- data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
- data/ext/parsley/json-c-0.9/arraylist.c +94 -0
- data/ext/parsley/json-c-0.9/arraylist.h +53 -0
- data/ext/parsley/json-c-0.9/bits.h +27 -0
- data/ext/parsley/json-c-0.9/config.guess +1561 -0
- data/ext/parsley/json-c-0.9/config.h +125 -0
- data/ext/parsley/json-c-0.9/config.h.in +124 -0
- data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
- data/ext/parsley/json-c-0.9/config.sub +1686 -0
- data/ext/parsley/json-c-0.9/configure +13084 -0
- data/ext/parsley/json-c-0.9/configure.in +33 -0
- data/ext/parsley/json-c-0.9/debug.c +98 -0
- data/ext/parsley/json-c-0.9/debug.h +50 -0
- data/ext/parsley/json-c-0.9/depcomp +630 -0
- data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
- data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
- data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
- data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
- data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
- data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
- data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
- data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
- data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
- data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
- data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
- data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
- data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
- data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
- data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
- data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
- data/ext/parsley/json-c-0.9/install-sh +520 -0
- data/ext/parsley/json-c-0.9/json.h +31 -0
- data/ext/parsley/json-c-0.9/json.pc +11 -0
- data/ext/parsley/json-c-0.9/json.pc.in +11 -0
- data/ext/parsley/json-c-0.9/json_object.c +512 -0
- data/ext/parsley/json-c-0.9/json_object.h +319 -0
- data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
- data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
- data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
- data/ext/parsley/json-c-0.9/json_util.c +122 -0
- data/ext/parsley/json-c-0.9/json_util.h +31 -0
- data/ext/parsley/json-c-0.9/libjson.la +41 -0
- data/ext/parsley/json-c-0.9/libtool +8890 -0
- data/ext/parsley/json-c-0.9/linkhash.c +216 -0
- data/ext/parsley/json-c-0.9/linkhash.h +272 -0
- data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
- data/ext/parsley/json-c-0.9/missing +376 -0
- data/ext/parsley/json-c-0.9/printbuf.c +149 -0
- data/ext/parsley/json-c-0.9/printbuf.h +64 -0
- data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
- data/ext/parsley/json-c-0.9/test1 +130 -0
- data/ext/parsley/json-c-0.9/test1.c +164 -0
- data/ext/parsley/json-c-0.9/test2 +130 -0
- data/ext/parsley/json-c-0.9/test2.c +20 -0
- data/ext/parsley/json-c-0.9/test3 +130 -0
- data/ext/parsley/json-c-0.9/test3.c +23 -0
- data/ext/parsley/libtool +8890 -0
- data/ext/parsley/ltmain.sh +8406 -0
- data/ext/parsley/missing +376 -0
- data/ext/parsley/parsed_xpath.c +168 -0
- data/ext/parsley/parsed_xpath.h +34 -0
- data/ext/parsley/parser.y +631 -0
- data/ext/parsley/parsley.c +793 -0
- data/ext/parsley/parsley.h +87 -0
- data/ext/parsley/parsley_main.c +185 -0
- data/ext/parsley/parsleyc_main.c +108 -0
- data/ext/parsley/regexp.c +359 -0
- data/ext/parsley/regexp.h +36 -0
- data/ext/parsley/scanner.l +221 -0
- data/ext/parsley/test/ambiguous.html +207 -0
- data/ext/parsley/test/ambiguous.json +1 -0
- data/ext/parsley/test/ambiguous.let +6 -0
- data/ext/parsley/test/array-regression.html +5 -0
- data/ext/parsley/test/array-regression.json +1 -0
- data/ext/parsley/test/array-regression.let +10 -0
- data/ext/parsley/test/backslash.html +5 -0
- data/ext/parsley/test/backslash.json +1 -0
- data/ext/parsley/test/backslash.let +3 -0
- data/ext/parsley/test/bang.html +17 -0
- data/ext/parsley/test/bang.json +1 -0
- data/ext/parsley/test/bang.let +6 -0
- data/ext/parsley/test/collate_regression.html +324 -0
- data/ext/parsley/test/collate_regression.json +1 -0
- data/ext/parsley/test/collate_regression.let +9 -0
- data/ext/parsley/test/contains.html +3 -0
- data/ext/parsley/test/contains.json +1 -0
- data/ext/parsley/test/contains.let +3 -0
- data/ext/parsley/test/content.html +13 -0
- data/ext/parsley/test/content.json +1 -0
- data/ext/parsley/test/content.let +7 -0
- data/ext/parsley/test/cool.html +575 -0
- data/ext/parsley/test/cool.json +1 -0
- data/ext/parsley/test/cool.let +9 -0
- data/ext/parsley/test/craigs-simple.html +207 -0
- data/ext/parsley/test/craigs-simple.json +1 -0
- data/ext/parsley/test/craigs-simple.let +6 -0
- data/ext/parsley/test/craigs.html +207 -0
- data/ext/parsley/test/craigs.json +1 -0
- data/ext/parsley/test/craigs.let +9 -0
- data/ext/parsley/test/crash.html +157 -0
- data/ext/parsley/test/crash.json +1 -0
- data/ext/parsley/test/crash.let +1 -0
- data/ext/parsley/test/css_attr.html +3 -0
- data/ext/parsley/test/css_attr.json +1 -0
- data/ext/parsley/test/css_attr.let +3 -0
- data/ext/parsley/test/default-namespace.json +1 -0
- data/ext/parsley/test/default-namespace.let +3 -0
- data/ext/parsley/test/default-namespace.xml +1493 -0
- data/ext/parsley/test/div.html +8 -0
- data/ext/parsley/test/div.json +1 -0
- data/ext/parsley/test/div.let +10 -0
- data/ext/parsley/test/empty.html +3 -0
- data/ext/parsley/test/empty.json +1 -0
- data/ext/parsley/test/empty.let +1 -0
- data/ext/parsley/test/emptyish.html +207 -0
- data/ext/parsley/test/emptyish.let +3 -0
- data/ext/parsley/test/fictional-opt.html +43 -0
- data/ext/parsley/test/fictional-opt.json +1 -0
- data/ext/parsley/test/fictional-opt.let +14 -0
- data/ext/parsley/test/fictional.html +43 -0
- data/ext/parsley/test/fictional.json +1 -0
- data/ext/parsley/test/fictional.let +14 -0
- data/ext/parsley/test/function-magic.html +9 -0
- data/ext/parsley/test/function-magic.json +1 -0
- data/ext/parsley/test/function-magic.let +8 -0
- data/ext/parsley/test/hn.html +32 -0
- data/ext/parsley/test/hn.json +1 -0
- data/ext/parsley/test/hn.let +8 -0
- data/ext/parsley/test/malformed-array.html +2329 -0
- data/ext/parsley/test/malformed-array.json +1 -0
- data/ext/parsley/test/malformed-array.let +22 -0
- data/ext/parsley/test/malformed-expr.html +2329 -0
- data/ext/parsley/test/malformed-expr.json +1 -0
- data/ext/parsley/test/malformed-expr.let +16 -0
- data/ext/parsley/test/malformed-function.html +845 -0
- data/ext/parsley/test/malformed-function.json +197 -0
- data/ext/parsley/test/malformed-function.let +8 -0
- data/ext/parsley/test/malformed-json.html +2329 -0
- data/ext/parsley/test/malformed-json.json +1 -0
- data/ext/parsley/test/malformed-json.let +6 -0
- data/ext/parsley/test/malformed-xpath.html +8 -0
- data/ext/parsley/test/malformed-xpath.json +1 -0
- data/ext/parsley/test/malformed-xpath.let +7 -0
- data/ext/parsley/test/match.json +1 -0
- data/ext/parsley/test/match.let +9 -0
- data/ext/parsley/test/match.xml +11 -0
- data/ext/parsley/test/math_ambiguity.html +9 -0
- data/ext/parsley/test/math_ambiguity.json +1 -0
- data/ext/parsley/test/math_ambiguity.let +5 -0
- data/ext/parsley/test/nth-regression.html +13 -0
- data/ext/parsley/test/nth-regression.json +1 -0
- data/ext/parsley/test/nth-regression.let +3 -0
- data/ext/parsley/test/optional.html +2328 -0
- data/ext/parsley/test/optional.json +1 -0
- data/ext/parsley/test/optional.let +8 -0
- data/ext/parsley/test/outer-xml.html +6 -0
- data/ext/parsley/test/outer-xml.json +1 -0
- data/ext/parsley/test/outer-xml.let +5 -0
- data/ext/parsley/test/position.html +8 -0
- data/ext/parsley/test/position.json +1 -0
- data/ext/parsley/test/position.let +6 -0
- data/ext/parsley/test/question_regressions.html +443 -0
- data/ext/parsley/test/question_regressions.json +1 -0
- data/ext/parsley/test/question_regressions.let +6 -0
- data/ext/parsley/test/quote.json +1 -0
- data/ext/parsley/test/quote.let +8 -0
- data/ext/parsley/test/quote.xml +11 -0
- data/ext/parsley/test/reddit.html +1 -0
- data/ext/parsley/test/reddit.json +1 -0
- data/ext/parsley/test/reddit.let +12 -0
- data/ext/parsley/test/remote-fail.json +1 -0
- data/ext/parsley/test/remote.html +3 -0
- data/ext/parsley/test/remote.json +1 -0
- data/ext/parsley/test/remote.let +4 -0
- data/ext/parsley/test/replace.json +1 -0
- data/ext/parsley/test/replace.let +9 -0
- data/ext/parsley/test/replace.xml +11 -0
- data/ext/parsley/test/scope.html +10 -0
- data/ext/parsley/test/scope.json +1 -0
- data/ext/parsley/test/scope.let +6 -0
- data/ext/parsley/test/segfault.html +5 -0
- data/ext/parsley/test/segfault.json +1 -0
- data/ext/parsley/test/segfault.let +9 -0
- data/ext/parsley/test/sg-wrap.html +5 -0
- data/ext/parsley/test/sg-wrap.json +1 -0
- data/ext/parsley/test/sg-wrap.let +3 -0
- data/ext/parsley/test/sg_off.html +5 -0
- data/ext/parsley/test/sg_off.json +1 -0
- data/ext/parsley/test/sg_off.let +3 -0
- data/ext/parsley/test/test.json +1 -0
- data/ext/parsley/test/test.let +6 -0
- data/ext/parsley/test/test.xml +11 -0
- data/ext/parsley/test/trivial.html +2329 -0
- data/ext/parsley/test/trivial.json +1 -0
- data/ext/parsley/test/trivial.let +4 -0
- data/ext/parsley/test/trivial2.html +2329 -0
- data/ext/parsley/test/trivial2.json +1 -0
- data/ext/parsley/test/trivial2.let +7 -0
- data/ext/parsley/test/unbang.html +17 -0
- data/ext/parsley/test/unbang.json +1 -0
- data/ext/parsley/test/unbang.let +6 -0
- data/ext/parsley/test/unicode.html +3 -0
- data/ext/parsley/test/unicode.json +1 -0
- data/ext/parsley/test/unicode.let +1 -0
- data/ext/parsley/test/whitespace.html +8 -0
- data/ext/parsley/test/whitespace.json +1 -0
- data/ext/parsley/test/whitespace.let +3 -0
- data/ext/parsley/test/whitespace_regression.html +4 -0
- data/ext/parsley/test/whitespace_regression.json +1 -0
- data/ext/parsley/test/whitespace_regression.let +3 -0
- data/ext/parsley/test/yelp-benchmark.rb +53 -0
- data/ext/parsley/test/yelp-home.html +1004 -0
- data/ext/parsley/test/yelp-home.json +1 -0
- data/ext/parsley/test/yelp-home.let +6 -0
- data/ext/parsley/test/yelp.html +2329 -0
- data/ext/parsley/test/yelp.json +1 -0
- data/ext/parsley/test/yelp.let +12 -0
- data/ext/parsley/test/youtube.html +1940 -0
- data/ext/parsley/test/youtube.let +11 -0
- data/ext/parsley/util.c +237 -0
- data/ext/parsley/util.h +34 -0
- data/ext/parsley/xml2json.c +47 -0
- data/ext/parsley/xml2json.h +14 -0
- data/ext/parsley/y.tab.h +222 -0
- data/ext/parsley/ylwrap +222 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +120 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +366 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
{
|
2
|
+
"video(.video-cell)": [ {
|
3
|
+
"thumbnail": ".vimg120 @src",
|
4
|
+
"title": ".video-short-title a",
|
5
|
+
"link": ".video-short-title a @href",
|
6
|
+
"posted": ".video-date-added",
|
7
|
+
"views": ".video-view-count",
|
8
|
+
"length": ".video-time span",
|
9
|
+
"rating": ".ratingVS @title"
|
10
|
+
} ]
|
11
|
+
}
|
data/ext/parsley/util.c
ADDED
@@ -0,0 +1,237 @@
|
|
1
|
+
#include "util.h"
|
2
|
+
|
3
|
+
static bool parsley_exslt_registered = false;
|
4
|
+
|
5
|
+
#define BUF 128
|
6
|
+
|
7
|
+
/*
 * Open a stream for `name` using the fopen() mode string `mode`.
 *
 * The name "-" selects a standard stream instead of a file: stdout when
 * `mode` is exactly "w", stdin for every other mode.  If no stream can be
 * obtained, a diagnostic is printed to stderr and the process exits with
 * status 1, so this function never returns NULL.
 */
FILE* parsley_fopen(char* name, char* mode) {
  FILE *stream = NULL;
  const int use_std_stream = (strcmp("-", name) == 0);

  if (!use_std_stream) {
    stream = fopen(name, mode);
  } else if (strcmp("w", mode) == 0) {
    stream = stdout;
  } else {
    stream = stdin;
  }

  if (stream == NULL) {
    fprintf(stderr, "Cannot open file %s, error %d, %s\n", name, errno, strerror(errno));
    exit(1);
  }
  return stream;
}
|
24
|
+
|
25
|
+
/* Bitmask of PARSLEY_OPTIONS_* flags last applied by parsley_io_set_mode(). */
static int parsley_io_mode = 0;

/* Heap-allocated "User-Agent: ..." header line, or NULL when none is set. */
static char *parsley_user_agent_header = NULL;

/* Return the I/O mode bitmask currently in effect. */
int parsley_io_get_mode() {
  const int current_mode = parsley_io_mode;
  return current_mode;
}
|
32
|
+
|
33
|
+
static xsltStylesheetPtr span_wrap_sheet = NULL;
|
34
|
+
|
35
|
+
xmlDocPtr
|
36
|
+
parsley_apply_span_wrap(xmlDocPtr doc) {
|
37
|
+
if(span_wrap_sheet == NULL) {
|
38
|
+
char * sheet = "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:sg=\"http://selectorgadget.com/\"> \
|
39
|
+
<xsl:template match=\"text()[(following-sibling::* or preceding-sibling::*) and normalize-space(.) != '']\"> \
|
40
|
+
<sg_wrap><xsl:value-of select=\".\" /></sg_wrap> \
|
41
|
+
</xsl:template> \
|
42
|
+
<xsl:template match=\"@*|node()\"> \
|
43
|
+
<xsl:copy> \
|
44
|
+
<xsl:apply-templates select=\"@*|node()\"/> \
|
45
|
+
</xsl:copy> \
|
46
|
+
</xsl:template> \
|
47
|
+
</xsl:stylesheet>";
|
48
|
+
|
49
|
+
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
|
50
|
+
xmlDocPtr xml = xmlCtxtReadMemory(ctxt, sheet, strlen(sheet), NULL, NULL, 0);
|
51
|
+
span_wrap_sheet = xsltParseStylesheetDoc(xml);
|
52
|
+
}
|
53
|
+
xsltTransformContextPtr ctxt = xsltNewTransformContext(span_wrap_sheet, doc);
|
54
|
+
xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
|
55
|
+
xmlDocPtr out = xsltApplyStylesheetUser(span_wrap_sheet, doc, NULL, NULL, NULL, ctxt);
|
56
|
+
xsltFreeTransformContext(ctxt);
|
57
|
+
return out;
|
58
|
+
}
|
59
|
+
|
60
|
+
void
|
61
|
+
_parsley_set_user_agent(char * agent) {
|
62
|
+
if(parsley_user_agent_header != NULL) free(parsley_user_agent_header);
|
63
|
+
if(agent == NULL) {
|
64
|
+
parsley_user_agent_header = NULL;
|
65
|
+
} else {
|
66
|
+
asprintf(&parsley_user_agent_header, "User-Agent: %s\n", agent);
|
67
|
+
}
|
68
|
+
}
|
69
|
+
|
70
|
+
static void *
|
71
|
+
xmlUserAgentIOHTTPOpen(const char * file_name) {
|
72
|
+
return (void *)(xmlNanoHTTPMethod(file_name, NULL, NULL, NULL, parsley_user_agent_header, 0));
|
73
|
+
}
|
74
|
+
|
75
|
+
void
|
76
|
+
parsley_io_set_mode(int mode) {
|
77
|
+
if(mode == parsley_io_mode) return;
|
78
|
+
parsley_io_mode = mode;
|
79
|
+
|
80
|
+
xmlCleanupInputCallbacks();
|
81
|
+
|
82
|
+
if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_LOCAL) {
|
83
|
+
|
84
|
+
xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
|
85
|
+
xmlFileRead, xmlFileClose);
|
86
|
+
#ifdef HAVE_ZLIB_H
|
87
|
+
xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen,
|
88
|
+
xmlGzfileRead, xmlGzfileClose);
|
89
|
+
#endif /* HAVE_ZLIB_H */
|
90
|
+
}
|
91
|
+
if(parsley_io_mode & PARSLEY_OPTIONS_ALLOW_NET) {
|
92
|
+
#ifdef LIBXML_HTTP_ENABLED
|
93
|
+
xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlUserAgentIOHTTPOpen,
|
94
|
+
xmlIOHTTPRead, xmlIOHTTPClose);
|
95
|
+
#endif /* LIBXML_HTTP_ENABLED */
|
96
|
+
|
97
|
+
#ifdef LIBXML_FTP_ENABLED
|
98
|
+
xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen,
|
99
|
+
xmlIOFTPRead, xmlIOFTPClose);
|
100
|
+
#endif /* LIBXML_FTP_ENABLED */
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
void
|
105
|
+
printbuf_file_read(FILE *f, struct printbuf *buf) {
|
106
|
+
char chars[BUF];
|
107
|
+
while(fgets(chars, BUF, f) != NULL){
|
108
|
+
sprintbuf(buf, "%s", chars);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
void registerEXSLT() {
|
113
|
+
if(!parsley_exslt_registered) {
|
114
|
+
exsltRegisterAll();
|
115
|
+
parsley_register_all();
|
116
|
+
init_xpath_alias();
|
117
|
+
exslt_org_regular_expressions_init();
|
118
|
+
parsley_exslt_registered = true;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
int parsley_key_flags(char* key) {
|
123
|
+
char* ptr = key;
|
124
|
+
char* last_alnum = key;
|
125
|
+
char* last_paren = key;
|
126
|
+
while(*ptr++ != '\0'){
|
127
|
+
if(isalnum(*ptr)) {
|
128
|
+
last_alnum = ptr;
|
129
|
+
} else if (*ptr == ')') {
|
130
|
+
last_paren = ptr;
|
131
|
+
}
|
132
|
+
}
|
133
|
+
ptr = (last_alnum > last_paren ? last_alnum : last_paren);
|
134
|
+
int flags = 0;
|
135
|
+
while(*ptr++ != '\0'){
|
136
|
+
switch(*ptr){
|
137
|
+
case '?':
|
138
|
+
flags |= PARSLEY_OPTIONAL;
|
139
|
+
break;
|
140
|
+
case '!':
|
141
|
+
flags |= PARSLEY_BANG;
|
142
|
+
break;
|
143
|
+
}
|
144
|
+
}
|
145
|
+
return flags;
|
146
|
+
}
|
147
|
+
|
148
|
+
/*
 * Return the tag portion of a parselet key as a newly allocated string.
 *
 * The result is a copy of `key` cut at the first character, at index 1 or
 * later, that is not alphanumeric, '_' or '-'.  The first character is
 * always kept as-is, matching the original behaviour.
 *
 * key: NUL-terminated key string; must not be NULL.  It is not modified.
 * Returns a heap string the caller must free(), or NULL if allocation
 * fails (the original dereferenced the failed allocation instead).
 */
char* parsley_key_tag(char* key) {
  const size_t len = strlen(key);
  char *tag = malloc(len + 1);
  if(tag == NULL) {
    return NULL;
  }
  memcpy(tag, key, len + 1);

  for(size_t i = 1; i < len; i++) {
    /* Cast first: <ctype.h> functions are undefined for negative char. */
    const unsigned char c = (unsigned char) tag[i];
    if(!isalnum(c) && c != '_' && c != '-') {
      tag[i] = '\0';
      break;
    }
  }
  return tag;
}
|
159
|
+
|
160
|
+
/*
 * Parse the filter expression embedded in a parselet key.
 *
 * A key such as "name(expr)" carries a filter between the first '(' and
 * the last ')'.  Both are searched for from index 1 onward, so a key that
 * starts with '(' is not considered to have a filter (as in the original).
 *
 * key: NUL-terminated key string; must not be NULL.  It is not modified.
 * Returns the result of myparse() on the filter text, or NULL when the key
 * has no '(' , has no ')' , has an empty filter, or memory runs out.
 *
 * Fixes over the original: the working copy is released on every path
 * (it leaked when no '(' was present), and a key with '(' but no ')' no
 * longer writes through an uninitialised pointer.
 */
pxpathPtr parsley_key_filter(char* key) {
  char *copy = strdup(key);
  if(copy == NULL) {
    return NULL;
  }

  pxpathPtr out = NULL;

  if(copy[0] != '\0') {
    char *open  = strchr(copy + 1, '(');    /* first '(' at index >= 1 */
    char *close = strrchr(copy + 1, ')');   /* last  ')' at index >= 1 */

    if(open != NULL && close != NULL) {
      *close = '\0';                        /* clip ")" */
      const char *after_open = open + 1;    /* clip "(" */
      if(*after_open != '\0') {
        out = myparse(open + 1);
      }
    }
  }

  free(copy);
  return out;
}
|
183
|
+
|
184
|
+
static xmlNodePtr
|
185
|
+
_xmlLastElementChild(xmlNodePtr node) {
|
186
|
+
xmlNodePtr child = node->children;
|
187
|
+
xmlNodePtr elem = NULL;
|
188
|
+
while(child != NULL) {
|
189
|
+
if(child->type == XML_ELEMENT_NODE) elem = child;
|
190
|
+
child = child->next;
|
191
|
+
}
|
192
|
+
return elem;
|
193
|
+
}
|
194
|
+
|
195
|
+
xmlNodePtr new_stylesheet_skeleton(char *incl) {
|
196
|
+
struct printbuf *buf = printbuf_new();
|
197
|
+
sprintbuf(buf, "%s", "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
|
198
|
+
sprintbuf(buf, "%s", " xmlns:lib=\"http://parselets.com/stdlib\"");
|
199
|
+
sprintbuf(buf, "%s", " xmlns:parsley=\"http://parselets.com/json\"");
|
200
|
+
sprintbuf(buf, "%s", " xmlns:str=\"http://exslt.org/strings\"");
|
201
|
+
sprintbuf(buf, "%s", " xmlns:set=\"http://exslt.org/sets\"");
|
202
|
+
sprintbuf(buf, "%s", " xmlns:math=\"http://exslt.org/math\"");
|
203
|
+
sprintbuf(buf, "%s", " xmlns:func=\"http://exslt.org/functions\"");
|
204
|
+
sprintbuf(buf, "%s", " xmlns:user=\"http://parselets.com/usre\"");
|
205
|
+
sprintbuf(buf, "%s", " xmlns:dyn=\"http://exslt.org/dynamic\"");
|
206
|
+
sprintbuf(buf, "%s", " xmlns:date=\"http://exslt.org/dates-and-times\"");
|
207
|
+
sprintbuf(buf, "%s", " xmlns:exsl=\"http://exslt.org/common\"");
|
208
|
+
sprintbuf(buf, "%s", " xmlns:saxon=\"http://icl.com/saxon\"");
|
209
|
+
sprintbuf(buf, "%s", " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
|
210
|
+
sprintbuf(buf, "%s", " xmlns:regex=\"http://exslt.org/regular-expressions\"");
|
211
|
+
sprintbuf(buf, "%s", " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
|
212
|
+
sprintbuf(buf, "%s", ">\n");
|
213
|
+
sprintbuf(buf, "%s", "<xsl:variable name=\"nbsp\"> </xsl:variable>\n");
|
214
|
+
sprintbuf(buf, "%s", "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
|
215
|
+
sprintbuf(buf, "%s", "<xsl:strip-space elements=\"*\"/>\n");
|
216
|
+
sprintbuf(buf, "%s", "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
|
217
|
+
sprintbuf(buf, "%s", "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
|
218
|
+
sprintbuf(buf, "%s", "<func:result select=\"$out\" /></func:function>");
|
219
|
+
sprintbuf(buf, "%s", "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
|
220
|
+
sprintbuf(buf, "%s", "<xsl:template match=\"script|style\" mode=\"innertext\"/>");
|
221
|
+
sprintbuf(buf, "%s", "<xsl:template match=\"br|address|blockquote|center|dir|div|form|h1|h2|h3|h4|h5|h6|hr|menu|noframes|noscript|p|pre|li|td|th|p\" mode=\"innertext\"><xsl:apply-templates mode=\"innertext\" /><xsl:text>\n</xsl:text></xsl:template>");
|
222
|
+
sprintbuf(buf, "%s\n", incl);
|
223
|
+
sprintbuf(buf, "%s\n", "<xsl:template match=\"/\">\n");
|
224
|
+
sprintbuf(buf, "%s\n", "<parsley:root />\n");
|
225
|
+
sprintbuf(buf, "%s\n", "</xsl:template>\n");
|
226
|
+
sprintbuf(buf, "%s\n", "</xsl:stylesheet>\n");
|
227
|
+
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
|
228
|
+
xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://parselets.com/compiled", NULL, 3);
|
229
|
+
xmlFreeParserCtxt(ctxt);
|
230
|
+
printbuf_free(buf);
|
231
|
+
|
232
|
+
xmlNodePtr node = xmlDocGetRootElement(doc);
|
233
|
+
while(_xmlLastElementChild(node) != NULL) {
|
234
|
+
node = _xmlLastElementChild(node);
|
235
|
+
}
|
236
|
+
return node;
|
237
|
+
}
|
data/ext/parsley/util.h
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef UTIL_H_INCLUDED
|
2
|
+
#define UTIL_H_INCLUDED
|
3
|
+
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <json/json.h>
|
6
|
+
#include "parsed_xpath.h"
|
7
|
+
#include "parsley.h"
|
8
|
+
#include "parser.h"
|
9
|
+
#include "regexp.h"
|
10
|
+
#include <json/printbuf.h>
|
11
|
+
#include "functions.h"
|
12
|
+
#include <stdio.h>
|
13
|
+
#include <string.h>
|
14
|
+
#include <errno.h>
|
15
|
+
#include <stdbool.h>
|
16
|
+
#include <ctype.h>
|
17
|
+
#include <libexslt/exslt.h>
|
18
|
+
#include <libxml/xmlIO.h>
|
19
|
+
|
20
|
+
FILE* parsley_fopen(char*, char*);
|
21
|
+
xmlNodePtr new_stylesheet_skeleton(char *incl);
|
22
|
+
void registerEXSLT();
|
23
|
+
void printbuf_file_read(FILE *f, struct printbuf *buf);
|
24
|
+
|
25
|
+
int parsley_key_flags(char*);
|
26
|
+
char* parsley_key_tag(char*);
|
27
|
+
pxpathPtr parsley_key_filter(char*);
|
28
|
+
int parsley_io_get_mode();
|
29
|
+
void parsley_io_set_mode(int mode);
|
30
|
+
void _parsley_set_user_agent(char *agent);
|
31
|
+
|
32
|
+
xmlDocPtr parsley_apply_span_wrap(xmlDocPtr ptr);
|
33
|
+
|
34
|
+
#endif
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#include "xml2json.h"
|
2
|
+
|
3
|
+
/*
 * Convert one node of parsley's simplified output XML to a JSON value.
 *
 *  - Element without a namespace: produces an object with one member per
 *    node in the sibling chain starting at `xml` (key = node name, value =
 *    conversion of that node's children).
 *  - <parsley:groups>: produces an array with one entry per child, each
 *    entry being the conversion of that child's children.
 *  - <parsley:group> and any other namespaced element: no value (NULL);
 *    groups are consumed by the parsley:groups case.
 *  - Text node: produces a string.
 *
 * Returns NULL for a NULL node and for node types not listed above.
 */
static struct json_object * _xml2json(xmlNodePtr xml) {
  if(xml == NULL) return NULL;

  struct json_object *json = NULL;
  xmlNodePtr child;

  switch(xml->type) {
    case XML_ELEMENT_NODE:
      if(xml->ns == NULL) {
        json = json_object_new_object();
        for(child = xml; child != NULL; child = child->next) {
          json_object_object_add(json, (const char *) child->name,
                                 xml2json(child->children));
        }
      } else if(xml->ns->prefix != NULL
                && strcmp((const char *) xml->ns->prefix, "parsley") == 0
                && strcmp((const char *) xml->name, "groups") == 0) {
        /* prefix is NULL for a default namespace; the original passed it
         * straight to strcmp. */
        json = json_object_new_array();
        for(child = xml->children; child != NULL; child = child->next) {
          json_object_array_add(json, xml2json(child->children));
        }
      }
      break;
    case XML_TEXT_NODE:
      /* Guard against a text node without content. */
      json = json_object_new_string(xml->content != NULL
                                      ? (const char *) xml->content
                                      : "");
      break;
    default:
      break;
  }
  return json;
}
|
39
|
+
|
40
|
+
/**
|
41
|
+
* Handles a simplified xml
|
42
|
+
*/
|
43
|
+
struct json_object * xml2json(xmlNodePtr xml) {
|
44
|
+
struct json_object * json = _xml2json(xml);
|
45
|
+
if(json == NULL) json = json_object_new_object();
|
46
|
+
return json;
|
47
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#ifndef XML2JSON_H_INCLUDED
|
2
|
+
#define XML2JSON_H_INCLUDED
|
3
|
+
|
4
|
+
#include <string.h>
|
5
|
+
#include <stdbool.h>
|
6
|
+
#include <stdio.h>
|
7
|
+
#include <libxml/parser.h>
|
8
|
+
#include <libxml/tree.h>
|
9
|
+
#include <libxml/debugXML.h>
|
10
|
+
#include <json/json.h>
|
11
|
+
|
12
|
+
struct json_object * xml2json(xmlNodePtr);
|
13
|
+
|
14
|
+
#endif
|
data/ext/parsley/y.tab.h
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
/* A Bison parser, made by GNU Bison 2.3. */
|
2
|
+
|
3
|
+
/* Skeleton interface for Bison GLR parsers in C
|
4
|
+
|
5
|
+
Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
|
6
|
+
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation; either version 2, or (at your option)
|
10
|
+
any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program; if not, write to the Free Software
|
19
|
+
Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
20
|
+
Boston, MA 02110-1301, USA. */
|
21
|
+
|
22
|
+
/* As a special exception, you may create a larger work that contains
|
23
|
+
part or all of the Bison parser skeleton and distribute that work
|
24
|
+
under terms of your choice, so long as that work isn't itself a
|
25
|
+
parser generator using the skeleton or a modified version thereof
|
26
|
+
as a parser skeleton. Alternatively, if you modify or redistribute
|
27
|
+
the parser skeleton itself, you may (at your option) remove this
|
28
|
+
special exception, which will cause the skeleton and the resulting
|
29
|
+
Bison output files to be licensed under the GNU General Public
|
30
|
+
License without this special exception.
|
31
|
+
|
32
|
+
This special exception was added by the Free Software Foundation in
|
33
|
+
version 2.2 of Bison. */
|
34
|
+
|
35
|
+
/* Tokens. */
|
36
|
+
/*
 * Token codes shared between the scanner and the parser.  This header is
 * Bison output, so the numbering must stay exactly as generated.  The
 * section comments below are inferred from the token names only; the
 * grammar in parser.y is authoritative.
 */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
   /* Put the tokens into the symbol table, so that GDB and other debuggers
      know about them.  */
   enum yytokentype {
     /* literals, punctuation and operators */
     NUMBER         = 258,
     S              = 259,
     AT             = 260,
     LPAREN         = 261,
     RPAREN         = 262,
     PIPE           = 263,
     LT             = 264,
     SLASH          = 265,
     DBLSLASH       = 266,
     BANG           = 267,
     COLON          = 268,
     DBLCOLON       = 269,
     QUERY          = 270,
     HASH           = 271,
     COMMA          = 272,
     DOT            = 273,
     DBLDOT         = 274,
     GT             = 275,
     LBRA           = 276,
     RBRA           = 277,
     TILDE          = 278,
     SPLAT          = 279,
     PLUS           = 280,
     DASH           = 281,
     EQ             = 282,
     LTE            = 283,
     GTE            = 284,
     DOLLAR         = 285,
     BSLASHLIT      = 286,
     OTHER          = 287,

     /* X* tokens */
     XANCESTOR      = 288,
     XANCESTORSELF  = 289,
     XATTR          = 290,
     XCHILD         = 291,
     XDESC          = 292,
     XDESCSELF      = 293,
     XFOLLOW        = 294,
     XFOLLOWSIB     = 295,
     XNS            = 296,
     XPARENT        = 297,
     XPRE           = 298,
     XPRESIB        = 299,
     XSELF          = 300,
     XOR            = 301,
     XAND           = 302,
     XDIV           = 303,
     XMOD           = 304,
     XCOMMENT       = 305,
     XTEXT          = 306,
     XPI            = 307,
     XNODE          = 308,

     /* CX* tokens */
     CXEQUATION     = 309,
     CXOPHE         = 310,
     CXOPNE         = 311,
     CXOPSTARTEQ    = 312,
     CXOPENDEQ      = 313,
     CXOPCONTAINS   = 314,
     CXOPCONTAINS2  = 315,
     CXFIRST        = 316,
     CXLAST         = 317,
     CXNOT          = 318,
     CXEVEN         = 319,
     CXODD          = 320,
     CXEQ           = 321,
     CXGT           = 322,
     CXLT           = 323,
     CXHEADER       = 324,
     CXCONTAINS     = 325,
     CXEMPTY        = 326,
     CXHAS          = 327,
     CXPARENT       = 328,
     CXNTHCH        = 329,
     CXNTHLASTCH    = 330,
     CXNTHTYPE      = 331,
     CXNTHLASTTYPE  = 332,
     CXFIRSTCH      = 333,
     CXLASTCH       = 334,
     CXFIRSTTYPE    = 335,
     CXLASTTYPE     = 336,
     CXONLYCH       = 337,
     CXONLYTYPE     = 338,
     CXINPUT        = 339,
     CXTEXT         = 340,
     CXPASSWORD     = 341,
     CXRADIO        = 342,
     CXCHECKBOX     = 343,
     CXSUBMIT       = 344,
     CXIMAGE        = 345,
     CXRESET        = 346,
     CXBUTTON       = 347,
     CXFILE         = 348,
     CXENABLED      = 349,
     CXDISABLED     = 350,
     CXCHECKED      = 351,
     CXSELECTED     = 352,

     /* identifiers and quoted strings */
     NAME           = 353,
     STRING         = 354
   };
#endif
|
140
|
+
|
141
|
+
|
142
|
+
/* Copy the first part of user declarations. */
|
143
|
+
#line 1 "parser.y"
|
144
|
+
|
145
|
+
#include <math.h>
|
146
|
+
#include <stdio.h>
|
147
|
+
#include <stdlib.h>
|
148
|
+
#include <string.h>
|
149
|
+
#include "parsed_xpath.h"
|
150
|
+
#include <libxml/hash.h>
|
151
|
+
|
152
|
+
#ifndef PARSER_Y_H_INCLUDED
|
153
|
+
#define PARSER_Y_H_INCLUDED
|
154
|
+
|
155
|
+
static pxpathPtr parsed_answer;
|
156
|
+
|
157
|
+
int yylex (void);
|
158
|
+
void yyerror (char const *);
|
159
|
+
|
160
|
+
void prepare_parse(char*);
|
161
|
+
void cleanup_parse(void);
|
162
|
+
void start_debugging(void);
|
163
|
+
|
164
|
+
static xmlHashTablePtr alias_hash;
|
165
|
+
|
166
|
+
char* xpath_alias(char*);
|
167
|
+
void init_xpath_alias();
|
168
|
+
|
169
|
+
int yyparse(void);
|
170
|
+
pxpathPtr myparse(char*);
|
171
|
+
void answer(pxpathPtr);
|
172
|
+
|
173
|
+
/*
 * Shorthand used by the grammar actions to build pxpath values.  Each macro
 * expands to a call of the pxpath_* constructors / concatenators with the
 * argument count as first parameter.
 *
 * NOTE(review): APPEND, PREPEND and P6E end in ';' and TRACE expands to two
 * statements, so they are only safe where a full statement is expected.
 * Left unchanged because the call sites in parser.y are not visible here.
 */
#define LIT_BIN_OP(A, B, C)       pxpath_cat_literals(3, A, LIT(B), C)
#define BIN_OP(A, B, C)           pxpath_cat_paths(3, A, OP(B), C)
#define PREP_OP(A, B)             pxpath_cat_paths(2, OP(A), B)
#define PXP(A)                    pxpath_new_path(1, A)
#define LIT(A)                    pxpath_new_literal(1, A)
#define OP(A)                     pxpath_new_operator(1, A)
#define APPEND(A, S)              pxpath_cat_paths(2, A, PXP(S));
#define PREPEND(A, S)             pxpath_cat_paths(2, PXP(S), A);
#define PXPWRAP(A, B, C)          pxpath_cat_paths(3, PXP(A), B, PXP(C))
#define P4E(A, B, C, D)           pxpath_cat_paths(4, A, PXP(B), C, PXP(D))
#define P4O(A, B, C, D)           pxpath_cat_paths(4, PXP(A), B, PXP(C), D)
#define P6E(A, B, C, D, E, F)     pxpath_cat_paths(6, A, PXP(B), C, PXP(D), E, PXP(F));
#define INPUT_TYPE(A, S)          APPEND(A, "[lower-case(name())='input' and lower-case(@type)='" #S "']")
#define TRACE(A, B)               fprintf(stderr, "trace(%s): ", A); fprintf(stderr, "%s\n", pxpath_to_string(B));
|
187
|
+
|
188
|
+
#endif
|
189
|
+
|
190
|
+
|
191
|
+
|
192
|
+
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
193
|
+
typedef union YYSTYPE
|
194
|
+
#line 53 "parser.y"
|
195
|
+
{
|
196
|
+
int empty;
|
197
|
+
char* string;
|
198
|
+
pxpathPtr node;
|
199
|
+
}
|
200
|
+
/* Line 2616 of glr.c. */
|
201
|
+
#line 202 "y.tab.h"
|
202
|
+
YYSTYPE;
|
203
|
+
# define YYSTYPE_IS_DECLARED 1
|
204
|
+
# define YYSTYPE_IS_TRIVIAL 1
|
205
|
+
#endif
|
206
|
+
|
207
|
+
/*
 * Location type for grammar symbols.  It holds only a single dummy member,
 * so no position information is stored in it.
 */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE
{
  char yydummy;
} YYLTYPE;
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
|
217
|
+
|
218
|
+
|
219
|
+
extern YYSTYPE yylval;
|
220
|
+
|
221
|
+
|
222
|
+
|