gtl-parsley-ruby 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +152 -0
- data/ext/extconf.rb +82 -0
- data/ext/parsley/.gitignore +32 -0
- data/ext/parsley/AUTHORS +1 -0
- data/ext/parsley/ChangeLog +0 -0
- data/ext/parsley/HACKING +4 -0
- data/ext/parsley/INSTALL +73 -0
- data/ext/parsley/INTRO +84 -0
- data/ext/parsley/Makefile.am +80 -0
- data/ext/parsley/Makefile.in +1009 -0
- data/ext/parsley/NEWS +0 -0
- data/ext/parsley/PAPER +36 -0
- data/ext/parsley/Portfile +18 -0
- data/ext/parsley/Portfile.in +17 -0
- data/ext/parsley/README.C-LANG +92 -0
- data/ext/parsley/README.markdown +1 -0
- data/ext/parsley/TODO +39 -0
- data/ext/parsley/VERSION +1 -0
- data/ext/parsley/aclocal.m4 +8918 -0
- data/ext/parsley/bootstrap.sh +6 -0
- data/ext/parsley/config.guess +1561 -0
- data/ext/parsley/config.sub +1686 -0
- data/ext/parsley/configure +13437 -0
- data/ext/parsley/configure.ac +46 -0
- data/ext/parsley/depcomp +630 -0
- data/ext/parsley/functions.c +368 -0
- data/ext/parsley/functions.h +19 -0
- data/ext/parsley/generate_bisect.sh +12 -0
- data/ext/parsley/hooks/prepare-commit-msg +16 -0
- data/ext/parsley/install-sh +520 -0
- data/ext/parsley/json-c-0.9/AUTHORS +2 -0
- data/ext/parsley/json-c-0.9/COPYING +19 -0
- data/ext/parsley/json-c-0.9/ChangeLog +103 -0
- data/ext/parsley/json-c-0.9/INSTALL +302 -0
- data/ext/parsley/json-c-0.9/Makefile.am +43 -0
- data/ext/parsley/json-c-0.9/Makefile.in +800 -0
- data/ext/parsley/json-c-0.9/NEWS +1 -0
- data/ext/parsley/json-c-0.9/README +20 -0
- data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
- data/ext/parsley/json-c-0.9/README.html +32 -0
- data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
- data/ext/parsley/json-c-0.9/arraylist.c +94 -0
- data/ext/parsley/json-c-0.9/arraylist.h +53 -0
- data/ext/parsley/json-c-0.9/bits.h +27 -0
- data/ext/parsley/json-c-0.9/config.guess +1561 -0
- data/ext/parsley/json-c-0.9/config.h +125 -0
- data/ext/parsley/json-c-0.9/config.h.in +124 -0
- data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
- data/ext/parsley/json-c-0.9/config.sub +1686 -0
- data/ext/parsley/json-c-0.9/configure +13084 -0
- data/ext/parsley/json-c-0.9/configure.in +33 -0
- data/ext/parsley/json-c-0.9/debug.c +98 -0
- data/ext/parsley/json-c-0.9/debug.h +50 -0
- data/ext/parsley/json-c-0.9/depcomp +630 -0
- data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
- data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
- data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
- data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
- data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
- data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
- data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
- data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
- data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
- data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
- data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
- data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
- data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
- data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
- data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
- data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
- data/ext/parsley/json-c-0.9/install-sh +520 -0
- data/ext/parsley/json-c-0.9/json.h +31 -0
- data/ext/parsley/json-c-0.9/json.pc +11 -0
- data/ext/parsley/json-c-0.9/json.pc.in +11 -0
- data/ext/parsley/json-c-0.9/json_object.c +512 -0
- data/ext/parsley/json-c-0.9/json_object.h +319 -0
- data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
- data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
- data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
- data/ext/parsley/json-c-0.9/json_util.c +122 -0
- data/ext/parsley/json-c-0.9/json_util.h +31 -0
- data/ext/parsley/json-c-0.9/libjson.la +41 -0
- data/ext/parsley/json-c-0.9/libtool +8890 -0
- data/ext/parsley/json-c-0.9/linkhash.c +216 -0
- data/ext/parsley/json-c-0.9/linkhash.h +272 -0
- data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
- data/ext/parsley/json-c-0.9/missing +376 -0
- data/ext/parsley/json-c-0.9/printbuf.c +149 -0
- data/ext/parsley/json-c-0.9/printbuf.h +64 -0
- data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
- data/ext/parsley/json-c-0.9/test1 +130 -0
- data/ext/parsley/json-c-0.9/test1.c +164 -0
- data/ext/parsley/json-c-0.9/test2 +130 -0
- data/ext/parsley/json-c-0.9/test2.c +20 -0
- data/ext/parsley/json-c-0.9/test3 +130 -0
- data/ext/parsley/json-c-0.9/test3.c +23 -0
- data/ext/parsley/libtool +8890 -0
- data/ext/parsley/ltmain.sh +8406 -0
- data/ext/parsley/missing +376 -0
- data/ext/parsley/parsed_xpath.c +168 -0
- data/ext/parsley/parsed_xpath.h +34 -0
- data/ext/parsley/parser.y +631 -0
- data/ext/parsley/parsley.c +793 -0
- data/ext/parsley/parsley.h +87 -0
- data/ext/parsley/parsley_main.c +185 -0
- data/ext/parsley/parsleyc_main.c +108 -0
- data/ext/parsley/regexp.c +359 -0
- data/ext/parsley/regexp.h +36 -0
- data/ext/parsley/scanner.l +221 -0
- data/ext/parsley/test/ambiguous.html +207 -0
- data/ext/parsley/test/ambiguous.json +1 -0
- data/ext/parsley/test/ambiguous.let +6 -0
- data/ext/parsley/test/array-regression.html +5 -0
- data/ext/parsley/test/array-regression.json +1 -0
- data/ext/parsley/test/array-regression.let +10 -0
- data/ext/parsley/test/backslash.html +5 -0
- data/ext/parsley/test/backslash.json +1 -0
- data/ext/parsley/test/backslash.let +3 -0
- data/ext/parsley/test/bang.html +17 -0
- data/ext/parsley/test/bang.json +1 -0
- data/ext/parsley/test/bang.let +6 -0
- data/ext/parsley/test/collate_regression.html +324 -0
- data/ext/parsley/test/collate_regression.json +1 -0
- data/ext/parsley/test/collate_regression.let +9 -0
- data/ext/parsley/test/contains.html +3 -0
- data/ext/parsley/test/contains.json +1 -0
- data/ext/parsley/test/contains.let +3 -0
- data/ext/parsley/test/content.html +13 -0
- data/ext/parsley/test/content.json +1 -0
- data/ext/parsley/test/content.let +7 -0
- data/ext/parsley/test/cool.html +575 -0
- data/ext/parsley/test/cool.json +1 -0
- data/ext/parsley/test/cool.let +9 -0
- data/ext/parsley/test/craigs-simple.html +207 -0
- data/ext/parsley/test/craigs-simple.json +1 -0
- data/ext/parsley/test/craigs-simple.let +6 -0
- data/ext/parsley/test/craigs.html +207 -0
- data/ext/parsley/test/craigs.json +1 -0
- data/ext/parsley/test/craigs.let +9 -0
- data/ext/parsley/test/crash.html +157 -0
- data/ext/parsley/test/crash.json +1 -0
- data/ext/parsley/test/crash.let +1 -0
- data/ext/parsley/test/css_attr.html +3 -0
- data/ext/parsley/test/css_attr.json +1 -0
- data/ext/parsley/test/css_attr.let +3 -0
- data/ext/parsley/test/default-namespace.json +1 -0
- data/ext/parsley/test/default-namespace.let +3 -0
- data/ext/parsley/test/default-namespace.xml +1493 -0
- data/ext/parsley/test/div.html +8 -0
- data/ext/parsley/test/div.json +1 -0
- data/ext/parsley/test/div.let +10 -0
- data/ext/parsley/test/empty.html +3 -0
- data/ext/parsley/test/empty.json +1 -0
- data/ext/parsley/test/empty.let +1 -0
- data/ext/parsley/test/emptyish.html +207 -0
- data/ext/parsley/test/emptyish.let +3 -0
- data/ext/parsley/test/fictional-opt.html +43 -0
- data/ext/parsley/test/fictional-opt.json +1 -0
- data/ext/parsley/test/fictional-opt.let +14 -0
- data/ext/parsley/test/fictional.html +43 -0
- data/ext/parsley/test/fictional.json +1 -0
- data/ext/parsley/test/fictional.let +14 -0
- data/ext/parsley/test/function-magic.html +9 -0
- data/ext/parsley/test/function-magic.json +1 -0
- data/ext/parsley/test/function-magic.let +8 -0
- data/ext/parsley/test/hn.html +32 -0
- data/ext/parsley/test/hn.json +1 -0
- data/ext/parsley/test/hn.let +8 -0
- data/ext/parsley/test/malformed-array.html +2329 -0
- data/ext/parsley/test/malformed-array.json +1 -0
- data/ext/parsley/test/malformed-array.let +22 -0
- data/ext/parsley/test/malformed-expr.html +2329 -0
- data/ext/parsley/test/malformed-expr.json +1 -0
- data/ext/parsley/test/malformed-expr.let +16 -0
- data/ext/parsley/test/malformed-function.html +845 -0
- data/ext/parsley/test/malformed-function.json +197 -0
- data/ext/parsley/test/malformed-function.let +8 -0
- data/ext/parsley/test/malformed-json.html +2329 -0
- data/ext/parsley/test/malformed-json.json +1 -0
- data/ext/parsley/test/malformed-json.let +6 -0
- data/ext/parsley/test/malformed-xpath.html +8 -0
- data/ext/parsley/test/malformed-xpath.json +1 -0
- data/ext/parsley/test/malformed-xpath.let +7 -0
- data/ext/parsley/test/match.json +1 -0
- data/ext/parsley/test/match.let +9 -0
- data/ext/parsley/test/match.xml +11 -0
- data/ext/parsley/test/math_ambiguity.html +9 -0
- data/ext/parsley/test/math_ambiguity.json +1 -0
- data/ext/parsley/test/math_ambiguity.let +5 -0
- data/ext/parsley/test/nth-regression.html +13 -0
- data/ext/parsley/test/nth-regression.json +1 -0
- data/ext/parsley/test/nth-regression.let +3 -0
- data/ext/parsley/test/optional.html +2328 -0
- data/ext/parsley/test/optional.json +1 -0
- data/ext/parsley/test/optional.let +8 -0
- data/ext/parsley/test/outer-xml.html +6 -0
- data/ext/parsley/test/outer-xml.json +1 -0
- data/ext/parsley/test/outer-xml.let +5 -0
- data/ext/parsley/test/position.html +8 -0
- data/ext/parsley/test/position.json +1 -0
- data/ext/parsley/test/position.let +6 -0
- data/ext/parsley/test/question_regressions.html +443 -0
- data/ext/parsley/test/question_regressions.json +1 -0
- data/ext/parsley/test/question_regressions.let +6 -0
- data/ext/parsley/test/quote.json +1 -0
- data/ext/parsley/test/quote.let +8 -0
- data/ext/parsley/test/quote.xml +11 -0
- data/ext/parsley/test/reddit.html +1 -0
- data/ext/parsley/test/reddit.json +1 -0
- data/ext/parsley/test/reddit.let +12 -0
- data/ext/parsley/test/remote-fail.json +1 -0
- data/ext/parsley/test/remote.html +3 -0
- data/ext/parsley/test/remote.json +1 -0
- data/ext/parsley/test/remote.let +4 -0
- data/ext/parsley/test/replace.json +1 -0
- data/ext/parsley/test/replace.let +9 -0
- data/ext/parsley/test/replace.xml +11 -0
- data/ext/parsley/test/scope.html +10 -0
- data/ext/parsley/test/scope.json +1 -0
- data/ext/parsley/test/scope.let +6 -0
- data/ext/parsley/test/segfault.html +5 -0
- data/ext/parsley/test/segfault.json +1 -0
- data/ext/parsley/test/segfault.let +9 -0
- data/ext/parsley/test/sg-wrap.html +5 -0
- data/ext/parsley/test/sg-wrap.json +1 -0
- data/ext/parsley/test/sg-wrap.let +3 -0
- data/ext/parsley/test/sg_off.html +5 -0
- data/ext/parsley/test/sg_off.json +1 -0
- data/ext/parsley/test/sg_off.let +3 -0
- data/ext/parsley/test/test.json +1 -0
- data/ext/parsley/test/test.let +6 -0
- data/ext/parsley/test/test.xml +11 -0
- data/ext/parsley/test/trivial.html +2329 -0
- data/ext/parsley/test/trivial.json +1 -0
- data/ext/parsley/test/trivial.let +4 -0
- data/ext/parsley/test/trivial2.html +2329 -0
- data/ext/parsley/test/trivial2.json +1 -0
- data/ext/parsley/test/trivial2.let +7 -0
- data/ext/parsley/test/unbang.html +17 -0
- data/ext/parsley/test/unbang.json +1 -0
- data/ext/parsley/test/unbang.let +6 -0
- data/ext/parsley/test/unicode.html +3 -0
- data/ext/parsley/test/unicode.json +1 -0
- data/ext/parsley/test/unicode.let +1 -0
- data/ext/parsley/test/whitespace.html +8 -0
- data/ext/parsley/test/whitespace.json +1 -0
- data/ext/parsley/test/whitespace.let +3 -0
- data/ext/parsley/test/whitespace_regression.html +4 -0
- data/ext/parsley/test/whitespace_regression.json +1 -0
- data/ext/parsley/test/whitespace_regression.let +3 -0
- data/ext/parsley/test/yelp-benchmark.rb +53 -0
- data/ext/parsley/test/yelp-home.html +1004 -0
- data/ext/parsley/test/yelp-home.json +1 -0
- data/ext/parsley/test/yelp-home.let +6 -0
- data/ext/parsley/test/yelp.html +2329 -0
- data/ext/parsley/test/yelp.json +1 -0
- data/ext/parsley/test/yelp.let +12 -0
- data/ext/parsley/test/youtube.html +1940 -0
- data/ext/parsley/test/youtube.let +11 -0
- data/ext/parsley/util.c +237 -0
- data/ext/parsley/util.h +34 -0
- data/ext/parsley/xml2json.c +47 -0
- data/ext/parsley/xml2json.h +14 -0
- data/ext/parsley/y.tab.h +222 -0
- data/ext/parsley/ylwrap +222 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +120 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +366 -0
|
@@ -0,0 +1,793 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <argp.h>
|
|
4
|
+
#include <stdarg.h>
|
|
5
|
+
#include <json/json.h>
|
|
6
|
+
#include "parsley.h"
|
|
7
|
+
#include "y.tab.h"
|
|
8
|
+
#include <json/printbuf.h>
|
|
9
|
+
#include "functions.h"
|
|
10
|
+
#include "util.h"
|
|
11
|
+
#include <string.h>
|
|
12
|
+
#include <errno.h>
|
|
13
|
+
#include <ctype.h>
|
|
14
|
+
#include <stdbool.h>
|
|
15
|
+
#include <libxslt/xslt.h>
|
|
16
|
+
#include <libxslt/xsltInternals.h>
|
|
17
|
+
#include <libxslt/transform.h>
|
|
18
|
+
#include <libxml/tree.h>
|
|
19
|
+
#include <libxml/parser.h>
|
|
20
|
+
#include <libxml/debugXML.h>
|
|
21
|
+
#include <libxml/HTMLparser.h>
|
|
22
|
+
#include <libxml/HTMLtree.h>
|
|
23
|
+
#include <libxml/xmlwriter.h>
|
|
24
|
+
#include <libxml/xmlerror.h>
|
|
25
|
+
#include <libxml/debugXML.h>
|
|
26
|
+
#include <libexslt/exslt.h>
|
|
27
|
+
|
|
28
|
+
/*
 * Always reports 1.
 * NOTE(review): presumably the lex/flex end-of-input hook, where a non-zero
 * result means "no further input" -- confirm against scanner.l.
 */
int
yywrap(void)
{
  return 1;
}
|
|
31
|
+
|
|
32
|
+
struct ll {
|
|
33
|
+
xmlChar *name;
|
|
34
|
+
struct ll *next;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
/*
 * Return a newly allocated copy of `orig` in which every non-overlapping
 * occurrence of `old` (scanned left to right) is replaced by `new`.
 *
 * The caller owns the result and must free() it.
 * Returns NULL if any argument is NULL or if allocation fails.
 * An empty `old` matches nothing, so a plain copy of `orig` is returned
 * (the previous implementation looped forever in that case).
 */
static char *
arepl(char* orig, char* old, char* new) {
  if(orig == NULL || old == NULL || new == NULL) return NULL;

  size_t olen = strlen(old);
  size_t nlen = strlen(new);
  size_t total = strlen(orig);

  // First pass: count matches so the result can be sized exactly.
  size_t hits = 0;
  if(olen > 0) {
    for(const char *scan = orig; (scan = strstr(scan, old)) != NULL; scan += olen) {
      hits++;
    }
  }

  char *out = malloc(total - hits * olen + hits * nlen + 1);
  if(out == NULL) return NULL;

  // Second pass: copy the text between matches, splicing in `new`.
  char *dst = out;
  const char *src = orig;
  if(olen > 0) {
    const char *hit;
    while((hit = strstr(src, old)) != NULL) {
      size_t span = (size_t) (hit - src);
      memcpy(dst, src, span);
      dst += span;
      memcpy(dst, new, nlen);
      dst += nlen;
      src = hit + olen;
    }
  }
  memcpy(dst, src, strlen(src) + 1);  // tail, including the terminating NUL
  return out;
}
|
|
55
|
+
|
|
56
|
+
static char *
|
|
57
|
+
full_key_name(contextPtr c) {
|
|
58
|
+
if(c == NULL || c->parent == NULL) return strdup("/");
|
|
59
|
+
static struct ll * last = NULL;
|
|
60
|
+
while(c->parent != NULL) {
|
|
61
|
+
if(c->tag != NULL) {
|
|
62
|
+
struct ll * ptr = calloc(sizeof(struct ll), 1);
|
|
63
|
+
ptr->name = c->tag;
|
|
64
|
+
ptr->next = last;
|
|
65
|
+
last = ptr;
|
|
66
|
+
}
|
|
67
|
+
c = c->parent;
|
|
68
|
+
}
|
|
69
|
+
struct printbuf *buf = printbuf_new();
|
|
70
|
+
while(last != NULL) {
|
|
71
|
+
sprintbuf(buf, "/%s", last->name);
|
|
72
|
+
last = last->next;
|
|
73
|
+
}
|
|
74
|
+
char *out = strdup(buf->buf);
|
|
75
|
+
printbuf_free(buf);
|
|
76
|
+
return out;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/*
 * Release a parse result: its xml member, its error string, and the struct
 * itself. Passing NULL is a no-op.
 *
 * NOTE(review): ptr->xml is assigned from xsltApplyStylesheetUser() in
 * parsley_parse_doc(), i.e. it is a document. xmlFree() releases only the
 * pointed-to allocation, not the node tree below it; xmlFreeDoc() would free
 * the whole tree. It is left unchanged here because collate()/prune() relink
 * nodes by hand and a full tree walk has not been verified against that --
 * confirm before switching.
 */
void parsed_parsley_free(parsedParsleyPtr ptr) {
  if(ptr == NULL) return;
  if(ptr->xml != NULL) xmlFree(ptr->xml);
  free(ptr->error);  // free(NULL) is a no-op
  free(ptr);
}
|
|
84
|
+
|
|
85
|
+
static parsedParsleyPtr parse_error(char* format, ...) {
|
|
86
|
+
parsedParsleyPtr ptr = (parsedParsleyPtr) calloc(sizeof(parsed_parsley), 1);
|
|
87
|
+
ptr->xml = NULL;
|
|
88
|
+
va_list args;
|
|
89
|
+
va_start(args, format);
|
|
90
|
+
vasprintf(&ptr->error, format, args);
|
|
91
|
+
va_end(args);
|
|
92
|
+
return ptr;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/*
 * Parse `file` as HTML (PARSLEY_OPTIONS_HTML set) or XML, then run the
 * parselet over the resulting document via parsley_parse_doc().
 *
 * flags: PARSLEY_OPTIONS_FORCE_UTF8 forces the "UTF-8" encoding; otherwise
 *        the parser picks the encoding. The full flag set is forwarded to
 *        parsley_parse_doc().
 *
 * Returns a result the caller releases with parsed_parsley_free(). When the
 * file cannot be parsed (or the parser context cannot be created) the result
 * comes from parse_error().
 *
 * The XML branch now passes XML_PARSE_* options instead of the HTML_PARSE_*
 * constants, and the document variable no longer shadows the `html` flag.
 */
parsedParsleyPtr parsley_parse_file(parsleyPtr parsley, char* file, int flags) {
  xmlSetGenericErrorFunc(NULL , parsleyXsltError);

  bool as_html = (flags & PARSLEY_OPTIONS_HTML) != 0;
  char *encoding = (flags & PARSLEY_OPTIONS_FORCE_UTF8) ? "UTF-8" : NULL;
  xmlDocPtr doc = NULL;

  if(as_html) {
    htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
    if(ctxt != NULL) {
      doc = htmlCtxtReadFile(ctxt, file, encoding,
                             HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
      htmlFreeParserCtxt(ctxt);
    }
  } else {
    xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
    if(ctxt != NULL) {
      doc = xmlCtxtReadFile(ctxt, file, encoding,
                            XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
      xmlFreeParserCtxt(ctxt);
    }
  }

  if(doc == NULL) return parse_error("Couldn't parse file: %s\n", file);

  parsedParsleyPtr out = parsley_parse_doc(parsley, doc, flags);
  xmlFreeDoc(doc);
  return out;
}
|
|
117
|
+
|
|
118
|
+
/*
 * Parse the first `size` bytes of `string` as HTML (PARSLEY_OPTIONS_HTML set)
 * or XML, then run the parselet over the resulting document via
 * parsley_parse_doc().
 *
 * base_uri: URI handed to the parser for the in-memory document; a
 *           placeholder is substituted when NULL.
 * flags:    PARSLEY_OPTIONS_FORCE_UTF8 forces the "UTF-8" encoding; the full
 *           flag set is forwarded to parsley_parse_doc().
 *
 * Returns a result the caller releases with parsed_parsley_free(). On a parse
 * failure the result comes from parse_error().
 *
 * Fixes: the parser contexts are now freed on every path (they were leaked),
 * the XML branch passes XML_PARSE_* options instead of HTML_PARSE_* ones, and
 * the document variable no longer shadows the `html` flag.
 * NOTE(review): the libxml2 readers take an int length; `size` is narrowed.
 */
parsedParsleyPtr parsley_parse_string(parsleyPtr parsley, char* string, size_t size, char* base_uri, int flags) {
  xmlSetGenericErrorFunc(NULL , parsleyXsltError);

  bool as_html = (flags & PARSLEY_OPTIONS_HTML) != 0;
  char *encoding = (flags & PARSLEY_OPTIONS_FORCE_UTF8) ? "UTF-8" : NULL;
  if(base_uri == NULL) base_uri = "http://parselets.com/in-memory-string";

  xmlDocPtr doc = NULL;
  if(as_html) {
    htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
    if(ctxt != NULL) {
      doc = htmlCtxtReadMemory(ctxt, string, (int) size, base_uri, encoding,
                               HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
      htmlFreeParserCtxt(ctxt);
    }
  } else {
    xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
    if(ctxt != NULL) {
      doc = xmlCtxtReadMemory(ctxt, string, (int) size, base_uri, encoding,
                              XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
      xmlFreeParserCtxt(ctxt);
    }
  }

  if(doc == NULL) return parse_error("Couldn't parse string");

  parsedParsleyPtr out = parsley_parse_doc(parsley, doc, flags);
  xmlFreeDoc(doc);
  return out;
}
|
|
139
|
+
|
|
140
|
+
static char *
|
|
141
|
+
xpath_of(xmlNodePtr node) {
|
|
142
|
+
if(node == NULL || node->name == NULL || node->parent == NULL) return strdup("/");
|
|
143
|
+
|
|
144
|
+
struct ll * ptr = (struct ll *) calloc(sizeof(struct ll), 1);
|
|
145
|
+
|
|
146
|
+
while(node->name != NULL && node->parent != NULL) {
|
|
147
|
+
if(node->ns == NULL) {
|
|
148
|
+
struct ll * tmp = (struct ll *) calloc(sizeof(struct ll), 1);
|
|
149
|
+
tmp->name = node->name;
|
|
150
|
+
tmp->next = ptr;
|
|
151
|
+
ptr = tmp;
|
|
152
|
+
}
|
|
153
|
+
node = node->parent;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
struct printbuf *buf = printbuf_new();
|
|
157
|
+
sprintbuf(buf, "");
|
|
158
|
+
while(ptr->name != NULL) {
|
|
159
|
+
sprintbuf(buf, "/%s", ptr->name);
|
|
160
|
+
struct ll * last = ptr;
|
|
161
|
+
ptr = ptr->next;
|
|
162
|
+
free(last);
|
|
163
|
+
}
|
|
164
|
+
free(ptr);
|
|
165
|
+
|
|
166
|
+
char *str = strdup(strlen(buf->buf) ? buf->buf : "/");
|
|
167
|
+
printbuf_free(buf);
|
|
168
|
+
return str;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
static bool
|
|
172
|
+
is_root(xmlNodePtr node) {
|
|
173
|
+
return node != NULL && node->ns != NULL && !strcmp(node->ns->prefix, "parsley") && !strcmp(node->name, "root");
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
int compare_pos (const void * a, const void * b)
|
|
177
|
+
{
|
|
178
|
+
char* as = xmlGetProp(*(xmlNodePtr*)a, "position");
|
|
179
|
+
char* bs = xmlGetProp(*(xmlNodePtr*)b, "position");
|
|
180
|
+
return atoi(as) - atoi(bs);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
static void
|
|
184
|
+
_xmlAddChild(xmlNodePtr parent, xmlNodePtr child) {
|
|
185
|
+
xmlNodePtr node = parent->children;
|
|
186
|
+
if(node == NULL) {
|
|
187
|
+
parent->children = child;
|
|
188
|
+
return;
|
|
189
|
+
}
|
|
190
|
+
while(node->next != NULL){
|
|
191
|
+
node = node->next;
|
|
192
|
+
}
|
|
193
|
+
node->next = child;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
static int
|
|
197
|
+
_xmlChildElementCount(xmlNodePtr n) {
|
|
198
|
+
xmlNodePtr child = n->children;
|
|
199
|
+
int i = 0;
|
|
200
|
+
while(child != NULL) {
|
|
201
|
+
i++;
|
|
202
|
+
child = child->next;
|
|
203
|
+
}
|
|
204
|
+
return i;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
static bool
|
|
208
|
+
xml_empty(xmlNodePtr xml) {
|
|
209
|
+
// fprintf(stderr, "%s\n", xml->name);
|
|
210
|
+
xmlNodePtr child = xml->children;
|
|
211
|
+
while(child != NULL) {
|
|
212
|
+
if(child->type != XML_TEXT_NODE) return false;
|
|
213
|
+
if(strlen(child->content)) return false;
|
|
214
|
+
child = child->next;
|
|
215
|
+
}
|
|
216
|
+
// printf("hello!\n");
|
|
217
|
+
return true;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
static void
|
|
221
|
+
collate(xmlNodePtr xml) {
|
|
222
|
+
if(xml == NULL) return ;
|
|
223
|
+
if(xml->type != XML_ELEMENT_NODE) return;
|
|
224
|
+
if(xml->ns != NULL && !strcmp(xml->ns->prefix, "parsley") && !strcmp(xml->name, "zipped")){
|
|
225
|
+
xmlNodePtr parent = xml->parent;
|
|
226
|
+
xmlNodePtr child = xml->children;
|
|
227
|
+
if(child == NULL) return;
|
|
228
|
+
int n = _xmlChildElementCount(xml);
|
|
229
|
+
|
|
230
|
+
xmlNodePtr* name_nodes = calloc(n, sizeof(xmlNodePtr));
|
|
231
|
+
xmlNodePtr* lists = calloc(n, sizeof(xmlNodePtr));
|
|
232
|
+
bool* empty = calloc(n, sizeof(bool));
|
|
233
|
+
bool* multi = calloc(n, sizeof(bool));
|
|
234
|
+
bool* optional = calloc(n, sizeof(bool));
|
|
235
|
+
|
|
236
|
+
int len = 0;
|
|
237
|
+
for(int i = 0; i < n; i++) {
|
|
238
|
+
name_nodes[i] = child;
|
|
239
|
+
if(child->children == NULL) {
|
|
240
|
+
lists[i] = NULL;
|
|
241
|
+
} else {
|
|
242
|
+
lists[i] = child->children;
|
|
243
|
+
multi[i] = false;
|
|
244
|
+
optional[i] = xmlGetProp(name_nodes[i], "optional") != NULL;
|
|
245
|
+
if(lists[i] != NULL && !strcmp(lists[i]->name, "groups")) {
|
|
246
|
+
lists[i] = lists[i]->children;
|
|
247
|
+
multi[i] = true;
|
|
248
|
+
}
|
|
249
|
+
if(lists[i] != NULL) {
|
|
250
|
+
lists[i]->parent->extra = i;
|
|
251
|
+
len += _xmlChildElementCount(lists[i]->parent);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
child->children = NULL;
|
|
255
|
+
}
|
|
256
|
+
child = child->next;
|
|
257
|
+
}
|
|
258
|
+
xml->children = NULL;
|
|
259
|
+
|
|
260
|
+
xmlNodePtr* sortable = malloc(len * sizeof(xmlNodePtr));
|
|
261
|
+
int j = 0;
|
|
262
|
+
|
|
263
|
+
for(int i = 0; i < n; i++) {
|
|
264
|
+
xmlNodePtr node = lists[i];
|
|
265
|
+
while(node != NULL){
|
|
266
|
+
sortable[j++] = node;
|
|
267
|
+
// printf("%d/%d: %d/%d\n", i, n, j, len);
|
|
268
|
+
node = node->next;
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
for(int i = 0; i < len; i++) {
|
|
273
|
+
sortable[i]->next = NULL;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
qsort(sortable, len, sizeof(xmlNodePtr), compare_pos);
|
|
277
|
+
|
|
278
|
+
xmlNodePtr groups = xml->parent;
|
|
279
|
+
groups->children = NULL;
|
|
280
|
+
xmlNodePtr group;
|
|
281
|
+
xmlNodePtr* targets = calloc(sizeof(xmlNodePtr), n);
|
|
282
|
+
|
|
283
|
+
for(j = 0; j < len; j++) {
|
|
284
|
+
int i = sortable[j]->parent->extra;
|
|
285
|
+
if (j == 0 || (!empty[i] && !multi[i] && !optional[i])) { // first or full
|
|
286
|
+
xmlNodePtr group = xmlNewChild(groups, xml->ns, "group", NULL); //new group
|
|
287
|
+
xmlSetProp(group, "optional", "true");
|
|
288
|
+
for(int k = 0; k < n; k++) {
|
|
289
|
+
empty[k] = true;
|
|
290
|
+
targets[k] = xmlCopyNode(name_nodes[k], 2);
|
|
291
|
+
_xmlAddChild(group, targets[k]);
|
|
292
|
+
if(multi[k]) targets[k] = xmlNewChild(targets[k], xml->ns, "groups", NULL);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
if(!multi[i]) sortable[j] = sortable[j]->children;
|
|
297
|
+
if(empty[i] || multi[i]) _xmlAddChild(targets[i], sortable[j]);
|
|
298
|
+
empty[i] = false;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
free(targets);
|
|
302
|
+
free(name_nodes);
|
|
303
|
+
free(lists);
|
|
304
|
+
free(optional);
|
|
305
|
+
free(empty);
|
|
306
|
+
free(multi);
|
|
307
|
+
free(sortable);
|
|
308
|
+
|
|
309
|
+
collate(groups);
|
|
310
|
+
} else {
|
|
311
|
+
xmlNodePtr child = xml->children;
|
|
312
|
+
while(child != NULL){
|
|
313
|
+
collate(child);
|
|
314
|
+
child = child->next;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
/*
 * Public entry point that hands `agent` straight to
 * _parsley_set_user_agent(), which is defined outside this section.
 */
void
parsley_set_user_agent(const char *agent)
{
  _parsley_set_user_agent(agent);
}
|
|
323
|
+
|
|
324
|
+
static void
|
|
325
|
+
unlink(xmlNodePtr parent, xmlNodePtr child) {
|
|
326
|
+
if(child == NULL || parent == NULL) return;
|
|
327
|
+
xmlNodePtr ptr = parent->children;
|
|
328
|
+
|
|
329
|
+
if(ptr == child) {
|
|
330
|
+
parent->children = child->next;
|
|
331
|
+
if(child->next != NULL) {
|
|
332
|
+
child->next->prev = NULL;
|
|
333
|
+
}
|
|
334
|
+
} else {
|
|
335
|
+
while(ptr != NULL) {
|
|
336
|
+
if(ptr->next == child) {
|
|
337
|
+
ptr->next = child->next;
|
|
338
|
+
if(child->next) child->next->prev = ptr;
|
|
339
|
+
}
|
|
340
|
+
ptr = ptr->next;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
child->next = NULL;
|
|
344
|
+
child->prev = NULL;
|
|
345
|
+
child->parent = NULL;
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
static void
|
|
349
|
+
visit(parsedParsleyPtr ptr, xmlNodePtr xml, char* err);
|
|
350
|
+
|
|
351
|
+
static void
|
|
352
|
+
prune(parsedParsleyPtr ptr, xmlNodePtr xml, char* err) {
|
|
353
|
+
if(xml == NULL || is_root(xml)) return;
|
|
354
|
+
bool optional = xmlGetProp(xml, "optional") != NULL;
|
|
355
|
+
if(optional) {
|
|
356
|
+
xmlNodePtr parent = xml->parent;
|
|
357
|
+
unlink(parent, xml);
|
|
358
|
+
free(err);
|
|
359
|
+
err = NULL;
|
|
360
|
+
visit(ptr, parent, err);
|
|
361
|
+
return;
|
|
362
|
+
} else {
|
|
363
|
+
if(err == NULL) asprintf(&err, "%s was empty", xpath_of(xml));
|
|
364
|
+
if(!is_root(xml->parent)) {
|
|
365
|
+
// fprintf(stderr, "prune up: %s\n", xml->parent->name);
|
|
366
|
+
prune(ptr, xml->parent, err);
|
|
367
|
+
} else {
|
|
368
|
+
// fprintf(stderr, "error out\n");
|
|
369
|
+
ptr->error = err;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
static void
|
|
375
|
+
visit(parsedParsleyPtr ptr, xmlNodePtr xml, char* err) {
|
|
376
|
+
if(xml == NULL) return;
|
|
377
|
+
// printf("trying to visit: %s\n", xml->name);
|
|
378
|
+
if(xml->type != XML_ELEMENT_NODE) return;
|
|
379
|
+
xmlNodePtr child = xml->children;
|
|
380
|
+
xmlNodePtr parent = xml->parent;
|
|
381
|
+
if(parent == NULL) return;
|
|
382
|
+
|
|
383
|
+
// printf("passed guard clause: %s\n", xml->name);
|
|
384
|
+
|
|
385
|
+
if(xml_empty(xml)) {
|
|
386
|
+
if(err == NULL) asprintf(&err, "%s was empty", xpath_of(xml));
|
|
387
|
+
|
|
388
|
+
prune(ptr, xml, err);
|
|
389
|
+
} else if(err != NULL) {
|
|
390
|
+
free(err);
|
|
391
|
+
}
|
|
392
|
+
while(err == NULL && child != NULL){
|
|
393
|
+
child->parent = xml;
|
|
394
|
+
visit(ptr, child, err);
|
|
395
|
+
child = child->next;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
static parsedParsleyPtr current_ptr = NULL;
|
|
400
|
+
|
|
401
|
+
void
|
|
402
|
+
parsleyXsltError(void * ctx, const char * msg, ...) {
|
|
403
|
+
if(current_ptr == NULL) return;
|
|
404
|
+
va_list ap;
|
|
405
|
+
va_start(ap, msg);
|
|
406
|
+
if(current_ptr->error == NULL) {
|
|
407
|
+
char *tmp;
|
|
408
|
+
char *tmp2;
|
|
409
|
+
vasprintf(&tmp, msg, ap);
|
|
410
|
+
tmp2 = arepl(tmp, "xmlXPathCompOpEval: ", "");
|
|
411
|
+
current_ptr->error = arepl(tmp2, "\n", "");
|
|
412
|
+
|
|
413
|
+
free(tmp);
|
|
414
|
+
free(tmp2);
|
|
415
|
+
}
|
|
416
|
+
va_end(ap);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
static bool
|
|
420
|
+
hasDefaultNS(xmlDocPtr doc) {
|
|
421
|
+
return xmlSearchNs(doc, doc->children, NULL) != NULL;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
static void
|
|
425
|
+
_killDefaultNS(xmlNodePtr node) {
|
|
426
|
+
if(node == NULL) return;
|
|
427
|
+
|
|
428
|
+
xmlNsPtr ns = node->nsDef;
|
|
429
|
+
if(ns != NULL) {
|
|
430
|
+
if(ns->prefix == NULL) node->nsDef = ns->next;
|
|
431
|
+
xmlNsPtr prev = ns;
|
|
432
|
+
while(ns = ns->next) {
|
|
433
|
+
if(ns->prefix == NULL) prev->next = ns->next;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
ns = node->ns;
|
|
438
|
+
if(ns != NULL) {
|
|
439
|
+
if(ns->prefix == NULL) node->ns = ns->next;
|
|
440
|
+
xmlNsPtr prev = ns;
|
|
441
|
+
while(ns = ns->next) {
|
|
442
|
+
if(ns->prefix == NULL) prev->next = ns->next;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
_killDefaultNS(node->children);
|
|
447
|
+
_killDefaultNS(node->next);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
void
|
|
451
|
+
killDefaultNS(xmlDocPtr doc) {
|
|
452
|
+
if(hasDefaultNS(doc)) {
|
|
453
|
+
_killDefaultNS(doc->children);
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
/*
 * Apply the compiled parselet stylesheet to `doc` and post-process the output.
 *
 * Steps: strip the default namespace from `doc`, set the I/O mode from
 * `flags`, optionally span-wrap the document (PARSLEY_OPTIONS_SGWRAP), run
 * the XSLT transform while capturing the first libxml/libxslt error message,
 * then, if no error was captured, collate (PARSLEY_OPTIONS_COLLATE) and prune
 * (PARSLEY_OPTIONS_PRUNE) the output tree.
 *
 * Returns a result the caller releases with parsed_parsley_free(); on failure
 * its `error` member is set. Returns NULL only if the result struct cannot be
 * allocated.
 *
 * Not re-entrant: the error callback reaches the result through the
 * file-level `current_ptr`.
 *
 * Fixes: the calloc() result is checked; the generic error handler's context
 * is reset once the transform context is freed, so libxml2 is not left
 * holding a pointer to freed memory; the nested, repeated error checks are
 * flattened.
 * NOTE(review): the transform context is created before the optional
 * span-wrap replaces `doc` -- confirm that is intended.
 */
parsedParsleyPtr parsley_parse_doc(parsleyPtr parsley, xmlDocPtr doc, int flags) {
  killDefaultNS(doc);

  parsedParsleyPtr ptr = calloc(1, sizeof(parsed_parsley));  // zeroed: error == NULL
  if(ptr == NULL) return NULL;
  ptr->parsley = parsley;

  parsley_io_set_mode(flags);
  xsltTransformContextPtr ctxt = xsltNewTransformContext(parsley->stylesheet, doc);
  xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
  current_ptr = ptr;

  if(flags & PARSLEY_OPTIONS_SGWRAP) {
    doc = parsley_apply_span_wrap(doc);
  }
  ptr->xml = xsltApplyStylesheetUser(parsley->stylesheet, doc, NULL, NULL, NULL, ctxt);
  xsltFreeTransformContext(ctxt);
  xmlSetGenericErrorFunc(NULL, parsleyXsltError);  // ctxt is gone; drop the stale context
  current_ptr = NULL;

  if(ptr->error != NULL) return ptr;

  if(ptr->xml == NULL) {
    ptr->error = strdup("Internal runtime error");
    return ptr;
  }

  if(flags & PARSLEY_OPTIONS_COLLATE) collate(ptr->xml->children);
  if(flags & PARSLEY_OPTIONS_PRUNE) visit(ptr, ptr->xml->children, NULL);
  return ptr;
}
|
|
487
|
+
|
|
488
|
+
static bool
|
|
489
|
+
json_invalid_object(parsleyPtr ptr, struct json_object *json) {
|
|
490
|
+
json_object_object_foreach(json, key, val) {
|
|
491
|
+
if(val==NULL) ptr->error = strdup("Parselets can only be made up of strings, arrays, and objects.");
|
|
492
|
+
|
|
493
|
+
switch(json_object_get_type(val)) {
|
|
494
|
+
case json_type_string:
|
|
495
|
+
break;
|
|
496
|
+
case json_type_array:
|
|
497
|
+
if(json_object_array_length(val) != 1) {
|
|
498
|
+
ptr->error = strdup("Parselet arrays should have length 1.");
|
|
499
|
+
return true;
|
|
500
|
+
}
|
|
501
|
+
struct json_object * inner = json_object_array_get_idx(val, 0);
|
|
502
|
+
switch(json_object_get_type(inner)) {
|
|
503
|
+
case json_type_string:
|
|
504
|
+
break;
|
|
505
|
+
case json_type_object:
|
|
506
|
+
if(json_invalid_object(ptr, inner)) return true;
|
|
507
|
+
break;
|
|
508
|
+
default:
|
|
509
|
+
ptr->error = strdup("Arrays may contain either a single string or an object.");
|
|
510
|
+
return true;
|
|
511
|
+
}
|
|
512
|
+
break;
|
|
513
|
+
case json_type_object:
|
|
514
|
+
if(json_invalid_object(ptr, val)) {
|
|
515
|
+
return true;
|
|
516
|
+
}
|
|
517
|
+
break;
|
|
518
|
+
default:
|
|
519
|
+
ptr->error = strdup("Parselets can only be made up of strings, arrays, and objects.");
|
|
520
|
+
}
|
|
521
|
+
if(val == NULL || !json_object_is_type(val, json_type_string)) return false;
|
|
522
|
+
}
|
|
523
|
+
return false;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
static bool
|
|
527
|
+
json_invalid(parsleyPtr ptr, struct json_object *json) {
|
|
528
|
+
if(!json_object_is_type(json, json_type_object)) {
|
|
529
|
+
ptr->error = strdup("The parselet root must be an object");
|
|
530
|
+
return true;
|
|
531
|
+
}
|
|
532
|
+
return json_invalid_object(ptr, json);
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
/*
 * Release a context together with its following siblings and its children.
 *
 * Before recursing, the context is detached from its parent's child list:
 * if it is the first child the parent's child pointer is cleared, otherwise
 * the sibling chain is cut just before it. Siblings that come after `c`
 * stay reachable through c->next and are freed by the recursion below.
 */
static void free_context(contextPtr c) {
  if(c == NULL) return;

  free(c->tag);  // free(NULL) is a no-op
  if(c->filter != NULL) pxpath_free(c->filter);
  if(c->expr != NULL) pxpath_free(c->expr);

  contextPtr owner = c->parent;
  if(owner != NULL && owner->child != NULL) {
    if(owner->child == c) {
      owner->child = NULL;
    } else {
      contextPtr walk = owner->child;
      while(walk->next != NULL) {
        if(walk->next == c) {
          walk->next = NULL;  // cut the chain; this also ends the loop
        } else {
          walk = walk->next;
        }
      }
    }
  }

  if(c->next != NULL) free_context(c->next);
  if(c->child != NULL) free_context(c->child);
  free(c);
}
|
|
559
|
+
|
|
560
|
+
static contextPtr
|
|
561
|
+
new_context(struct json_object * json, xmlNodePtr node) {
|
|
562
|
+
contextPtr c = calloc(sizeof(parsley_context), 1);
|
|
563
|
+
c->node = node;
|
|
564
|
+
c->ns = node->ns;
|
|
565
|
+
c->tag = strdup("root");
|
|
566
|
+
c->expr = pxpath_new_path(1, "/");
|
|
567
|
+
c->json = json;
|
|
568
|
+
return c;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
parsleyPtr parsley_compile(char* parsley_str, char* incl) {
|
|
572
|
+
parsleyPtr parsley = (parsleyPtr) calloc(sizeof(compiled_parsley), 1);
|
|
573
|
+
|
|
574
|
+
if(last_parsley_error != NULL) {
|
|
575
|
+
free(last_parsley_error);
|
|
576
|
+
last_parsley_error = NULL;
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
registerEXSLT();
|
|
580
|
+
|
|
581
|
+
// struct json_tokener *tok = json_tokener_new();
|
|
582
|
+
// struct json_object *json = json_tokener_parse_ex(tok, parsley_str);
|
|
583
|
+
//
|
|
584
|
+
struct json_tokener *tok = json_tokener_new();
|
|
585
|
+
struct json_object *json = json_tokener_parse_ex(tok, parsley_str, -1);
|
|
586
|
+
int error_offset = tok->char_offset;
|
|
587
|
+
if(tok->err != json_tokener_success)
|
|
588
|
+
json = error_ptr(-tok->err);
|
|
589
|
+
json_tokener_free(tok);
|
|
590
|
+
|
|
591
|
+
if(is_error(json)) {
|
|
592
|
+
asprintf(&(parsley->error), "Your parselet is not valid json: %s at char:%d", json_tokener_errors[-(unsigned long) json], error_offset);
|
|
593
|
+
return parsley;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
if(json_invalid(parsley, json)) {
|
|
597
|
+
// fprintf(stderr, "Invalid parselet structure: %s\n", parsley->error);
|
|
598
|
+
return parsley;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
xmlNodePtr node = new_stylesheet_skeleton(incl);
|
|
602
|
+
|
|
603
|
+
contextPtr context = new_context(json, node);
|
|
604
|
+
__parsley_recurse(context);
|
|
605
|
+
|
|
606
|
+
json_object_put(json); // frees json
|
|
607
|
+
parsley->error = last_parsley_error;
|
|
608
|
+
|
|
609
|
+
if(parsley->error == NULL) {
|
|
610
|
+
parsley->stylesheet = xsltParseStylesheetDoc(node->doc);
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
free_context(context);
|
|
614
|
+
return parsley;
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
/*
 * Create a child context for one key/value pair of the parselet and append
 * it to the end of `context`'s child list.
 *
 * context: parent context; node and namespace are inherited from it.
 * key:     parselet key; tag, flags and filter are derived from it.
 * val:     value for the key (may be NULL). For an array, the first element
 *          becomes the child's json.
 *
 * Side effect: parsley_parsing_context is pointed at the new context before
 * the filter and expression are parsed.
 *
 * Returns the new child context.
 */
contextPtr deeper_context(contextPtr context, char* key, struct json_object * val) {
  contextPtr c = (contextPtr) calloc(1, sizeof(parsley_context));
  c->node = context->node;
  c->ns = context->ns;
  c->parent = context;
  c->tag = parsley_key_tag(key);
  c->flags = parsley_key_flags(key);
  parsley_parsing_context = c;
  c->array = val != NULL && json_object_is_type(val, json_type_array);
  c->json = c->array ? json_object_array_get_idx(val, 0) : val;
  // Guard the object that is actually inspected: the array element may be
  // NULL even when val itself is not.
  c->string = c->json != NULL && json_object_is_type(c->json, json_type_string);
  c->filter = parsley_key_filter(key);
  c->magic = context->array && context->filter == NULL;
  c->expr = c->string ? myparse(json_object_get_string(c->json)) : NULL;

  if(context->child == NULL) {
    context->child = c;
  } else {
    contextPtr tmp = context->child;
    while(tmp->next != NULL) tmp = tmp->next;
    tmp->next = c;
  }
  return c;
}
|
|
641
|
+
|
|
642
|
+
/*
 * Release a compiled parselet: its error message, its stylesheet and the
 * structure itself. Passing NULL is a no-op, mirroring free().
 */
void parsley_free(parsleyPtr ptr) {
  if(ptr == NULL) return;
  free(ptr->error);  // free(NULL) is a no-op
  if(ptr->stylesheet != NULL)
    xsltFreeStylesheet(ptr->stylesheet);
  free(ptr);
}
|
|
649
|
+
|
|
650
|
+
void yyerror(const char * s) {
|
|
651
|
+
struct printbuf *buf = printbuf_new();
|
|
652
|
+
if(last_parsley_error !=NULL) sprintbuf(buf, "%s\n", last_parsley_error);
|
|
653
|
+
sprintbuf(buf, "%s in key: %s", s, full_key_name(parsley_parsing_context));
|
|
654
|
+
last_parsley_error = strdup(buf->buf);
|
|
655
|
+
printbuf_free(buf);
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
static bool
|
|
659
|
+
all_strings(struct json_object * json) {
|
|
660
|
+
json_object_object_foreach(json, key, val) {
|
|
661
|
+
if(val == NULL || !json_object_is_type(val, json_type_string)) return false;
|
|
662
|
+
}
|
|
663
|
+
return true;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
static char *
|
|
667
|
+
inner_path(pxpathPtr p) {
|
|
668
|
+
if(p == NULL) return NULL;
|
|
669
|
+
if(p->type == PXPATH_PATH) return pxpath_to_string(p);
|
|
670
|
+
char *tmp = NULL;
|
|
671
|
+
pxpathPtr ptr = p->child;
|
|
672
|
+
while(ptr != NULL) {
|
|
673
|
+
tmp = inner_path(p->child);
|
|
674
|
+
if(tmp != NULL) return tmp;
|
|
675
|
+
ptr = ptr->next;
|
|
676
|
+
}
|
|
677
|
+
return NULL;
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
//TODO: tree-based replace, instead of naive string-based
|
|
681
|
+
static char *
|
|
682
|
+
inner_path_to_dot(pxpathPtr p) {
|
|
683
|
+
char *outer = pxpath_to_string(p);
|
|
684
|
+
char *inner = inner_path(p);
|
|
685
|
+
char *repl = NULL;
|
|
686
|
+
if(inner != NULL) {
|
|
687
|
+
repl = arepl(outer, inner, ".");
|
|
688
|
+
free(inner);
|
|
689
|
+
}
|
|
690
|
+
free(outer);
|
|
691
|
+
return repl;
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
static bool
|
|
695
|
+
inner_path_transform(contextPtr c) {
|
|
696
|
+
return c->filter == NULL && c->expr != NULL && inner_path(c->expr) != NULL;
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
static char *
|
|
700
|
+
resolve_filter(contextPtr c) {
|
|
701
|
+
return inner_path_transform(c) ? inner_path(c->expr) : pxpath_to_string(c->filter);
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
static char *
|
|
705
|
+
resolve_expr(contextPtr c) {
|
|
706
|
+
return inner_path_transform(c) ? inner_path_to_dot(c->expr) : pxpath_to_string(c->expr);
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
static void
|
|
710
|
+
render(contextPtr c) {
|
|
711
|
+
char *filter = resolve_filter(c);
|
|
712
|
+
char *expr = resolve_expr(c);
|
|
713
|
+
char *scope = filter == NULL ? expr : filter;
|
|
714
|
+
bool magic_children = c->array && filter == NULL;
|
|
715
|
+
bool simple_array = c->array && filter != NULL;
|
|
716
|
+
bool filtered = filter != NULL;
|
|
717
|
+
bool multiple = (c->array || c->magic) && !magic_children;
|
|
718
|
+
|
|
719
|
+
// printf("node %s\n", c->node->name);
|
|
720
|
+
xmlNsPtr parsley = c->ns;
|
|
721
|
+
xmlNsPtr xsl = xmlDocGetRootElement(c->node->doc)->ns;
|
|
722
|
+
|
|
723
|
+
if(c->array) c->node = xmlNewChild(c->node, parsley, "groups", NULL);
|
|
724
|
+
if(filtered) {
|
|
725
|
+
c->node = xmlNewChild(c->node, xsl, "for-each", NULL);
|
|
726
|
+
xmlSetProp(c->node, "select", filter);
|
|
727
|
+
}
|
|
728
|
+
if(filtered && !multiple) {
|
|
729
|
+
c->node = xmlNewChild(c->node, xsl, "if", NULL);
|
|
730
|
+
xmlSetProp(c->node, "test", "position() = 1");
|
|
731
|
+
}
|
|
732
|
+
if(multiple) {
|
|
733
|
+
c->node = xmlNewChild(c->node, parsley, "group", NULL);
|
|
734
|
+
if (!(c->flags & PARSLEY_BANG)) xmlSetProp(c->node, "optional", "true");
|
|
735
|
+
}
|
|
736
|
+
xmlNodePtr attr = xmlNewChild(c->node, xsl, "attribute", NULL);
|
|
737
|
+
xmlSetProp(attr, "name", "position");
|
|
738
|
+
xmlNodePtr counter = xmlNewChild(attr, xsl, "value-of", NULL);
|
|
739
|
+
xmlSetProp(counter, "select", "count(preceding::*) + count(ancestor::*)");
|
|
740
|
+
|
|
741
|
+
if(c->string) {
|
|
742
|
+
c->node = xmlNewChild(c->node, xsl, "value-of", NULL);
|
|
743
|
+
xmlSetProp(c->node, "select", expr);
|
|
744
|
+
} else {
|
|
745
|
+
if(magic_children) c->node = xmlNewChild(c->node, parsley, "zipped", NULL);
|
|
746
|
+
__parsley_recurse(c);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
if(filter !=NULL) free(filter);
|
|
750
|
+
if(expr !=NULL) free(expr);
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
/*
 * Walk every key of context->json, creating a child context and an output
 * element (named after the child's tag) for each, then render the child.
 * Does nothing when the context has no JSON attached.
 */
void __parsley_recurse(contextPtr context) {
  if(context->json == NULL) return;

  json_object_object_foreach(context->json, key, val) {
    contextPtr child = deeper_context(context, key, val);
    xmlNodePtr element = xmlNewNode(NULL, child->tag);
    child->node = xmlAddChild(child->node, element);
    if (child->flags & PARSLEY_OPTIONAL) {
      xmlSetProp(child->node, "optional", "true");
    }
    render(child);
  }
}
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
// static char* full_expr(contextPtr context, char* expr) {
|
|
768
|
+
// if(expr == NULL) return context->full_expr;
|
|
769
|
+
// char* merged = arepl(expr, ".", context->full_expr);
|
|
770
|
+
// return arepl(merged, "///", "//");
|
|
771
|
+
// }
|
|
772
|
+
static char*
|
|
773
|
+
inner_key_each(struct json_object * json);
|
|
774
|
+
|
|
775
|
+
static char* inner_key_of(struct json_object * json) {
|
|
776
|
+
switch(json_object_get_type(json)) {
|
|
777
|
+
case json_type_string:
|
|
778
|
+
return json_object_get_string(json);
|
|
779
|
+
case json_type_array:
|
|
780
|
+
return NULL;
|
|
781
|
+
case json_type_object:
|
|
782
|
+
return inner_key_each(json);
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
static char*
|
|
787
|
+
inner_key_each(struct json_object * json) {
|
|
788
|
+
json_object_object_foreach(json, key, val) {
|
|
789
|
+
char* inner = inner_key_of(val);
|
|
790
|
+
if(inner != NULL) return inner;
|
|
791
|
+
}
|
|
792
|
+
return NULL;
|
|
793
|
+
}
|