gtl-parsley-ruby 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +152 -0
- data/ext/extconf.rb +82 -0
- data/ext/parsley/.gitignore +32 -0
- data/ext/parsley/AUTHORS +1 -0
- data/ext/parsley/ChangeLog +0 -0
- data/ext/parsley/HACKING +4 -0
- data/ext/parsley/INSTALL +73 -0
- data/ext/parsley/INTRO +84 -0
- data/ext/parsley/Makefile.am +80 -0
- data/ext/parsley/Makefile.in +1009 -0
- data/ext/parsley/NEWS +0 -0
- data/ext/parsley/PAPER +36 -0
- data/ext/parsley/Portfile +18 -0
- data/ext/parsley/Portfile.in +17 -0
- data/ext/parsley/README.C-LANG +92 -0
- data/ext/parsley/README.markdown +1 -0
- data/ext/parsley/TODO +39 -0
- data/ext/parsley/VERSION +1 -0
- data/ext/parsley/aclocal.m4 +8918 -0
- data/ext/parsley/bootstrap.sh +6 -0
- data/ext/parsley/config.guess +1561 -0
- data/ext/parsley/config.sub +1686 -0
- data/ext/parsley/configure +13437 -0
- data/ext/parsley/configure.ac +46 -0
- data/ext/parsley/depcomp +630 -0
- data/ext/parsley/functions.c +368 -0
- data/ext/parsley/functions.h +19 -0
- data/ext/parsley/generate_bisect.sh +12 -0
- data/ext/parsley/hooks/prepare-commit-msg +16 -0
- data/ext/parsley/install-sh +520 -0
- data/ext/parsley/json-c-0.9/AUTHORS +2 -0
- data/ext/parsley/json-c-0.9/COPYING +19 -0
- data/ext/parsley/json-c-0.9/ChangeLog +103 -0
- data/ext/parsley/json-c-0.9/INSTALL +302 -0
- data/ext/parsley/json-c-0.9/Makefile.am +43 -0
- data/ext/parsley/json-c-0.9/Makefile.in +800 -0
- data/ext/parsley/json-c-0.9/NEWS +1 -0
- data/ext/parsley/json-c-0.9/README +20 -0
- data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
- data/ext/parsley/json-c-0.9/README.html +32 -0
- data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
- data/ext/parsley/json-c-0.9/arraylist.c +94 -0
- data/ext/parsley/json-c-0.9/arraylist.h +53 -0
- data/ext/parsley/json-c-0.9/bits.h +27 -0
- data/ext/parsley/json-c-0.9/config.guess +1561 -0
- data/ext/parsley/json-c-0.9/config.h +125 -0
- data/ext/parsley/json-c-0.9/config.h.in +124 -0
- data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
- data/ext/parsley/json-c-0.9/config.sub +1686 -0
- data/ext/parsley/json-c-0.9/configure +13084 -0
- data/ext/parsley/json-c-0.9/configure.in +33 -0
- data/ext/parsley/json-c-0.9/debug.c +98 -0
- data/ext/parsley/json-c-0.9/debug.h +50 -0
- data/ext/parsley/json-c-0.9/depcomp +630 -0
- data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
- data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
- data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
- data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
- data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
- data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
- data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
- data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
- data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
- data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
- data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
- data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
- data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
- data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
- data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
- data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
- data/ext/parsley/json-c-0.9/install-sh +520 -0
- data/ext/parsley/json-c-0.9/json.h +31 -0
- data/ext/parsley/json-c-0.9/json.pc +11 -0
- data/ext/parsley/json-c-0.9/json.pc.in +11 -0
- data/ext/parsley/json-c-0.9/json_object.c +512 -0
- data/ext/parsley/json-c-0.9/json_object.h +319 -0
- data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
- data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
- data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
- data/ext/parsley/json-c-0.9/json_util.c +122 -0
- data/ext/parsley/json-c-0.9/json_util.h +31 -0
- data/ext/parsley/json-c-0.9/libjson.la +41 -0
- data/ext/parsley/json-c-0.9/libtool +8890 -0
- data/ext/parsley/json-c-0.9/linkhash.c +216 -0
- data/ext/parsley/json-c-0.9/linkhash.h +272 -0
- data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
- data/ext/parsley/json-c-0.9/missing +376 -0
- data/ext/parsley/json-c-0.9/printbuf.c +149 -0
- data/ext/parsley/json-c-0.9/printbuf.h +64 -0
- data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
- data/ext/parsley/json-c-0.9/test1 +130 -0
- data/ext/parsley/json-c-0.9/test1.c +164 -0
- data/ext/parsley/json-c-0.9/test2 +130 -0
- data/ext/parsley/json-c-0.9/test2.c +20 -0
- data/ext/parsley/json-c-0.9/test3 +130 -0
- data/ext/parsley/json-c-0.9/test3.c +23 -0
- data/ext/parsley/libtool +8890 -0
- data/ext/parsley/ltmain.sh +8406 -0
- data/ext/parsley/missing +376 -0
- data/ext/parsley/parsed_xpath.c +168 -0
- data/ext/parsley/parsed_xpath.h +34 -0
- data/ext/parsley/parser.y +631 -0
- data/ext/parsley/parsley.c +793 -0
- data/ext/parsley/parsley.h +87 -0
- data/ext/parsley/parsley_main.c +185 -0
- data/ext/parsley/parsleyc_main.c +108 -0
- data/ext/parsley/regexp.c +359 -0
- data/ext/parsley/regexp.h +36 -0
- data/ext/parsley/scanner.l +221 -0
- data/ext/parsley/test/ambiguous.html +207 -0
- data/ext/parsley/test/ambiguous.json +1 -0
- data/ext/parsley/test/ambiguous.let +6 -0
- data/ext/parsley/test/array-regression.html +5 -0
- data/ext/parsley/test/array-regression.json +1 -0
- data/ext/parsley/test/array-regression.let +10 -0
- data/ext/parsley/test/backslash.html +5 -0
- data/ext/parsley/test/backslash.json +1 -0
- data/ext/parsley/test/backslash.let +3 -0
- data/ext/parsley/test/bang.html +17 -0
- data/ext/parsley/test/bang.json +1 -0
- data/ext/parsley/test/bang.let +6 -0
- data/ext/parsley/test/collate_regression.html +324 -0
- data/ext/parsley/test/collate_regression.json +1 -0
- data/ext/parsley/test/collate_regression.let +9 -0
- data/ext/parsley/test/contains.html +3 -0
- data/ext/parsley/test/contains.json +1 -0
- data/ext/parsley/test/contains.let +3 -0
- data/ext/parsley/test/content.html +13 -0
- data/ext/parsley/test/content.json +1 -0
- data/ext/parsley/test/content.let +7 -0
- data/ext/parsley/test/cool.html +575 -0
- data/ext/parsley/test/cool.json +1 -0
- data/ext/parsley/test/cool.let +9 -0
- data/ext/parsley/test/craigs-simple.html +207 -0
- data/ext/parsley/test/craigs-simple.json +1 -0
- data/ext/parsley/test/craigs-simple.let +6 -0
- data/ext/parsley/test/craigs.html +207 -0
- data/ext/parsley/test/craigs.json +1 -0
- data/ext/parsley/test/craigs.let +9 -0
- data/ext/parsley/test/crash.html +157 -0
- data/ext/parsley/test/crash.json +1 -0
- data/ext/parsley/test/crash.let +1 -0
- data/ext/parsley/test/css_attr.html +3 -0
- data/ext/parsley/test/css_attr.json +1 -0
- data/ext/parsley/test/css_attr.let +3 -0
- data/ext/parsley/test/default-namespace.json +1 -0
- data/ext/parsley/test/default-namespace.let +3 -0
- data/ext/parsley/test/default-namespace.xml +1493 -0
- data/ext/parsley/test/div.html +8 -0
- data/ext/parsley/test/div.json +1 -0
- data/ext/parsley/test/div.let +10 -0
- data/ext/parsley/test/empty.html +3 -0
- data/ext/parsley/test/empty.json +1 -0
- data/ext/parsley/test/empty.let +1 -0
- data/ext/parsley/test/emptyish.html +207 -0
- data/ext/parsley/test/emptyish.let +3 -0
- data/ext/parsley/test/fictional-opt.html +43 -0
- data/ext/parsley/test/fictional-opt.json +1 -0
- data/ext/parsley/test/fictional-opt.let +14 -0
- data/ext/parsley/test/fictional.html +43 -0
- data/ext/parsley/test/fictional.json +1 -0
- data/ext/parsley/test/fictional.let +14 -0
- data/ext/parsley/test/function-magic.html +9 -0
- data/ext/parsley/test/function-magic.json +1 -0
- data/ext/parsley/test/function-magic.let +8 -0
- data/ext/parsley/test/hn.html +32 -0
- data/ext/parsley/test/hn.json +1 -0
- data/ext/parsley/test/hn.let +8 -0
- data/ext/parsley/test/malformed-array.html +2329 -0
- data/ext/parsley/test/malformed-array.json +1 -0
- data/ext/parsley/test/malformed-array.let +22 -0
- data/ext/parsley/test/malformed-expr.html +2329 -0
- data/ext/parsley/test/malformed-expr.json +1 -0
- data/ext/parsley/test/malformed-expr.let +16 -0
- data/ext/parsley/test/malformed-function.html +845 -0
- data/ext/parsley/test/malformed-function.json +197 -0
- data/ext/parsley/test/malformed-function.let +8 -0
- data/ext/parsley/test/malformed-json.html +2329 -0
- data/ext/parsley/test/malformed-json.json +1 -0
- data/ext/parsley/test/malformed-json.let +6 -0
- data/ext/parsley/test/malformed-xpath.html +8 -0
- data/ext/parsley/test/malformed-xpath.json +1 -0
- data/ext/parsley/test/malformed-xpath.let +7 -0
- data/ext/parsley/test/match.json +1 -0
- data/ext/parsley/test/match.let +9 -0
- data/ext/parsley/test/match.xml +11 -0
- data/ext/parsley/test/math_ambiguity.html +9 -0
- data/ext/parsley/test/math_ambiguity.json +1 -0
- data/ext/parsley/test/math_ambiguity.let +5 -0
- data/ext/parsley/test/nth-regression.html +13 -0
- data/ext/parsley/test/nth-regression.json +1 -0
- data/ext/parsley/test/nth-regression.let +3 -0
- data/ext/parsley/test/optional.html +2328 -0
- data/ext/parsley/test/optional.json +1 -0
- data/ext/parsley/test/optional.let +8 -0
- data/ext/parsley/test/outer-xml.html +6 -0
- data/ext/parsley/test/outer-xml.json +1 -0
- data/ext/parsley/test/outer-xml.let +5 -0
- data/ext/parsley/test/position.html +8 -0
- data/ext/parsley/test/position.json +1 -0
- data/ext/parsley/test/position.let +6 -0
- data/ext/parsley/test/question_regressions.html +443 -0
- data/ext/parsley/test/question_regressions.json +1 -0
- data/ext/parsley/test/question_regressions.let +6 -0
- data/ext/parsley/test/quote.json +1 -0
- data/ext/parsley/test/quote.let +8 -0
- data/ext/parsley/test/quote.xml +11 -0
- data/ext/parsley/test/reddit.html +1 -0
- data/ext/parsley/test/reddit.json +1 -0
- data/ext/parsley/test/reddit.let +12 -0
- data/ext/parsley/test/remote-fail.json +1 -0
- data/ext/parsley/test/remote.html +3 -0
- data/ext/parsley/test/remote.json +1 -0
- data/ext/parsley/test/remote.let +4 -0
- data/ext/parsley/test/replace.json +1 -0
- data/ext/parsley/test/replace.let +9 -0
- data/ext/parsley/test/replace.xml +11 -0
- data/ext/parsley/test/scope.html +10 -0
- data/ext/parsley/test/scope.json +1 -0
- data/ext/parsley/test/scope.let +6 -0
- data/ext/parsley/test/segfault.html +5 -0
- data/ext/parsley/test/segfault.json +1 -0
- data/ext/parsley/test/segfault.let +9 -0
- data/ext/parsley/test/sg-wrap.html +5 -0
- data/ext/parsley/test/sg-wrap.json +1 -0
- data/ext/parsley/test/sg-wrap.let +3 -0
- data/ext/parsley/test/sg_off.html +5 -0
- data/ext/parsley/test/sg_off.json +1 -0
- data/ext/parsley/test/sg_off.let +3 -0
- data/ext/parsley/test/test.json +1 -0
- data/ext/parsley/test/test.let +6 -0
- data/ext/parsley/test/test.xml +11 -0
- data/ext/parsley/test/trivial.html +2329 -0
- data/ext/parsley/test/trivial.json +1 -0
- data/ext/parsley/test/trivial.let +4 -0
- data/ext/parsley/test/trivial2.html +2329 -0
- data/ext/parsley/test/trivial2.json +1 -0
- data/ext/parsley/test/trivial2.let +7 -0
- data/ext/parsley/test/unbang.html +17 -0
- data/ext/parsley/test/unbang.json +1 -0
- data/ext/parsley/test/unbang.let +6 -0
- data/ext/parsley/test/unicode.html +3 -0
- data/ext/parsley/test/unicode.json +1 -0
- data/ext/parsley/test/unicode.let +1 -0
- data/ext/parsley/test/whitespace.html +8 -0
- data/ext/parsley/test/whitespace.json +1 -0
- data/ext/parsley/test/whitespace.let +3 -0
- data/ext/parsley/test/whitespace_regression.html +4 -0
- data/ext/parsley/test/whitespace_regression.json +1 -0
- data/ext/parsley/test/whitespace_regression.let +3 -0
- data/ext/parsley/test/yelp-benchmark.rb +53 -0
- data/ext/parsley/test/yelp-home.html +1004 -0
- data/ext/parsley/test/yelp-home.json +1 -0
- data/ext/parsley/test/yelp-home.let +6 -0
- data/ext/parsley/test/yelp.html +2329 -0
- data/ext/parsley/test/yelp.json +1 -0
- data/ext/parsley/test/yelp.let +12 -0
- data/ext/parsley/test/youtube.html +1940 -0
- data/ext/parsley/test/youtube.let +11 -0
- data/ext/parsley/util.c +237 -0
- data/ext/parsley/util.h +34 -0
- data/ext/parsley/xml2json.c +47 -0
- data/ext/parsley/xml2json.h +14 -0
- data/ext/parsley/y.tab.h +222 -0
- data/ext/parsley/ylwrap +222 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +120 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +366 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#ifndef PARSLEY_H_INCLUDED
#define PARSLEY_H_INCLUDED

/*
 * parsley.h -- public declarations for compiling a parselet into an XSLT
 * stylesheet and applying it to a document.
 */

#define PARSLEY_BUF_SIZE 1024

#include <stdbool.h>
#include <stddef.h>   /* size_t, used by parsley_parse_string() below */
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <json/json.h>
#include "parsed_xpath.h"

/*
 * NOTE(review): these objects are `static` and defined in a header, so every
 * translation unit that includes this file gets its own private copy; they
 * are not shared between .c files.
 */
static int parsley_debug_mode = 0;
static char* last_parsley_error;

/*
 * Result of parsley_compile().  `error` is non-NULL when compilation
 * failed (callers test it before using `stylesheet`).
 */
typedef struct __compiled_parsley {
  xsltStylesheetPtr stylesheet;   /* compiled XSLT form of the parselet */
  char* error;                    /* error message, or NULL */
} compiled_parsley;

/*
 * Result of the parsley_parse_*() functions.  `error` is non-NULL when
 * parsing failed (callers test it before using `xml`).
 */
typedef struct __parsed_parsley {
  xmlDocPtr xml;                  /* output document */
  char *error;                    /* error message, or NULL */
  compiled_parsley *parsley;      /* presumably the parselet that produced this result -- TODO confirm */
} parsed_parsley;

typedef compiled_parsley * parsleyPtr;
typedef parsed_parsley * parsedParsleyPtr;

/* Singly linked list node holding a (name, use) pair of strings. */
typedef struct __key_node {
  char* name;
  char* use;
  struct __key_node * next;
} key_node;

typedef key_node * keyPtr;

/*
 * Node of a tree of contexts, linked through `parent`, `child` and `next`.
 * NOTE(review): the exact role of each field is defined by parsley.c,
 * which is not visible here.
 */
typedef struct __parsley_context {
  xmlNsPtr ns;
  xmlNodePtr node;
  struct json_object * json;
  char* tag;
  pxpathPtr filter;
  pxpathPtr expr;
  bool magic;
  bool array;
  bool string;
  int flags; /* bitmask over the PARSLEY_OPTIONAL / PARSLEY_BANG enum below */
  struct __parsley_context * parent;
  struct __parsley_context *child;
  struct __parsley_context *next;
} parsley_context;

/* Bits for parsley_context.flags. */
enum {
  PARSLEY_OPTIONAL = 1,
  PARSLEY_BANG     = 2
};

/* Bits for the `flags` argument of the parsley_parse_*() functions. */
enum {
  PARSLEY_OPTIONS_HTML        = 1,
  PARSLEY_OPTIONS_PRUNE       = 2,
  PARSLEY_OPTIONS_ALLOW_NET   = 4,
  PARSLEY_OPTIONS_ALLOW_LOCAL = 8,
  PARSLEY_OPTIONS_COLLATE     = 16,
  PARSLEY_OPTIONS_SGWRAP      = 32,
  PARSLEY_OPTIONS_FORCE_UTF8  = 64
};

typedef parsley_context * contextPtr;

/* Release a result returned by one of the parsley_parse_*() functions. */
void parsed_parsley_free(parsedParsleyPtr);

/* NOTE(review): by its name, strips the default namespace from `doc` -- confirm in parsley.c. */
void killDefaultNS(xmlDocPtr doc);

/* Release a parselet returned by parsley_compile(). */
void parsley_free(parsleyPtr);

/*
 * Compile the parselet source text `parsley`; `incl` is extra text to
 * include in the generated XSLT.  Check `->error` on the result.
 */
parsleyPtr parsley_compile(char* parsley, char* incl);

/*
 * Apply a compiled parselet to a file, an in-memory string of `size`
 * bytes, or an already parsed document.  `flags` is a PARSLEY_OPTIONS_*
 * bitmask.  Check `->error` on the result.
 */
parsedParsleyPtr parsley_parse_file(parsleyPtr parsley, char* file, int flags);
parsedParsleyPtr parsley_parse_string(parsleyPtr parsley, char* string, size_t size, char* base_uri, int flags);
parsedParsleyPtr parsley_parse_doc(parsleyPtr, xmlDocPtr, int);

/* printf-style error callback (signature matches libxml2/libxslt error handlers). */
void parsleyXsltError(void * ctx, const char * msg, ...);

/* Set the HTTP User-Agent string (used by the CLI's --user-agent option). */
void parsley_set_user_agent(char const *agent);

/* NOTE(review): `static` in a header -- one private copy per translation unit. */
static contextPtr parsley_parsing_context;

#endif
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <json/printbuf.h>
|
|
5
|
+
#include "parsley.h"
|
|
6
|
+
#include "xml2json.h"
|
|
7
|
+
#include <libxslt/xslt.h>
|
|
8
|
+
#include <libxslt/xsltInternals.h>
|
|
9
|
+
#include <libxslt/transform.h>
|
|
10
|
+
#include <libxml/parser.h>
|
|
11
|
+
#include <libxml/HTMLparser.h>
|
|
12
|
+
#include <libxml/HTMLtree.h>
|
|
13
|
+
#include <libxml/xmlwriter.h>
|
|
14
|
+
#include <json/json.h>
|
|
15
|
+
#include <argp.h>
|
|
16
|
+
#include "util.h"
|
|
17
|
+
|
|
18
|
+
/* Command-line state for the `parsley` tool, filled in by parse_opt(). */
struct arguments {
  struct list_elem *include_files; /* sentinel head of the -i FILE list */
  int flags;                       /* PARSLEY_OPTIONS_* bitmask */
  int output_xml;                  /* non-zero: emit XML instead of JSON (-X) */
  char *parsley;                   /* first positional argument: parselet path */
  char *user_agent;                /* not assigned by the option parser in this file */
  char *input_file;                /* second positional argument: document to parse */
  char *output_file;               /* -o FILE */
};
|
|
28
|
+
|
|
29
|
+
/*
 * Node of the singly linked list of include files.  `next` is only
 * meaningful while `has_next` is non-zero.
 */
struct list_elem {
  int has_next;            /* non-zero when `next` points at another node */
  struct list_elem *next;  /* following node */
  char *string;            /* option argument (a file name) */
};
|
|
34
|
+
|
|
35
|
+
/* Strings picked up by argp for --version and the --help bug-report line. */
const char *argp_program_version = "parsley 0.1";
const char *argp_program_bug_address = "<kyle@kylemaxwell.com>";

/* Usage synopsis and one-line description shown by --help. */
static char args_doc[] = "PARSELET FILE_TO_PARSE";
static char doc[] = "Parsley is a parselet parser.";

/* Option table for argp; each `key` is handled in parse_opt(). */
static struct argp_option options[] = {
  { .name = "input-xml",     .key = 'x', .doc = "Use the XML parser (not HTML)" },
  { .name = "output-xml",    .key = 'X', .doc = "Output XML (not JSON)" },
  { .name = "output",        .key = 'o', .arg = "FILE",
    .doc = "Output to FILE instead of standard output" },
  { .name = "include",       .key = 'i', .arg = "FILE",
    .doc = "Include the contents of FILE in the compiled XSLT" },
  { .name = "no-prune",      .key = 'n', .doc = "Don't prune empty subtrees" },
  { .name = "no-collate",    .key = 'N', .doc = "Don't collate array entries" },
  { .name = "sg-wrap",       .key = 's',
    .doc = "Wrap text nodes for SelectorGadget compatibility" },
  { .name = "user-agent",    .key = 'U', .arg = "USER_AGENT",
    .doc = "Value of HTTP User-Agent header" },
  { .name = "utf8",          .key = 'u', .doc = "Force input to be read as UTF-8" },
  { .name = "no-net",        .key = 'z',
    .doc = "Disable ftp and http access for parselets" },
  { .name = "no-filesystem", .key = 'Z',
    .doc = "Disable filesystem access for parselets" },
  { 0 } /* terminator */
};
|
|
54
|
+
|
|
55
|
+
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
|
56
|
+
{
|
|
57
|
+
struct arguments *arguments = state->input;
|
|
58
|
+
struct list_elem *base = arguments->include_files;
|
|
59
|
+
struct list_elem *e;
|
|
60
|
+
|
|
61
|
+
switch (key)
|
|
62
|
+
{
|
|
63
|
+
case 'x':
|
|
64
|
+
arguments->flags &= ~PARSLEY_OPTIONS_HTML;
|
|
65
|
+
break;
|
|
66
|
+
case 'u':
|
|
67
|
+
arguments->flags |= PARSLEY_OPTIONS_FORCE_UTF8;
|
|
68
|
+
break;
|
|
69
|
+
case 'U':
|
|
70
|
+
parsley_set_user_agent(arg);
|
|
71
|
+
case 'n':
|
|
72
|
+
arguments->flags &= ~PARSLEY_OPTIONS_PRUNE;
|
|
73
|
+
break;
|
|
74
|
+
case 'N':
|
|
75
|
+
arguments->flags &= ~PARSLEY_OPTIONS_COLLATE;
|
|
76
|
+
break;
|
|
77
|
+
case 'z':
|
|
78
|
+
arguments->flags &= ~PARSLEY_OPTIONS_ALLOW_NET;
|
|
79
|
+
break;
|
|
80
|
+
case 's':
|
|
81
|
+
arguments->flags |= PARSLEY_OPTIONS_SGWRAP;
|
|
82
|
+
break;
|
|
83
|
+
case 'Z':
|
|
84
|
+
arguments->flags &= ~PARSLEY_OPTIONS_ALLOW_LOCAL;
|
|
85
|
+
break;
|
|
86
|
+
case 'X':
|
|
87
|
+
arguments->output_xml = 1;
|
|
88
|
+
break;
|
|
89
|
+
case 'i':
|
|
90
|
+
e = (struct list_elem *) calloc(1, sizeof(e));
|
|
91
|
+
e->string = arg;
|
|
92
|
+
while(base->has_next) base = base->next;
|
|
93
|
+
base->next = e;
|
|
94
|
+
base->has_next = 1;
|
|
95
|
+
break;
|
|
96
|
+
case 'o':
|
|
97
|
+
arguments->output_file = arg;
|
|
98
|
+
break;
|
|
99
|
+
case ARGP_KEY_ARG:
|
|
100
|
+
switch(state->arg_num){
|
|
101
|
+
case 0:
|
|
102
|
+
arguments->parsley = arg;
|
|
103
|
+
break;
|
|
104
|
+
case 1:
|
|
105
|
+
arguments->input_file = arg;
|
|
106
|
+
break;
|
|
107
|
+
default:
|
|
108
|
+
argp_usage (state);
|
|
109
|
+
}
|
|
110
|
+
break;
|
|
111
|
+
case ARGP_KEY_END:
|
|
112
|
+
if (state->arg_num < 2) argp_usage (state);
|
|
113
|
+
break;
|
|
114
|
+
default:
|
|
115
|
+
return ARGP_ERR_UNKNOWN;
|
|
116
|
+
}
|
|
117
|
+
return 0;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/* Parser description handed to argp_parse() in main(). */
static struct argp argp = {
  .options  = options,
  .parser   = parse_opt,
  .args_doc = args_doc,
  .doc      = doc
};
|
|
121
|
+
|
|
122
|
+
int main (int argc, char **argv) {
|
|
123
|
+
struct arguments arguments;
|
|
124
|
+
struct list_elem elem;
|
|
125
|
+
struct list_elem *elemptr = &elem;
|
|
126
|
+
elem.has_next = 0;
|
|
127
|
+
arguments.output_xml = 0;
|
|
128
|
+
arguments.flags = ~0 & ~PARSLEY_OPTIONS_SGWRAP & ~PARSLEY_OPTIONS_FORCE_UTF8;
|
|
129
|
+
arguments.include_files = elemptr;
|
|
130
|
+
arguments.output_file = "-";
|
|
131
|
+
argp_parse (&argp, argc, argv, 0, 0, &arguments);
|
|
132
|
+
|
|
133
|
+
struct printbuf *buf = printbuf_new();
|
|
134
|
+
struct printbuf *incl = printbuf_new();
|
|
135
|
+
sprintbuf(buf, "");
|
|
136
|
+
sprintbuf(incl, "");
|
|
137
|
+
|
|
138
|
+
FILE * fd = parsley_fopen(arguments.parsley, "r");
|
|
139
|
+
printbuf_file_read(fd, buf);
|
|
140
|
+
fclose(fd);
|
|
141
|
+
|
|
142
|
+
while(elemptr->has_next) {
|
|
143
|
+
elemptr = elemptr->next;
|
|
144
|
+
FILE* f = parsley_fopen(elemptr->string, "r");
|
|
145
|
+
printbuf_file_read(f, incl);
|
|
146
|
+
fclose(f);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// printf("a\n");
|
|
150
|
+
parsleyPtr compiled = parsley_compile(buf->buf, incl->buf);
|
|
151
|
+
// printf("b\n");
|
|
152
|
+
|
|
153
|
+
if(compiled->error != NULL) {
|
|
154
|
+
fprintf(stderr, "%s\n", compiled->error);
|
|
155
|
+
exit(1);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
parsedParsleyPtr ptr = parsley_parse_file(compiled, arguments.input_file, arguments.flags);
|
|
159
|
+
|
|
160
|
+
if(ptr->error != NULL) {
|
|
161
|
+
fprintf(stderr, "Parsing failed: %s\n", ptr->error);
|
|
162
|
+
exit(1);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
if(arguments.output_xml) {
|
|
166
|
+
xmlSaveFormatFile(arguments.output_file, ptr->xml, 1);
|
|
167
|
+
} else {
|
|
168
|
+
struct json_object *json = xml2json(ptr->xml->children->children);
|
|
169
|
+
if(json == NULL) {
|
|
170
|
+
fprintf(stderr, "xml2json unknown error");
|
|
171
|
+
exit(1);
|
|
172
|
+
}
|
|
173
|
+
char * json_string = json_object_to_json_string(json);
|
|
174
|
+
FILE* f = parsley_fopen(arguments.output_file, "w");
|
|
175
|
+
fprintf(f, "%s\n", json_string);
|
|
176
|
+
json_object_put(json);
|
|
177
|
+
fclose(f);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
printbuf_free(buf);
|
|
181
|
+
printbuf_free(incl);
|
|
182
|
+
parsley_free(compiled);
|
|
183
|
+
parsed_parsley_free(ptr);
|
|
184
|
+
return 0;
|
|
185
|
+
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <argp.h>
|
|
4
|
+
#include <string.h>
|
|
5
|
+
#include <json/printbuf.h>
|
|
6
|
+
#include "parsley.h"
|
|
7
|
+
#include "util.h"
|
|
8
|
+
|
|
9
|
+
/*
 * Node of the singly linked list of include files.  `next` is only
 * meaningful while `has_next` is non-zero.
 */
struct list_elem {
  int has_next;            /* non-zero when `next` points at another node */
  struct list_elem *next;  /* following node */
  char *string;            /* option argument (a file name) */
};
|
|
14
|
+
|
|
15
|
+
struct arguments
|
|
16
|
+
{
|
|
17
|
+
struct list_elem *include_files;
|
|
18
|
+
char *parsley;
|
|
19
|
+
char *output_file;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
/* Strings picked up by argp for --version and the --help bug-report line. */
const char *argp_program_version = "parsleyc 0.1";
const char *argp_program_bug_address = "<kyle@kylemaxwell.com>";

/* Usage synopsis and one-line description shown by --help. */
static char args_doc[] = "DEX_FILE";
static char doc[] = "Parsleyc is a parselet to XSLT compiler";

/* Option table for argp; each `key` is handled in parse_opt(). */
static struct argp_option options[] = {
  { .name = "debug",   .key = 'd', .doc = "Turn on Bison parser debugging" },
  { .name = "output",  .key = 'o', .arg = "FILE",
    .doc = "Output to FILE instead of standard output" },
  { .name = "include", .key = 'i', .arg = "FILE",
    .doc = "Include the contents of FILE in the produced XSLT" },
  { 0 } /* terminator */
};
|
|
33
|
+
|
|
34
|
+
static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
|
35
|
+
{
|
|
36
|
+
struct arguments *arguments = state->input;
|
|
37
|
+
struct list_elem *base = arguments->include_files;
|
|
38
|
+
struct list_elem *e;
|
|
39
|
+
|
|
40
|
+
switch (key)
|
|
41
|
+
{
|
|
42
|
+
case 'i':
|
|
43
|
+
e = (struct list_elem *) calloc(1, sizeof(e));
|
|
44
|
+
e->string = arg;
|
|
45
|
+
while(base->has_next) base = base->next;
|
|
46
|
+
base->next = e;
|
|
47
|
+
base->has_next = 1;
|
|
48
|
+
break;
|
|
49
|
+
case 'd':
|
|
50
|
+
// parsley_set_debug_mode(1);
|
|
51
|
+
break;
|
|
52
|
+
case 'o':
|
|
53
|
+
arguments->output_file = arg;
|
|
54
|
+
break;
|
|
55
|
+
case ARGP_KEY_ARG:
|
|
56
|
+
if (state->arg_num >= 1) argp_usage (state);
|
|
57
|
+
arguments->parsley = arg;
|
|
58
|
+
break;
|
|
59
|
+
case ARGP_KEY_END:
|
|
60
|
+
if (state->arg_num < 1) argp_usage (state);
|
|
61
|
+
break;
|
|
62
|
+
default:
|
|
63
|
+
return ARGP_ERR_UNKNOWN;
|
|
64
|
+
}
|
|
65
|
+
return 0;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/* Parser description handed to argp_parse() in main(). */
static struct argp argp = {
  .options  = options,
  .parser   = parse_opt,
  .args_doc = args_doc,
  .doc      = doc
};
|
|
69
|
+
|
|
70
|
+
int main (int argc, char **argv) {
|
|
71
|
+
struct arguments arguments;
|
|
72
|
+
struct list_elem elem;
|
|
73
|
+
struct list_elem *elemptr = &elem;
|
|
74
|
+
elem.has_next = 0;
|
|
75
|
+
|
|
76
|
+
arguments.include_files = elemptr;
|
|
77
|
+
arguments.output_file = "-";
|
|
78
|
+
arguments.parsley = "-";
|
|
79
|
+
argp_parse (&argp, argc, argv, 0, 0, &arguments);
|
|
80
|
+
|
|
81
|
+
struct printbuf* parsley = printbuf_new();
|
|
82
|
+
struct printbuf* incl = printbuf_new();
|
|
83
|
+
sprintbuf(parsley, "");
|
|
84
|
+
sprintbuf(incl, "");
|
|
85
|
+
|
|
86
|
+
FILE* in = parsley_fopen(arguments.parsley, "r");
|
|
87
|
+
|
|
88
|
+
printbuf_file_read(in, parsley);
|
|
89
|
+
while(elemptr->has_next) {
|
|
90
|
+
elemptr = elemptr->next;
|
|
91
|
+
FILE* f = parsley_fopen(elemptr->string, "r");
|
|
92
|
+
printbuf_file_read(f, incl);
|
|
93
|
+
fclose(f);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
parsleyPtr compiled = parsley_compile(parsley->buf, incl->buf);
|
|
97
|
+
if(compiled->error != NULL) {
|
|
98
|
+
fprintf(stderr, "%s\n", compiled->error);
|
|
99
|
+
exit(1);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
FILE* fo = parsley_fopen(arguments.output_file, "w");
|
|
103
|
+
xmlDocFormatDump(fo, compiled->stylesheet->doc, 1);
|
|
104
|
+
fclose(fo);
|
|
105
|
+
|
|
106
|
+
return 0;
|
|
107
|
+
}
|
|
108
|
+
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* regexp.c: Implementation of the EXSLT -- Regular Expressions module
|
|
3
|
+
*
|
|
4
|
+
* References:
|
|
5
|
+
* http://exslt.org/regexp/index.html
|
|
6
|
+
*
|
|
7
|
+
* See Copyright for the status of this software.
|
|
8
|
+
*
|
|
9
|
+
* Authors:
|
|
10
|
+
* Joel W. Reed <joelwreed@gmail.com>
|
|
11
|
+
* Some modification by Kyle Maxwell
|
|
12
|
+
*
|
|
13
|
+
* TODO:
|
|
14
|
+
* functions:
|
|
15
|
+
* regexp:match
|
|
16
|
+
* regexp:replace
|
|
17
|
+
* regexp:test
|
|
18
|
+
*/
|
|
19
|
+
#include "regexp.h"
|
|
20
|
+
|
|
21
|
+
static void
|
|
22
|
+
exsltRegexpFlagsFromString(const xmlChar* flagstr,
|
|
23
|
+
int* global, int* flags)
|
|
24
|
+
{
|
|
25
|
+
const xmlChar* i = flagstr;
|
|
26
|
+
|
|
27
|
+
/* defaults */
|
|
28
|
+
(*flags) = PCRE_UTF8;
|
|
29
|
+
(*global) = 0;
|
|
30
|
+
|
|
31
|
+
while (*i != '\0')
|
|
32
|
+
{
|
|
33
|
+
if (*i == 'i') (*flags) |= PCRE_CASELESS;
|
|
34
|
+
else if (*i == 'g') (*global)= 1;
|
|
35
|
+
/* TODO: support other flags? */
|
|
36
|
+
i++;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
static int
|
|
41
|
+
exsltRegexpExecute(xmlXPathParserContextPtr ctxt,
|
|
42
|
+
const xmlChar* haystack, const xmlChar* regexp,
|
|
43
|
+
int flags, int ovector[], int ovector_len)
|
|
44
|
+
{
|
|
45
|
+
int haystack_len = 0;
|
|
46
|
+
pcre *compiled_regexp = NULL;
|
|
47
|
+
int rc = 0, erroffset = 0;
|
|
48
|
+
const char *error = 0;
|
|
49
|
+
|
|
50
|
+
compiled_regexp = pcre_compile(regexp, /* the pattern */
|
|
51
|
+
flags, /* default options */
|
|
52
|
+
&error, /* for error message */
|
|
53
|
+
&erroffset, /* for error offset */
|
|
54
|
+
NULL); /* use default character tables */
|
|
55
|
+
|
|
56
|
+
if (compiled_regexp == NULL) {
|
|
57
|
+
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
|
58
|
+
"exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
|
|
59
|
+
return -1;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
haystack_len = xmlUTF8Strlen (haystack);
|
|
63
|
+
|
|
64
|
+
rc = pcre_exec(compiled_regexp, /* result of pcre_compile() */
|
|
65
|
+
NULL, /* we didn't study the pattern */
|
|
66
|
+
haystack, /* the subject string */
|
|
67
|
+
haystack_len, /* the length of the subject string */
|
|
68
|
+
0, /* start at offset 0 in the subject */
|
|
69
|
+
0, /* default options */
|
|
70
|
+
(int*)ovector, /* vector of integers for substring information */
|
|
71
|
+
ovector_len); /* number of elements in the vector (NOT size in bytes) */
|
|
72
|
+
|
|
73
|
+
if (rc < -1) {
|
|
74
|
+
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
|
75
|
+
"exslt:regexp failed to execute %s for %s", regexp, haystack);
|
|
76
|
+
rc = 0;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (compiled_regexp != NULL)
|
|
80
|
+
pcre_free(compiled_regexp);
|
|
81
|
+
|
|
82
|
+
return rc;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* exsltRegexpMatchFunction:
|
|
87
|
+
* @ns:
|
|
88
|
+
*
|
|
89
|
+
* Returns a node set of string matches
|
|
90
|
+
*/
|
|
91
|
+
|
|
92
|
+
static void
|
|
93
|
+
exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
94
|
+
{
|
|
95
|
+
xsltTransformContextPtr tctxt;
|
|
96
|
+
xmlNodePtr node;
|
|
97
|
+
xmlDocPtr container;
|
|
98
|
+
xmlXPathObjectPtr ret = NULL;
|
|
99
|
+
xmlChar *haystack, *regexp, *flagstr, *working, *match;
|
|
100
|
+
int rc, x, flags, global, ovector[30];
|
|
101
|
+
|
|
102
|
+
if ((nargs < 1) || (nargs > 3)) {
|
|
103
|
+
xmlXPathSetArityError(ctxt);
|
|
104
|
+
return;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if (nargs > 2) {
|
|
109
|
+
flagstr = xmlXPathPopString(ctxt);
|
|
110
|
+
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
} else {
|
|
114
|
+
flagstr = xmlStrdup("");
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
regexp = xmlXPathPopString(ctxt);
|
|
118
|
+
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
|
119
|
+
xmlFree(flagstr);
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
haystack = xmlXPathPopString(ctxt);
|
|
124
|
+
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
125
|
+
xmlFree(regexp);
|
|
126
|
+
xmlFree(flagstr);
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/* Return a result tree fragment */
|
|
131
|
+
tctxt = xsltXPathGetTransformContext(ctxt);
|
|
132
|
+
if (tctxt == NULL) {
|
|
133
|
+
xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
|
|
134
|
+
"exslt:regexp : internal error tctxt == NULL\n");
|
|
135
|
+
goto fail;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
container = xsltCreateRVT(tctxt);
|
|
139
|
+
if (container != NULL) {
|
|
140
|
+
xsltRegisterTmpRVT(tctxt, container);
|
|
141
|
+
ret = xmlXPathNewNodeSet(NULL);
|
|
142
|
+
if (ret != NULL) {
|
|
143
|
+
ret->boolval = 0;
|
|
144
|
+
|
|
145
|
+
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
146
|
+
working = haystack;
|
|
147
|
+
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
148
|
+
ovector, sizeof(ovector)/sizeof(int));
|
|
149
|
+
|
|
150
|
+
while (rc > 0) {
|
|
151
|
+
for(int group = 0; group < rc; group++) {
|
|
152
|
+
match = xmlStrsub(working, ovector[group*2], ovector[group*2+1]-ovector[group*2]);
|
|
153
|
+
if (NULL == match) goto fail;
|
|
154
|
+
|
|
155
|
+
node = xmlNewDocRawNode(container, NULL, "match", match);
|
|
156
|
+
xmlFree(match);
|
|
157
|
+
|
|
158
|
+
xmlAddChild((xmlNodePtr) container, node);
|
|
159
|
+
xmlXPathNodeSetAddUnique(ret->nodesetval, node);
|
|
160
|
+
}
|
|
161
|
+
if (!global) break;
|
|
162
|
+
|
|
163
|
+
working = working + ovector[1];
|
|
164
|
+
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
165
|
+
ovector, sizeof(ovector)/sizeof(int));
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
fail:
|
|
171
|
+
if (flagstr != NULL)
|
|
172
|
+
xmlFree(flagstr);
|
|
173
|
+
if (regexp != NULL)
|
|
174
|
+
xmlFree(regexp);
|
|
175
|
+
if (haystack != NULL)
|
|
176
|
+
xmlFree(haystack);
|
|
177
|
+
|
|
178
|
+
if (ret != NULL)
|
|
179
|
+
valuePush(ctxt, ret);
|
|
180
|
+
else
|
|
181
|
+
valuePush(ctxt, xmlXPathNewNodeSet(NULL));
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* exsltRegexpReplaceFunction:
|
|
186
|
+
* @ns:
|
|
187
|
+
*
|
|
188
|
+
* Returns a node set of string matches
|
|
189
|
+
*/
|
|
190
|
+
|
|
191
|
+
static void
|
|
192
|
+
exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
193
|
+
{
|
|
194
|
+
xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
|
|
195
|
+
xmlChar *result = NULL, *working, *end;
|
|
196
|
+
int rc, x, flags, global, ovector[3];
|
|
197
|
+
|
|
198
|
+
if ((nargs < 1) || (nargs > 4)) {
|
|
199
|
+
xmlXPathSetArityError(ctxt);
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
replace = xmlXPathPopString(ctxt);
|
|
204
|
+
if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
|
|
205
|
+
return;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
flagstr = xmlXPathPopString(ctxt);
|
|
209
|
+
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
210
|
+
xmlFree(replace);
|
|
211
|
+
return;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
regexp = xmlXPathPopString(ctxt);
|
|
215
|
+
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
|
216
|
+
xmlFree(flagstr);
|
|
217
|
+
xmlFree(replace);
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
haystack = xmlXPathPopString(ctxt);
|
|
222
|
+
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
223
|
+
xmlFree(regexp);
|
|
224
|
+
xmlFree(flagstr);
|
|
225
|
+
xmlFree(replace);
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
230
|
+
|
|
231
|
+
working = haystack;
|
|
232
|
+
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
233
|
+
ovector, sizeof(ovector)/sizeof(int));
|
|
234
|
+
|
|
235
|
+
while (rc > 0 ) {
|
|
236
|
+
if (0==ovector[0]) {
|
|
237
|
+
if (NULL==result) result = xmlStrdup(replace);
|
|
238
|
+
else result = xmlStrcat(result, replace);
|
|
239
|
+
}
|
|
240
|
+
else {
|
|
241
|
+
tmp = xmlStrsub(working, 0, ovector[0]);
|
|
242
|
+
if (NULL==result) result = tmp;
|
|
243
|
+
else {
|
|
244
|
+
result = xmlStrcat(result, tmp);
|
|
245
|
+
xmlFree(tmp);
|
|
246
|
+
}
|
|
247
|
+
result = xmlStrcat(result, replace);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
working = working + ovector[1];
|
|
251
|
+
|
|
252
|
+
if (!global) break;
|
|
253
|
+
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
254
|
+
ovector, sizeof(ovector)/sizeof(int));
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
end = haystack + xmlUTF8Strlen(haystack);
|
|
258
|
+
if (working < end ) {
|
|
259
|
+
if (NULL==result) result = xmlStrdup(working);
|
|
260
|
+
else {
|
|
261
|
+
result = xmlStrcat(result, working);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
fail:
|
|
266
|
+
if (replace != NULL)
|
|
267
|
+
xmlFree(replace);
|
|
268
|
+
if (flagstr != NULL)
|
|
269
|
+
xmlFree(flagstr);
|
|
270
|
+
if (regexp != NULL)
|
|
271
|
+
xmlFree(regexp);
|
|
272
|
+
if (haystack != NULL)
|
|
273
|
+
xmlFree(haystack);
|
|
274
|
+
|
|
275
|
+
xmlXPathReturnString(ctxt, result);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* exsltRegexpTestFunction:
|
|
280
|
+
* @ns:
|
|
281
|
+
*
|
|
282
|
+
* returns true if the string given as the first argument
|
|
283
|
+
* matches the regular expression given as the second argument
|
|
284
|
+
*
|
|
285
|
+
*/
|
|
286
|
+
|
|
287
|
+
static void
|
|
288
|
+
exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
289
|
+
{
|
|
290
|
+
xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
|
|
291
|
+
int rc = 0, flags, global, ovector[3];
|
|
292
|
+
|
|
293
|
+
if ((nargs < 1) || (nargs > 3)) {
|
|
294
|
+
xmlXPathSetArityError(ctxt);
|
|
295
|
+
return;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
if(nargs > 2) {
|
|
299
|
+
flagstr = xmlXPathPopString(ctxt);
|
|
300
|
+
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
303
|
+
} else {
|
|
304
|
+
flagstr = xmlStrdup("");
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
regexp_middle = xmlXPathPopString(ctxt);
|
|
308
|
+
if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
|
|
309
|
+
xmlFree(flagstr);
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
haystack = xmlXPathPopString(ctxt);
|
|
314
|
+
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
315
|
+
xmlFree(regexp_middle);
|
|
316
|
+
xmlFree(flagstr);
|
|
317
|
+
return;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/* build the regexp */
|
|
321
|
+
regexp = xmlStrdup("\\A");
|
|
322
|
+
regexp = xmlStrcat(regexp, regexp_middle);
|
|
323
|
+
regexp = xmlStrcat(regexp, "\\Z");
|
|
324
|
+
|
|
325
|
+
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
326
|
+
rc = exsltRegexpExecute(ctxt, haystack, regexp, flags,
|
|
327
|
+
ovector, sizeof(ovector)/sizeof(int));
|
|
328
|
+
|
|
329
|
+
fail:
|
|
330
|
+
if (flagstr != NULL)
|
|
331
|
+
xmlFree(flagstr);
|
|
332
|
+
if (regexp != NULL)
|
|
333
|
+
xmlFree(regexp);
|
|
334
|
+
if (regexp_middle != NULL)
|
|
335
|
+
xmlFree(regexp_middle);
|
|
336
|
+
if (haystack != NULL)
|
|
337
|
+
xmlFree(haystack);
|
|
338
|
+
|
|
339
|
+
xmlXPathReturnBoolean(ctxt, (rc > 0));
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* exsltRegexpRegister:
|
|
344
|
+
*
|
|
345
|
+
* Registers the EXSLT - Regexp module
|
|
346
|
+
*/
|
|
347
|
+
void
|
|
348
|
+
PLUGINPUBFUN exslt_org_regular_expressions_init (void)
|
|
349
|
+
{
|
|
350
|
+
xsltRegisterExtModuleFunction ((const xmlChar *) "match",
|
|
351
|
+
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
352
|
+
exsltRegexpMatchFunction);
|
|
353
|
+
xsltRegisterExtModuleFunction ((const xmlChar *) "replace",
|
|
354
|
+
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
355
|
+
exsltRegexpReplaceFunction);
|
|
356
|
+
xsltRegisterExtModuleFunction ((const xmlChar *) "test",
|
|
357
|
+
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
358
|
+
exsltRegexpTestFunction);
|
|
359
|
+
}
|