gtl-parsley-ruby 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +152 -0
- data/ext/extconf.rb +82 -0
- data/ext/parsley/.gitignore +32 -0
- data/ext/parsley/AUTHORS +1 -0
- data/ext/parsley/ChangeLog +0 -0
- data/ext/parsley/HACKING +4 -0
- data/ext/parsley/INSTALL +73 -0
- data/ext/parsley/INTRO +84 -0
- data/ext/parsley/Makefile.am +80 -0
- data/ext/parsley/Makefile.in +1009 -0
- data/ext/parsley/NEWS +0 -0
- data/ext/parsley/PAPER +36 -0
- data/ext/parsley/Portfile +18 -0
- data/ext/parsley/Portfile.in +17 -0
- data/ext/parsley/README.C-LANG +92 -0
- data/ext/parsley/README.markdown +1 -0
- data/ext/parsley/TODO +39 -0
- data/ext/parsley/VERSION +1 -0
- data/ext/parsley/aclocal.m4 +8918 -0
- data/ext/parsley/bootstrap.sh +6 -0
- data/ext/parsley/config.guess +1561 -0
- data/ext/parsley/config.sub +1686 -0
- data/ext/parsley/configure +13437 -0
- data/ext/parsley/configure.ac +46 -0
- data/ext/parsley/depcomp +630 -0
- data/ext/parsley/functions.c +368 -0
- data/ext/parsley/functions.h +19 -0
- data/ext/parsley/generate_bisect.sh +12 -0
- data/ext/parsley/hooks/prepare-commit-msg +16 -0
- data/ext/parsley/install-sh +520 -0
- data/ext/parsley/json-c-0.9/AUTHORS +2 -0
- data/ext/parsley/json-c-0.9/COPYING +19 -0
- data/ext/parsley/json-c-0.9/ChangeLog +103 -0
- data/ext/parsley/json-c-0.9/INSTALL +302 -0
- data/ext/parsley/json-c-0.9/Makefile.am +43 -0
- data/ext/parsley/json-c-0.9/Makefile.in +800 -0
- data/ext/parsley/json-c-0.9/NEWS +1 -0
- data/ext/parsley/json-c-0.9/README +20 -0
- data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
- data/ext/parsley/json-c-0.9/README.html +32 -0
- data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
- data/ext/parsley/json-c-0.9/arraylist.c +94 -0
- data/ext/parsley/json-c-0.9/arraylist.h +53 -0
- data/ext/parsley/json-c-0.9/bits.h +27 -0
- data/ext/parsley/json-c-0.9/config.guess +1561 -0
- data/ext/parsley/json-c-0.9/config.h +125 -0
- data/ext/parsley/json-c-0.9/config.h.in +124 -0
- data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
- data/ext/parsley/json-c-0.9/config.sub +1686 -0
- data/ext/parsley/json-c-0.9/configure +13084 -0
- data/ext/parsley/json-c-0.9/configure.in +33 -0
- data/ext/parsley/json-c-0.9/debug.c +98 -0
- data/ext/parsley/json-c-0.9/debug.h +50 -0
- data/ext/parsley/json-c-0.9/depcomp +630 -0
- data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
- data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
- data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
- data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
- data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
- data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
- data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
- data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
- data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
- data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
- data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
- data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
- data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
- data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
- data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
- data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
- data/ext/parsley/json-c-0.9/install-sh +520 -0
- data/ext/parsley/json-c-0.9/json.h +31 -0
- data/ext/parsley/json-c-0.9/json.pc +11 -0
- data/ext/parsley/json-c-0.9/json.pc.in +11 -0
- data/ext/parsley/json-c-0.9/json_object.c +512 -0
- data/ext/parsley/json-c-0.9/json_object.h +319 -0
- data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
- data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
- data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
- data/ext/parsley/json-c-0.9/json_util.c +122 -0
- data/ext/parsley/json-c-0.9/json_util.h +31 -0
- data/ext/parsley/json-c-0.9/libjson.la +41 -0
- data/ext/parsley/json-c-0.9/libtool +8890 -0
- data/ext/parsley/json-c-0.9/linkhash.c +216 -0
- data/ext/parsley/json-c-0.9/linkhash.h +272 -0
- data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
- data/ext/parsley/json-c-0.9/missing +376 -0
- data/ext/parsley/json-c-0.9/printbuf.c +149 -0
- data/ext/parsley/json-c-0.9/printbuf.h +64 -0
- data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
- data/ext/parsley/json-c-0.9/test1 +130 -0
- data/ext/parsley/json-c-0.9/test1.c +164 -0
- data/ext/parsley/json-c-0.9/test2 +130 -0
- data/ext/parsley/json-c-0.9/test2.c +20 -0
- data/ext/parsley/json-c-0.9/test3 +130 -0
- data/ext/parsley/json-c-0.9/test3.c +23 -0
- data/ext/parsley/libtool +8890 -0
- data/ext/parsley/ltmain.sh +8406 -0
- data/ext/parsley/missing +376 -0
- data/ext/parsley/parsed_xpath.c +168 -0
- data/ext/parsley/parsed_xpath.h +34 -0
- data/ext/parsley/parser.y +631 -0
- data/ext/parsley/parsley.c +793 -0
- data/ext/parsley/parsley.h +87 -0
- data/ext/parsley/parsley_main.c +185 -0
- data/ext/parsley/parsleyc_main.c +108 -0
- data/ext/parsley/regexp.c +359 -0
- data/ext/parsley/regexp.h +36 -0
- data/ext/parsley/scanner.l +221 -0
- data/ext/parsley/test/ambiguous.html +207 -0
- data/ext/parsley/test/ambiguous.json +1 -0
- data/ext/parsley/test/ambiguous.let +6 -0
- data/ext/parsley/test/array-regression.html +5 -0
- data/ext/parsley/test/array-regression.json +1 -0
- data/ext/parsley/test/array-regression.let +10 -0
- data/ext/parsley/test/backslash.html +5 -0
- data/ext/parsley/test/backslash.json +1 -0
- data/ext/parsley/test/backslash.let +3 -0
- data/ext/parsley/test/bang.html +17 -0
- data/ext/parsley/test/bang.json +1 -0
- data/ext/parsley/test/bang.let +6 -0
- data/ext/parsley/test/collate_regression.html +324 -0
- data/ext/parsley/test/collate_regression.json +1 -0
- data/ext/parsley/test/collate_regression.let +9 -0
- data/ext/parsley/test/contains.html +3 -0
- data/ext/parsley/test/contains.json +1 -0
- data/ext/parsley/test/contains.let +3 -0
- data/ext/parsley/test/content.html +13 -0
- data/ext/parsley/test/content.json +1 -0
- data/ext/parsley/test/content.let +7 -0
- data/ext/parsley/test/cool.html +575 -0
- data/ext/parsley/test/cool.json +1 -0
- data/ext/parsley/test/cool.let +9 -0
- data/ext/parsley/test/craigs-simple.html +207 -0
- data/ext/parsley/test/craigs-simple.json +1 -0
- data/ext/parsley/test/craigs-simple.let +6 -0
- data/ext/parsley/test/craigs.html +207 -0
- data/ext/parsley/test/craigs.json +1 -0
- data/ext/parsley/test/craigs.let +9 -0
- data/ext/parsley/test/crash.html +157 -0
- data/ext/parsley/test/crash.json +1 -0
- data/ext/parsley/test/crash.let +1 -0
- data/ext/parsley/test/css_attr.html +3 -0
- data/ext/parsley/test/css_attr.json +1 -0
- data/ext/parsley/test/css_attr.let +3 -0
- data/ext/parsley/test/default-namespace.json +1 -0
- data/ext/parsley/test/default-namespace.let +3 -0
- data/ext/parsley/test/default-namespace.xml +1493 -0
- data/ext/parsley/test/div.html +8 -0
- data/ext/parsley/test/div.json +1 -0
- data/ext/parsley/test/div.let +10 -0
- data/ext/parsley/test/empty.html +3 -0
- data/ext/parsley/test/empty.json +1 -0
- data/ext/parsley/test/empty.let +1 -0
- data/ext/parsley/test/emptyish.html +207 -0
- data/ext/parsley/test/emptyish.let +3 -0
- data/ext/parsley/test/fictional-opt.html +43 -0
- data/ext/parsley/test/fictional-opt.json +1 -0
- data/ext/parsley/test/fictional-opt.let +14 -0
- data/ext/parsley/test/fictional.html +43 -0
- data/ext/parsley/test/fictional.json +1 -0
- data/ext/parsley/test/fictional.let +14 -0
- data/ext/parsley/test/function-magic.html +9 -0
- data/ext/parsley/test/function-magic.json +1 -0
- data/ext/parsley/test/function-magic.let +8 -0
- data/ext/parsley/test/hn.html +32 -0
- data/ext/parsley/test/hn.json +1 -0
- data/ext/parsley/test/hn.let +8 -0
- data/ext/parsley/test/malformed-array.html +2329 -0
- data/ext/parsley/test/malformed-array.json +1 -0
- data/ext/parsley/test/malformed-array.let +22 -0
- data/ext/parsley/test/malformed-expr.html +2329 -0
- data/ext/parsley/test/malformed-expr.json +1 -0
- data/ext/parsley/test/malformed-expr.let +16 -0
- data/ext/parsley/test/malformed-function.html +845 -0
- data/ext/parsley/test/malformed-function.json +197 -0
- data/ext/parsley/test/malformed-function.let +8 -0
- data/ext/parsley/test/malformed-json.html +2329 -0
- data/ext/parsley/test/malformed-json.json +1 -0
- data/ext/parsley/test/malformed-json.let +6 -0
- data/ext/parsley/test/malformed-xpath.html +8 -0
- data/ext/parsley/test/malformed-xpath.json +1 -0
- data/ext/parsley/test/malformed-xpath.let +7 -0
- data/ext/parsley/test/match.json +1 -0
- data/ext/parsley/test/match.let +9 -0
- data/ext/parsley/test/match.xml +11 -0
- data/ext/parsley/test/math_ambiguity.html +9 -0
- data/ext/parsley/test/math_ambiguity.json +1 -0
- data/ext/parsley/test/math_ambiguity.let +5 -0
- data/ext/parsley/test/nth-regression.html +13 -0
- data/ext/parsley/test/nth-regression.json +1 -0
- data/ext/parsley/test/nth-regression.let +3 -0
- data/ext/parsley/test/optional.html +2328 -0
- data/ext/parsley/test/optional.json +1 -0
- data/ext/parsley/test/optional.let +8 -0
- data/ext/parsley/test/outer-xml.html +6 -0
- data/ext/parsley/test/outer-xml.json +1 -0
- data/ext/parsley/test/outer-xml.let +5 -0
- data/ext/parsley/test/position.html +8 -0
- data/ext/parsley/test/position.json +1 -0
- data/ext/parsley/test/position.let +6 -0
- data/ext/parsley/test/question_regressions.html +443 -0
- data/ext/parsley/test/question_regressions.json +1 -0
- data/ext/parsley/test/question_regressions.let +6 -0
- data/ext/parsley/test/quote.json +1 -0
- data/ext/parsley/test/quote.let +8 -0
- data/ext/parsley/test/quote.xml +11 -0
- data/ext/parsley/test/reddit.html +1 -0
- data/ext/parsley/test/reddit.json +1 -0
- data/ext/parsley/test/reddit.let +12 -0
- data/ext/parsley/test/remote-fail.json +1 -0
- data/ext/parsley/test/remote.html +3 -0
- data/ext/parsley/test/remote.json +1 -0
- data/ext/parsley/test/remote.let +4 -0
- data/ext/parsley/test/replace.json +1 -0
- data/ext/parsley/test/replace.let +9 -0
- data/ext/parsley/test/replace.xml +11 -0
- data/ext/parsley/test/scope.html +10 -0
- data/ext/parsley/test/scope.json +1 -0
- data/ext/parsley/test/scope.let +6 -0
- data/ext/parsley/test/segfault.html +5 -0
- data/ext/parsley/test/segfault.json +1 -0
- data/ext/parsley/test/segfault.let +9 -0
- data/ext/parsley/test/sg-wrap.html +5 -0
- data/ext/parsley/test/sg-wrap.json +1 -0
- data/ext/parsley/test/sg-wrap.let +3 -0
- data/ext/parsley/test/sg_off.html +5 -0
- data/ext/parsley/test/sg_off.json +1 -0
- data/ext/parsley/test/sg_off.let +3 -0
- data/ext/parsley/test/test.json +1 -0
- data/ext/parsley/test/test.let +6 -0
- data/ext/parsley/test/test.xml +11 -0
- data/ext/parsley/test/trivial.html +2329 -0
- data/ext/parsley/test/trivial.json +1 -0
- data/ext/parsley/test/trivial.let +4 -0
- data/ext/parsley/test/trivial2.html +2329 -0
- data/ext/parsley/test/trivial2.json +1 -0
- data/ext/parsley/test/trivial2.let +7 -0
- data/ext/parsley/test/unbang.html +17 -0
- data/ext/parsley/test/unbang.json +1 -0
- data/ext/parsley/test/unbang.let +6 -0
- data/ext/parsley/test/unicode.html +3 -0
- data/ext/parsley/test/unicode.json +1 -0
- data/ext/parsley/test/unicode.let +1 -0
- data/ext/parsley/test/whitespace.html +8 -0
- data/ext/parsley/test/whitespace.json +1 -0
- data/ext/parsley/test/whitespace.let +3 -0
- data/ext/parsley/test/whitespace_regression.html +4 -0
- data/ext/parsley/test/whitespace_regression.json +1 -0
- data/ext/parsley/test/whitespace_regression.let +3 -0
- data/ext/parsley/test/yelp-benchmark.rb +53 -0
- data/ext/parsley/test/yelp-home.html +1004 -0
- data/ext/parsley/test/yelp-home.json +1 -0
- data/ext/parsley/test/yelp-home.let +6 -0
- data/ext/parsley/test/yelp.html +2329 -0
- data/ext/parsley/test/yelp.json +1 -0
- data/ext/parsley/test/yelp.let +12 -0
- data/ext/parsley/test/youtube.html +1940 -0
- data/ext/parsley/test/youtube.let +11 -0
- data/ext/parsley/util.c +237 -0
- data/ext/parsley/util.h +34 -0
- data/ext/parsley/xml2json.c +47 -0
- data/ext/parsley/xml2json.h +14 -0
- data/ext/parsley/y.tab.h +222 -0
- data/ext/parsley/ylwrap +222 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +120 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +366 -0
data/ext/parsley/ylwrap
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#! /bin/sh
|
|
2
|
+
# ylwrap - wrapper for lex/yacc invocations.
|
|
3
|
+
|
|
4
|
+
scriptversion=2009-04-28.21; # UTC
|
|
5
|
+
|
|
6
|
+
# Copyright (C) 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005,
|
|
7
|
+
# 2007, 2009 Free Software Foundation, Inc.
|
|
8
|
+
#
|
|
9
|
+
# Written by Tom Tromey <tromey@cygnus.com>.
|
|
10
|
+
#
|
|
11
|
+
# This program is free software; you can redistribute it and/or modify
|
|
12
|
+
# it under the terms of the GNU General Public License as published by
|
|
13
|
+
# the Free Software Foundation; either version 2, or (at your option)
|
|
14
|
+
# any later version.
|
|
15
|
+
#
|
|
16
|
+
# This program is distributed in the hope that it will be useful,
|
|
17
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19
|
+
# GNU General Public License for more details.
|
|
20
|
+
#
|
|
21
|
+
# You should have received a copy of the GNU General Public License
|
|
22
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
23
|
+
|
|
24
|
+
# As a special exception to the GNU General Public License, if you
|
|
25
|
+
# distribute this file as part of a program that contains a
|
|
26
|
+
# configuration script generated by Autoconf, you may include it under
|
|
27
|
+
# the same distribution terms that you use for the rest of that program.
|
|
28
|
+
|
|
29
|
+
# This file is maintained in Automake, please report
|
|
30
|
+
# bugs to <bug-automake@gnu.org> or send patches to
|
|
31
|
+
# <automake-patches@gnu.org>.
|
|
32
|
+
|
|
33
|
+
# Handle the options that may precede INPUT.  Only the first argument
# is inspected here.
case "$1" in
  '')
    echo "$0: No files given. Try \`$0 --help' for more information." 1>&2
    exit 1
    ;;
  --basedir)
    # The value is stored and both words are consumed; nothing else in
    # this script reads $basedir.
    basedir=$2
    shift 2
    ;;
  -h|--h*)
    cat <<\EOF
Usage: ylwrap [--help|--version] INPUT [OUTPUT DESIRED]... -- PROGRAM [ARGS]...

Wrapper for lex/yacc invocations, renaming files as desired.

  INPUT is the input file
  OUTPUT is one file PROG generates
  DESIRED is the file we actually want instead of OUTPUT
  PROGRAM is program to run
  ARGS are passed to PROG

Any number of OUTPUT,DESIRED pairs may be used.

Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  -v|--v*)
    echo "ylwrap $scriptversion"
    exit $?
    ;;
esac


# The input.
input="$1"
shift
case "$input" in
  [\\/]* | ?:[\\/]*)
    # Absolute path; do nothing.
    ;;
  *)
    # Relative path.  Make it absolute, because the program is run from
    # inside a scratch subdirectory.
    input="`pwd`/$input"
    ;;
esac

# Collect the OUTPUT DESIRED words up to the `--' separator.  They are
# stored space-separated, so file names containing white space are not
# supported.
pairlist=
while test "$#" -ne 0; do
  if test "$1" = "--"; then
    shift
    break
  fi
  pairlist="$pairlist $1"
  shift
done

# The program to run.
prog="$1"
shift
# Make any relative path in $prog absolute.  A bare command name (no
# directory separator) is left alone so it is still looked up in PATH.
case "$prog" in
  [\\/]* | ?:[\\/]*) ;;
  *[\\/]*) prog="`pwd`/$prog" ;;
esac

# FIXME: add hostname here for parallel makes that run commands on
# other machines.  But that might take us over the 14-char limit.
dirname=ylwrap$$
# On a signal: go back to the starting directory, remove the scratch
# directory, and stop.  Without the explicit exit the script would keep
# running after its working directory had been deleted.
trap "cd '`pwd`'; rm -rf $dirname > /dev/null 2>&1; exit 1" 1 2 3 15
mkdir $dirname || exit 1

# Bail out if the scratch directory cannot be entered; otherwise the
# program would run, and the final cleanup would happen, in the wrong
# place.
cd $dirname || exit 1

case $# in
  0) "$prog" "$input" ;;
  *) "$prog" "$@" "$input" ;;
esac
ret=$?

# On failure $ret keeps the program's own exit status and is returned
# unchanged at the bottom of the script.
if test $ret -eq 0; then
  # Reload the positional parameters with the OUTPUT DESIRED pairs.
  set X $pairlist
  shift
  first=yes
  # Since DOS filename conventions don't allow two dots,
  # the DOS version of Bison writes out y_tab.c instead of y.tab.c
  # and y_tab.h instead of y.tab.h.  Test to see if this is the case.
  y_tab_nodot="no"
  if test -f y_tab.c || test -f y_tab.h; then
    y_tab_nodot="yes"
  fi

  # The directory holding the input.
  input_dir=`echo "$input" | sed -e 's,\([\\/]\)[^\\/]*$,\1,'`
  # Quote $INPUT_DIR so we can use it in a regexp.
  # FIXME: really we should care about more than `.' and `\'.
  input_rx=`echo "$input_dir" | sed 's,\\\\,\\\\\\\\,g;s,\\.,\\\\.,g'`

  while test "$#" -ne 0; do
    from="$1"
    # Handle y_tab.c and y_tab.h output by DOS
    if test $y_tab_nodot = "yes"; then
      case "$from" in
        y.tab.c) from="y_tab.c" ;;
        y.tab.h) from="y_tab.h" ;;
      esac
    fi
    if test -f "$from"; then
      # If $2 is an absolute path name, then just use that,
      # otherwise prepend `../'.
      case "$2" in
        [\\/]* | ?:[\\/]*) target="$2";;
        *) target="../$2";;
      esac

      # We do not want to overwrite a header file if it hasn't
      # changed.  This avoids useless recompilations.  However the
      # parser itself (the first file) should always be updated,
      # because it is the destination of the .y.c rule in the
      # Makefile.  Divert the output of all other files to a temporary
      # file so we can compare them to existing versions.
      if test $first = no; then
        realtarget="$target"
        target="tmp-`echo $target | sed s/.*[\\/]//g`"
      fi
      # Edit out `#line' or `#' directives.
      #
      # We don't want the resulting debug information to point at
      # an absolute srcdir; it is better for it to just mention the
      # .y file with no path.
      #
      # We want to use the real output file name, not yy.lex.c for
      # instance.
      #
      # We want the include guards to be adjusted too.
      FROM=`echo "$from" | sed \
            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`
      TARGET=`echo "$2" | sed \
            -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'\
            -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`

      # Only lines starting with `#' are rewritten; every other line is
      # copied through untouched.
      sed -e "/^#/!b" -e "s,$input_rx,," -e "s,$from,$2," \
          -e "s,$FROM,$TARGET," "$from" >"$target" || ret=$?

      # Check whether header files must be updated.
      if test $first = no; then
        if test -f "$realtarget" && cmp -s "$realtarget" "$target"; then
          echo "$2" is unchanged
          rm -f "$target"
        else
          echo updating "$2"
          mv -f "$target" "$realtarget"
        fi
      fi
    else
      # A missing file is only an error for the first file.  This
      # is a blatant hack to let us support using "yacc -d".  If -d
      # is not specified, we don't want an error when the header
      # file is "missing".
      if test $first = yes; then
        ret=1
      fi
    fi
    shift
    shift
    first=no
  done
fi

# Remove the directory.
cd ..
rm -rf $dirname

exit $ret
|
|
213
|
+
|
|
214
|
+
# Local Variables:
|
|
215
|
+
# mode: shell-script
|
|
216
|
+
# sh-indentation: 2
|
|
217
|
+
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
|
218
|
+
# time-stamp-start: "scriptversion="
|
|
219
|
+
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
|
220
|
+
# time-stamp-time-zone: "UTC"
|
|
221
|
+
# time-stamp-end: "; # UTC"
|
|
222
|
+
# End:
|
data/lib/parsley.rb
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/../ext/cparsley")
|
|
2
|
+
require "rubygems"
|
|
3
|
+
require "json"
|
|
4
|
+
require "thread"
|
|
5
|
+
|
|
6
|
+
# Ruby front end for the native CParsley extension: compiles a parselet
# (a JSON string, or a Hash that is converted to JSON) and runs it against
# HTML/XML input.
class Parsley

  # Lock taken around CParsley.new. It is created once, when the class body
  # is evaluated, so every thread is guaranteed to share the same Mutex.
  # NOTE(review): presumably the native compile step is not thread-safe --
  # confirm against the C extension.
  COMPILE_MUTEX = Mutex.new

  # Options that #parse hands to the extension as Strings.
  STRING_OPTIONS = [:base, :file, :string].freeze

  # Boolean options that default to true when the caller omits them.
  DEFAULT_ON_FLAGS = [:collate, :prune, :allow_net, :allow_local].freeze

  # Remembers the agent and forwards its string form to the extension.
  def self.user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end

  # Returns the value last assigned through user_agent= (nil if never set).
  def self.user_agent
    @user_agent
  end

  # parsley -- the parselet, either as a String or as a Hash. A Hash has its
  #            keys and leaf values stringified and is serialised to JSON.
  # incl    -- passed straight through to CParsley.new.
  def initialize(parsley, incl = "")
    if parsley.is_a?(Hash)
      parsley = recursive_stringify(parsley).to_json
    end
    COMPILE_MUTEX.synchronize do
      @parsley = CParsley.new(parsley, incl)
    end
  end

  # Valid options:
  #
  # Requires one of:
  # :file -- the input file path or url
  # :string -- the input string
  #
  # And optionally (default is the first listed value):
  # :input => [:html, :xml]
  # :output => [:ruby, :json, :xml]
  # :prune => [true, false]
  # :sgwrap => [false, true]
  # :collate => [true, false]
  # :base => "http://some/base/href"
  # :allow_net => [true, false]
  # :allow_local => [true, false]
  #
  # Raises ParsleyError when neither :file nor :string is given.
  # Returns whatever the extension's parse returns.
  def parse(options = {})
    # Normalise a copy, never the caller's hash. Normalisation turns a
    # missing :file into "" (which is truthy), so a hash that was normalised
    # in place and then reused would be treated as a file parse.
    opts = options.dup

    unless opts[:file] || opts[:string]
      raise ParsleyError.new("must specify what to parse")
    end

    # These flags must be derived before the string coercion below.
    opts[:sgwrap]   = !!opts[:sgwrap]
    opts[:is_file]  = !!opts[:file]
    opts[:has_base] = !!opts[:base]

    STRING_OPTIONS.each { |key| opts[key] = opts[key].to_s }

    opts[:input]  ||= :html
    opts[:output] ||= :ruby

    # An explicit false/nil must stay false; only an absent key means true.
    DEFAULT_ON_FLAGS.each do |flag|
      opts[flag] = opts.has_key?(flag) ? !!opts[flag] : true
    end

    @parsley.parse(opts)
  end

  private

  # Returns a copy of obj in which every Hash key and every leaf value has
  # been converted with to_s; Hashes and Arrays are walked recursively.
  def recursive_stringify(obj)
    case obj
    when Hash
      obj.inject({}) do |memo, (k, v)|
        memo[k.to_s] = recursive_stringify(v)
        memo
      end
    when Array
      obj.map { |e| recursive_stringify(e) }
    else
      obj.to_s
    end
  end

end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#encoding: UTF-8
|
|
2
|
+
require "test/unit"
|
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + "/../lib/parsley")
|
|
4
|
+
|
|
5
|
+
# Test::Unit suite for the Parsley wrapper. Only the segfault regression is
# active; the remaining cases are commented out in this release and are kept
# below exactly as they were.
class TestParsley < Test::Unit::TestCase
  # Resolves the fixture paths (relative to this file) used by the cases.
  def setup
    here = File.dirname(__FILE__)
    @page = File.expand_path(here + "/yelp.html")
    @home = File.expand_path(here + "/yelp-home.html")
    @let  = File.expand_path(here + "/yelp-home.let")
  end

  # Parses a small document that contains a non-ASCII character with a
  # parselet whose optional ("?") keys match nothing, then checks the
  # extracted values.
  def test_segfault_regression
    simple_html = <<-HTML
    <html>
    <body>
    <h1 class="iCIMS_Header_JobTitle">CEO</h1>
    <h2 class="temperature">28ºF</h2>
    </body>
    </html>
    HTML

    job_fields = {
      'title' => ".iCIMS_Header_JobTitle",
      'temperature' => ".temperature",
      'description?' => "blah",
      'location?' => "blah",
      'experience?' => "blah",
      'education?' => "blah"
    }
    struct = { 'jobs' => [job_fields] }

    result = Parsley.new(struct).parse(:string => simple_html)

    assert_equal "CEO", result['jobs'].first['title']
    assert_equal "28ºF", result['jobs'].first['temperature']
    assert result['jobs'].first['description'].nil?
  end
  #
  # def test_yelp
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home)
  #   assert_equal "/c/sf/shopping", out["categories"][0]["href"]
  # end
  #
  # def test_parsley_should_raise_if_value_syntax_error
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => nil})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => ""})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => "<<<<<<<<<<<"})
  #   end
  # end
  #
  # def test_yelp_xml
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home, :output => :xml)
  # end
  #
  # def test_broken
  #   @parsley = Parsley.new("hi" => "no-ns:match(h1)")
  #   assert_raises(ParsleyError) {
  #     @parsley.parse(:file => @page)
  #   }
  # end
  #
  # def test_simple
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
  # end
  #
  # def test_simple_string
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:string => "<html><body><h1>Nick's Crispy Tacos</h1></body></html>"))
  # end
  #
  # def test_xml
  #   @parsley = Parsley.new("hi" => "h1")
  #   xml = "<?xml version=\"1.0\"?>\n<parsley:root xmlns:parsley=\"http://parselets.com/json\"><hi position=\"63\">Nick's Crispy Tacos</hi></parsley:root>\n"
  #   assert_equal(xml, @parsley.parse(:file => @page, :output => :xml))
  # end
  #
  # def test_sgwrap
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_equal({"hi" => "world"}, @parsley.parse(:string => html, :sgwrap => true))
  # end
  #
  # def test_sgwrap_off
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_raises(ParsleyError) do
  #     @parsley.parse(:string => html, :sgwrap => false)
  #   end
  # end
  #
  #
  # def test_json
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal('{ "hi": "Nick\'s Crispy Tacos" }', @parsley.parse(:file => @page, :output => :json))
  # end
  #
  # def test_rescuable_file_error
  #   @parsley = Parsley.new("hi" => "h1")
  #   @nonexistant_file = File.dirname(__FILE__) + "/../fixtures/yelp.html"
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @nonexistant_file)) rescue nil
  # end
  #
  # def test_array_string
  #   @parsley = Parsley.new({"foo" => ["li"]})
  #   out = @parsley.parse(:file => @page)
  #   assert_kind_of Hash, out
  #   assert_kind_of Array, out["foo"], out.inspect
  #   assert out["foo"].length > 1
  # end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
require "rubygems"
|
|
2
|
+
require "nokogiri"
|
|
3
|
+
require "hpricot"
|
|
4
|
+
require "parsley"
|
|
5
|
+
require "benchmark"
|
|
6
|
+
require "pp"
|
|
7
|
+
|
|
8
|
+
# Path of the HTML fixture every benchmark run parses; it sits next to this script.
YELP_HTML = "#{File.dirname(__FILE__)}/yelp.html"
|
|
9
|
+
|
|
10
|
+
# Benchmark case: loads the fixture through Nokogiri's Hpricot-compatible
# entry point and runs the shared extraction in #parse.
# The block form of File.open closes the handle after each run; the bare
# File.open call leaked one descriptor per iteration.
def noko
  File.open(YELP_HTML) { |io| parse Nokogiri.Hpricot(io) }
end
|
|
13
|
+
|
|
14
|
+
# Benchmark case: loads the fixture with Hpricot and runs the shared
# extraction in #parse.
# The block form of File.open closes the handle after each run; the bare
# File.open call leaked one descriptor per iteration.
def hpri
  File.open(YELP_HTML) { |io| parse Hpricot(io) }
end
|
|
17
|
+
|
|
18
|
+
# Extracts the business details and its reviews from a parsed document.
#
# doc -- any object that answers `/ selector` with a list of nodes, each of
#        which responds to inner_text (both benchmark libraries are used
#        this way here).
#
# Returns a Hash with the keys "name", "phone", "address" and "reviews"
# (an Array of Hashes with "date", "user_name" and "comment").
def parse(doc)
  # Text of the first node matching selector below node.
  first_text = lambda { |node, selector| (node / selector).first.inner_text }

  out = {}
  out["name"] = first_text.call(doc, "h1")
  out["phone"] = first_text.call(doc, "#bizPhone")
  out["address"] = first_text.call(doc, "address")
  out["reviews"] = (doc / ".nonfavoriteReview").map do |node|
    {
      "date" => first_text.call(node, ".ieSucks .smaller"),
      "user_name" => first_text.call(node, ".reviewer_info a"),
      "comment" => first_text.call(node, ".review_comment")
    }
  end
  # Return the whole result; ending on the assignment above would return
  # only the reviews array.
  out
end
|
|
31
|
+
|
|
32
|
+
# Benchmark case: builds the parselet for the fixture page, parses the file
# with Parsley and pretty-prints the result.
def pars
  review_fields = {
    "date" => ".ieSucks .smaller",
    "user_name" => ".reviewer_info a",
    "comment" => ".review_comment"
  }
  selectors = {
    "name" => "h1",
    "phone" => "#bizPhone",
    "address" => "address",
    "reviews(.nonfavoriteReview)" => [review_fields]
  }
  parslet = Parsley.new(selectors)
  extracted = parslet.parse(:file => YELP_HTML)
  pp extracted
end
|
|
47
|
+
|
|
48
|
+
# Times three runs of each implementation, in the order nokogiri, hpricot,
# parsley, and prints one report row per implementation.
Benchmark.bm do |bench|
  [["nokogiri: ", :noko], ["hpricot: ", :hpri], ["parsley: ", :pars]].each do |label, runner|
    bench.report(label) { 3.times { send(runner) } }
  end
end
|
|
53
|
+
|