gtl-parsley-ruby 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +152 -0
- data/ext/extconf.rb +82 -0
- data/ext/parsley/.gitignore +32 -0
- data/ext/parsley/AUTHORS +1 -0
- data/ext/parsley/ChangeLog +0 -0
- data/ext/parsley/HACKING +4 -0
- data/ext/parsley/INSTALL +73 -0
- data/ext/parsley/INTRO +84 -0
- data/ext/parsley/Makefile.am +80 -0
- data/ext/parsley/Makefile.in +1009 -0
- data/ext/parsley/NEWS +0 -0
- data/ext/parsley/PAPER +36 -0
- data/ext/parsley/Portfile +18 -0
- data/ext/parsley/Portfile.in +17 -0
- data/ext/parsley/README.C-LANG +92 -0
- data/ext/parsley/README.markdown +1 -0
- data/ext/parsley/TODO +39 -0
- data/ext/parsley/VERSION +1 -0
- data/ext/parsley/aclocal.m4 +8918 -0
- data/ext/parsley/bootstrap.sh +6 -0
- data/ext/parsley/config.guess +1561 -0
- data/ext/parsley/config.sub +1686 -0
- data/ext/parsley/configure +13437 -0
- data/ext/parsley/configure.ac +46 -0
- data/ext/parsley/depcomp +630 -0
- data/ext/parsley/functions.c +368 -0
- data/ext/parsley/functions.h +19 -0
- data/ext/parsley/generate_bisect.sh +12 -0
- data/ext/parsley/hooks/prepare-commit-msg +16 -0
- data/ext/parsley/install-sh +520 -0
- data/ext/parsley/json-c-0.9/AUTHORS +2 -0
- data/ext/parsley/json-c-0.9/COPYING +19 -0
- data/ext/parsley/json-c-0.9/ChangeLog +103 -0
- data/ext/parsley/json-c-0.9/INSTALL +302 -0
- data/ext/parsley/json-c-0.9/Makefile.am +43 -0
- data/ext/parsley/json-c-0.9/Makefile.in +800 -0
- data/ext/parsley/json-c-0.9/NEWS +1 -0
- data/ext/parsley/json-c-0.9/README +20 -0
- data/ext/parsley/json-c-0.9/README-WIN32.html +57 -0
- data/ext/parsley/json-c-0.9/README.html +32 -0
- data/ext/parsley/json-c-0.9/aclocal.m4 +8909 -0
- data/ext/parsley/json-c-0.9/arraylist.c +94 -0
- data/ext/parsley/json-c-0.9/arraylist.h +53 -0
- data/ext/parsley/json-c-0.9/bits.h +27 -0
- data/ext/parsley/json-c-0.9/config.guess +1561 -0
- data/ext/parsley/json-c-0.9/config.h +125 -0
- data/ext/parsley/json-c-0.9/config.h.in +124 -0
- data/ext/parsley/json-c-0.9/config.h.win32 +94 -0
- data/ext/parsley/json-c-0.9/config.sub +1686 -0
- data/ext/parsley/json-c-0.9/configure +13084 -0
- data/ext/parsley/json-c-0.9/configure.in +33 -0
- data/ext/parsley/json-c-0.9/debug.c +98 -0
- data/ext/parsley/json-c-0.9/debug.h +50 -0
- data/ext/parsley/json-c-0.9/depcomp +630 -0
- data/ext/parsley/json-c-0.9/doc/html/annotated.html +40 -0
- data/ext/parsley/json-c-0.9/doc/html/arraylist_8h.html +240 -0
- data/ext/parsley/json-c-0.9/doc/html/bits_8h.html +150 -0
- data/ext/parsley/json-c-0.9/doc/html/classes.html +36 -0
- data/ext/parsley/json-c-0.9/doc/html/config_8h.html +612 -0
- data/ext/parsley/json-c-0.9/doc/html/debug_8h.html +392 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.css +441 -0
- data/ext/parsley/json-c-0.9/doc/html/doxygen.png +0 -0
- data/ext/parsley/json-c-0.9/doc/html/files.html +42 -0
- data/ext/parsley/json-c-0.9/doc/html/functions.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/functions_vars.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/globals.html +459 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_defs.html +202 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_enum.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_eval.html +135 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_func.html +194 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_type.html +70 -0
- data/ext/parsley/json-c-0.9/doc/html/globals_vars.html +50 -0
- data/ext/parsley/json-c-0.9/doc/html/index.html +25 -0
- data/ext/parsley/json-c-0.9/doc/html/json_8h.html +32 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object_8h.html +1150 -0
- data/ext/parsley/json-c-0.9/doc/html/json__object__private_8h.html +75 -0
- data/ext/parsley/json-c-0.9/doc/html/json__tokener_8h.html +366 -0
- data/ext/parsley/json-c-0.9/doc/html/json__util_8h.html +106 -0
- data/ext/parsley/json-c-0.9/doc/html/linkhash_8h.html +740 -0
- data/ext/parsley/json-c-0.9/doc/html/printbuf_8h.html +214 -0
- data/ext/parsley/json-c-0.9/doc/html/structarray__list.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object.html +141 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__object__iter.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener.html +206 -0
- data/ext/parsley/json-c-0.9/doc/html/structjson__tokener__srec.html +104 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__entry.html +105 -0
- data/ext/parsley/json-c-0.9/doc/html/structlh__table.html +275 -0
- data/ext/parsley/json-c-0.9/doc/html/structprintbuf.html +87 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_b.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_l.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tab_r.gif +0 -0
- data/ext/parsley/json-c-0.9/doc/html/tabs.css +105 -0
- data/ext/parsley/json-c-0.9/doc/html/unionjson__object_1_1data.html +140 -0
- data/ext/parsley/json-c-0.9/install-sh +520 -0
- data/ext/parsley/json-c-0.9/json.h +31 -0
- data/ext/parsley/json-c-0.9/json.pc +11 -0
- data/ext/parsley/json-c-0.9/json.pc.in +11 -0
- data/ext/parsley/json-c-0.9/json_object.c +512 -0
- data/ext/parsley/json-c-0.9/json_object.h +319 -0
- data/ext/parsley/json-c-0.9/json_object_private.h +52 -0
- data/ext/parsley/json-c-0.9/json_tokener.c +628 -0
- data/ext/parsley/json-c-0.9/json_tokener.h +98 -0
- data/ext/parsley/json-c-0.9/json_util.c +122 -0
- data/ext/parsley/json-c-0.9/json_util.h +31 -0
- data/ext/parsley/json-c-0.9/libjson.la +41 -0
- data/ext/parsley/json-c-0.9/libtool +8890 -0
- data/ext/parsley/json-c-0.9/linkhash.c +216 -0
- data/ext/parsley/json-c-0.9/linkhash.h +272 -0
- data/ext/parsley/json-c-0.9/ltmain.sh +8406 -0
- data/ext/parsley/json-c-0.9/missing +376 -0
- data/ext/parsley/json-c-0.9/printbuf.c +149 -0
- data/ext/parsley/json-c-0.9/printbuf.h +64 -0
- data/ext/parsley/json-c-0.9/stamp-h1 +1 -0
- data/ext/parsley/json-c-0.9/test1 +130 -0
- data/ext/parsley/json-c-0.9/test1.c +164 -0
- data/ext/parsley/json-c-0.9/test2 +130 -0
- data/ext/parsley/json-c-0.9/test2.c +20 -0
- data/ext/parsley/json-c-0.9/test3 +130 -0
- data/ext/parsley/json-c-0.9/test3.c +23 -0
- data/ext/parsley/libtool +8890 -0
- data/ext/parsley/ltmain.sh +8406 -0
- data/ext/parsley/missing +376 -0
- data/ext/parsley/parsed_xpath.c +168 -0
- data/ext/parsley/parsed_xpath.h +34 -0
- data/ext/parsley/parser.y +631 -0
- data/ext/parsley/parsley.c +793 -0
- data/ext/parsley/parsley.h +87 -0
- data/ext/parsley/parsley_main.c +185 -0
- data/ext/parsley/parsleyc_main.c +108 -0
- data/ext/parsley/regexp.c +359 -0
- data/ext/parsley/regexp.h +36 -0
- data/ext/parsley/scanner.l +221 -0
- data/ext/parsley/test/ambiguous.html +207 -0
- data/ext/parsley/test/ambiguous.json +1 -0
- data/ext/parsley/test/ambiguous.let +6 -0
- data/ext/parsley/test/array-regression.html +5 -0
- data/ext/parsley/test/array-regression.json +1 -0
- data/ext/parsley/test/array-regression.let +10 -0
- data/ext/parsley/test/backslash.html +5 -0
- data/ext/parsley/test/backslash.json +1 -0
- data/ext/parsley/test/backslash.let +3 -0
- data/ext/parsley/test/bang.html +17 -0
- data/ext/parsley/test/bang.json +1 -0
- data/ext/parsley/test/bang.let +6 -0
- data/ext/parsley/test/collate_regression.html +324 -0
- data/ext/parsley/test/collate_regression.json +1 -0
- data/ext/parsley/test/collate_regression.let +9 -0
- data/ext/parsley/test/contains.html +3 -0
- data/ext/parsley/test/contains.json +1 -0
- data/ext/parsley/test/contains.let +3 -0
- data/ext/parsley/test/content.html +13 -0
- data/ext/parsley/test/content.json +1 -0
- data/ext/parsley/test/content.let +7 -0
- data/ext/parsley/test/cool.html +575 -0
- data/ext/parsley/test/cool.json +1 -0
- data/ext/parsley/test/cool.let +9 -0
- data/ext/parsley/test/craigs-simple.html +207 -0
- data/ext/parsley/test/craigs-simple.json +1 -0
- data/ext/parsley/test/craigs-simple.let +6 -0
- data/ext/parsley/test/craigs.html +207 -0
- data/ext/parsley/test/craigs.json +1 -0
- data/ext/parsley/test/craigs.let +9 -0
- data/ext/parsley/test/crash.html +157 -0
- data/ext/parsley/test/crash.json +1 -0
- data/ext/parsley/test/crash.let +1 -0
- data/ext/parsley/test/css_attr.html +3 -0
- data/ext/parsley/test/css_attr.json +1 -0
- data/ext/parsley/test/css_attr.let +3 -0
- data/ext/parsley/test/default-namespace.json +1 -0
- data/ext/parsley/test/default-namespace.let +3 -0
- data/ext/parsley/test/default-namespace.xml +1493 -0
- data/ext/parsley/test/div.html +8 -0
- data/ext/parsley/test/div.json +1 -0
- data/ext/parsley/test/div.let +10 -0
- data/ext/parsley/test/empty.html +3 -0
- data/ext/parsley/test/empty.json +1 -0
- data/ext/parsley/test/empty.let +1 -0
- data/ext/parsley/test/emptyish.html +207 -0
- data/ext/parsley/test/emptyish.let +3 -0
- data/ext/parsley/test/fictional-opt.html +43 -0
- data/ext/parsley/test/fictional-opt.json +1 -0
- data/ext/parsley/test/fictional-opt.let +14 -0
- data/ext/parsley/test/fictional.html +43 -0
- data/ext/parsley/test/fictional.json +1 -0
- data/ext/parsley/test/fictional.let +14 -0
- data/ext/parsley/test/function-magic.html +9 -0
- data/ext/parsley/test/function-magic.json +1 -0
- data/ext/parsley/test/function-magic.let +8 -0
- data/ext/parsley/test/hn.html +32 -0
- data/ext/parsley/test/hn.json +1 -0
- data/ext/parsley/test/hn.let +8 -0
- data/ext/parsley/test/malformed-array.html +2329 -0
- data/ext/parsley/test/malformed-array.json +1 -0
- data/ext/parsley/test/malformed-array.let +22 -0
- data/ext/parsley/test/malformed-expr.html +2329 -0
- data/ext/parsley/test/malformed-expr.json +1 -0
- data/ext/parsley/test/malformed-expr.let +16 -0
- data/ext/parsley/test/malformed-function.html +845 -0
- data/ext/parsley/test/malformed-function.json +197 -0
- data/ext/parsley/test/malformed-function.let +8 -0
- data/ext/parsley/test/malformed-json.html +2329 -0
- data/ext/parsley/test/malformed-json.json +1 -0
- data/ext/parsley/test/malformed-json.let +6 -0
- data/ext/parsley/test/malformed-xpath.html +8 -0
- data/ext/parsley/test/malformed-xpath.json +1 -0
- data/ext/parsley/test/malformed-xpath.let +7 -0
- data/ext/parsley/test/match.json +1 -0
- data/ext/parsley/test/match.let +9 -0
- data/ext/parsley/test/match.xml +11 -0
- data/ext/parsley/test/math_ambiguity.html +9 -0
- data/ext/parsley/test/math_ambiguity.json +1 -0
- data/ext/parsley/test/math_ambiguity.let +5 -0
- data/ext/parsley/test/nth-regression.html +13 -0
- data/ext/parsley/test/nth-regression.json +1 -0
- data/ext/parsley/test/nth-regression.let +3 -0
- data/ext/parsley/test/optional.html +2328 -0
- data/ext/parsley/test/optional.json +1 -0
- data/ext/parsley/test/optional.let +8 -0
- data/ext/parsley/test/outer-xml.html +6 -0
- data/ext/parsley/test/outer-xml.json +1 -0
- data/ext/parsley/test/outer-xml.let +5 -0
- data/ext/parsley/test/position.html +8 -0
- data/ext/parsley/test/position.json +1 -0
- data/ext/parsley/test/position.let +6 -0
- data/ext/parsley/test/question_regressions.html +443 -0
- data/ext/parsley/test/question_regressions.json +1 -0
- data/ext/parsley/test/question_regressions.let +6 -0
- data/ext/parsley/test/quote.json +1 -0
- data/ext/parsley/test/quote.let +8 -0
- data/ext/parsley/test/quote.xml +11 -0
- data/ext/parsley/test/reddit.html +1 -0
- data/ext/parsley/test/reddit.json +1 -0
- data/ext/parsley/test/reddit.let +12 -0
- data/ext/parsley/test/remote-fail.json +1 -0
- data/ext/parsley/test/remote.html +3 -0
- data/ext/parsley/test/remote.json +1 -0
- data/ext/parsley/test/remote.let +4 -0
- data/ext/parsley/test/replace.json +1 -0
- data/ext/parsley/test/replace.let +9 -0
- data/ext/parsley/test/replace.xml +11 -0
- data/ext/parsley/test/scope.html +10 -0
- data/ext/parsley/test/scope.json +1 -0
- data/ext/parsley/test/scope.let +6 -0
- data/ext/parsley/test/segfault.html +5 -0
- data/ext/parsley/test/segfault.json +1 -0
- data/ext/parsley/test/segfault.let +9 -0
- data/ext/parsley/test/sg-wrap.html +5 -0
- data/ext/parsley/test/sg-wrap.json +1 -0
- data/ext/parsley/test/sg-wrap.let +3 -0
- data/ext/parsley/test/sg_off.html +5 -0
- data/ext/parsley/test/sg_off.json +1 -0
- data/ext/parsley/test/sg_off.let +3 -0
- data/ext/parsley/test/test.json +1 -0
- data/ext/parsley/test/test.let +6 -0
- data/ext/parsley/test/test.xml +11 -0
- data/ext/parsley/test/trivial.html +2329 -0
- data/ext/parsley/test/trivial.json +1 -0
- data/ext/parsley/test/trivial.let +4 -0
- data/ext/parsley/test/trivial2.html +2329 -0
- data/ext/parsley/test/trivial2.json +1 -0
- data/ext/parsley/test/trivial2.let +7 -0
- data/ext/parsley/test/unbang.html +17 -0
- data/ext/parsley/test/unbang.json +1 -0
- data/ext/parsley/test/unbang.let +6 -0
- data/ext/parsley/test/unicode.html +3 -0
- data/ext/parsley/test/unicode.json +1 -0
- data/ext/parsley/test/unicode.let +1 -0
- data/ext/parsley/test/whitespace.html +8 -0
- data/ext/parsley/test/whitespace.json +1 -0
- data/ext/parsley/test/whitespace.let +3 -0
- data/ext/parsley/test/whitespace_regression.html +4 -0
- data/ext/parsley/test/whitespace_regression.json +1 -0
- data/ext/parsley/test/whitespace_regression.let +3 -0
- data/ext/parsley/test/yelp-benchmark.rb +53 -0
- data/ext/parsley/test/yelp-home.html +1004 -0
- data/ext/parsley/test/yelp-home.json +1 -0
- data/ext/parsley/test/yelp-home.let +6 -0
- data/ext/parsley/test/yelp.html +2329 -0
- data/ext/parsley/test/yelp.json +1 -0
- data/ext/parsley/test/yelp.let +12 -0
- data/ext/parsley/test/youtube.html +1940 -0
- data/ext/parsley/test/youtube.let +11 -0
- data/ext/parsley/util.c +237 -0
- data/ext/parsley/util.h +34 -0
- data/ext/parsley/xml2json.c +47 -0
- data/ext/parsley/xml2json.h +14 -0
- data/ext/parsley/y.tab.h +222 -0
- data/ext/parsley/ylwrap +222 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +120 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +366 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
ABOUT
|
2
|
+
|
3
|
+
Ruby bindings for Parsley.
|
4
|
+
|
5
|
+
INSTALLATION
|
6
|
+
|
7
|
+
= Get Parsley and Dependencies =
|
8
|
+
|
9
|
+
Download Parsley from http://github.com/fizx/parsley/tree/master and follow the installation directions located at http://github.com/fizx/parsley/blob/master/INSTALL
|
10
|
+
|
11
|
+
= Install parsley-ruby =
|
12
|
+
|
13
|
+
From source:
|
14
|
+
sudo rake install
|
15
|
+
|
16
|
+
From GitHub: DEPRECATED!
|
17
|
+
|
18
|
+
From GemCutter:
|
19
|
+
|
20
|
+
Run the following if you haven't already:
|
21
|
+
gem sources -a http://gemcutter.org
|
22
|
+
Install the gem:
|
23
|
+
sudo gem install parsley-ruby
|
24
|
+
|
25
|
+
PARSLETS.COM INTEGRATION
|
26
|
+
|
27
|
+
We also recommend installing the free online_parslets rubygem in order to use other people's parslets and to share your own:
|
28
|
+
Run the following if you haven't already:
|
29
|
+
gem sources -a http://gems.github.com
|
30
|
+
Install the gem:
|
31
|
+
sudo gem install iterationlabs-online_parslets
|
32
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "gtl-parsley-ruby"
|
8
|
+
gem.summary = "Ruby binding for parsley (updated for 1.9 and scoped to utf-8 only)"
|
9
|
+
gem.description = "XML/HTML Parser"
|
10
|
+
gem.email = "kyle@kylemaxwell.com"
|
11
|
+
gem.homepage = "http://github.com/fizx/parsley-ruby"
|
12
|
+
gem.authors = ["Kyle Maxwell", "Brian Collins"]
|
13
|
+
gem.add_dependency("json", ["> 0.0.0"])
|
14
|
+
gem.require_paths = ["lib", "ext"]
|
15
|
+
gem.extensions = "ext/extconf.rb" end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/*_test.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/*_test.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
if File.exist?('VERSION')
|
48
|
+
version = File.read('VERSION')
|
49
|
+
else
|
50
|
+
version = ""
|
51
|
+
end
|
52
|
+
|
53
|
+
rdoc.rdoc_dir = 'rdoc'
|
54
|
+
rdoc.title = "robots #{version}"
|
55
|
+
rdoc.rdoc_files.include('README*')
|
56
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.5.0
|
data/ext/cparsley.c
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <libxslt/xslt.h>
|
4
|
+
#include <libexslt/exslt.h>
|
5
|
+
#include <libxslt/xsltInternals.h>
|
6
|
+
#include <libxslt/transform.h>
|
7
|
+
#include <libxml/parser.h>
|
8
|
+
#include <libxml/HTMLparser.h>
|
9
|
+
#include <libxml/HTMLtree.h>
|
10
|
+
#include <libxml/xmlwriter.h>
|
11
|
+
#include <parsley.h>
|
12
|
+
#include <json/json.h>
|
13
|
+
#include <xml2json.h>
|
14
|
+
|
15
|
+
VALUE _new(VALUE, VALUE, VALUE);
|
16
|
+
VALUE _parse(VALUE, VALUE);
|
17
|
+
VALUE _rb_set_user_agent(VALUE self, VALUE agent);
|
18
|
+
VALUE c_parsley_err;
|
19
|
+
VALUE c_parsley;
|
20
|
+
|
21
|
+
#ifndef STR2CSTR
|
22
|
+
char *STR2CSTR(VALUE v) {
|
23
|
+
return StringValuePtr(v);
|
24
|
+
}
|
25
|
+
#endif
|
26
|
+
|
27
|
+
void Init_cparsley()
|
28
|
+
{
|
29
|
+
c_parsley = rb_define_class("CParsley", rb_cObject);
|
30
|
+
c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
|
31
|
+
rb_define_singleton_method(c_parsley, "new", _new, 2);
|
32
|
+
rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
|
33
|
+
rb_define_method(c_parsley, "parse", _parse, 1);
|
34
|
+
}
|
35
|
+
|
36
|
+
VALUE
|
37
|
+
_new(VALUE self, VALUE parsley, VALUE incl){
|
38
|
+
parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
|
39
|
+
|
40
|
+
if(ptr->error != NULL) {
|
41
|
+
rb_raise(c_parsley_err, ptr->error);
|
42
|
+
parsley_free(ptr);
|
43
|
+
return Qnil;
|
44
|
+
}
|
45
|
+
|
46
|
+
return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
|
47
|
+
}
|
48
|
+
|
49
|
+
VALUE
|
50
|
+
_rb_set_user_agent(VALUE self, VALUE agent) {
|
51
|
+
parsley_set_user_agent(STR2CSTR(agent));
|
52
|
+
return Qtrue;
|
53
|
+
}
|
54
|
+
|
55
|
+
|
56
|
+
static VALUE
|
57
|
+
rubify_recurse(xmlNodePtr xml, int encoding) {
|
58
|
+
if(xml == NULL) return NULL;
|
59
|
+
xmlNodePtr child;
|
60
|
+
VALUE obj = Qnil;
|
61
|
+
|
62
|
+
switch(xml->type) {
|
63
|
+
case XML_ELEMENT_NODE:
|
64
|
+
child = xml->children;
|
65
|
+
if(xml->ns == NULL) {
|
66
|
+
child = xml;
|
67
|
+
obj = rb_hash_new();
|
68
|
+
while(child != NULL) {
|
69
|
+
VALUE key = rb_str_new2(child->name);
|
70
|
+
rb_enc_associate_index(key, encoding);
|
71
|
+
rb_hash_aset(obj, key, rubify_recurse(child->children, encoding));
|
72
|
+
child = child->next;
|
73
|
+
}
|
74
|
+
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
75
|
+
if(!strcmp(xml->name, "groups")) {
|
76
|
+
obj = rb_ary_new();
|
77
|
+
while(child != NULL) {
|
78
|
+
rb_ary_push(obj, rubify_recurse(child->children, encoding));
|
79
|
+
child = child->next;
|
80
|
+
}
|
81
|
+
} else if(!strcmp(xml->name, "group")) {
|
82
|
+
// Implicitly handled by parsley:groups handler
|
83
|
+
}
|
84
|
+
}
|
85
|
+
break;
|
86
|
+
case XML_TEXT_NODE:
|
87
|
+
obj = rb_str_new2(xml->content);
|
88
|
+
rb_enc_associate_index(obj, encoding);
|
89
|
+
break;
|
90
|
+
}
|
91
|
+
// inspect(obj);
|
92
|
+
return obj;
|
93
|
+
}
|
94
|
+
|
95
|
+
static VALUE
|
96
|
+
_parse_doc(parsedParsleyPtr ptr, VALUE type, int encoding) {
|
97
|
+
if(ptr->error != NULL || ptr->xml == NULL) {
|
98
|
+
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
99
|
+
rb_raise(c_parsley_err, ptr->error);
|
100
|
+
parsed_parsley_free(ptr);
|
101
|
+
return Qnil;
|
102
|
+
}
|
103
|
+
|
104
|
+
VALUE output;
|
105
|
+
if(type == ID2SYM(rb_intern("json"))) {
|
106
|
+
struct json_object *json = xml2json(ptr->xml->children->children);
|
107
|
+
char* str = json_object_to_json_string(json);
|
108
|
+
output = rb_str_new2(str);
|
109
|
+
rb_enc_associate_index(output, encoding);
|
110
|
+
json_object_put(json);
|
111
|
+
} else if(type == ID2SYM(rb_intern("xml"))) {
|
112
|
+
xmlChar* str;
|
113
|
+
int size;
|
114
|
+
xmlDocDumpMemory(ptr->xml, &str, &size);
|
115
|
+
output = rb_str_new(str, size);
|
116
|
+
rb_enc_associate_index(output, encoding);
|
117
|
+
} else {
|
118
|
+
output = rubify_recurse(ptr->xml->children->children, encoding);
|
119
|
+
if((void*)output == NULL) output = Qnil;
|
120
|
+
}
|
121
|
+
|
122
|
+
parsed_parsley_free(ptr);
|
123
|
+
|
124
|
+
return output;
|
125
|
+
}
|
126
|
+
|
127
|
+
#define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
|
128
|
+
#define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
|
129
|
+
#define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
|
130
|
+
|
131
|
+
VALUE _parse(VALUE self, VALUE options){
|
132
|
+
parsleyPtr parsley;
|
133
|
+
Data_Get_Struct(self, parsleyPtr, parsley);
|
134
|
+
int flags = PARSLEY_OPTIONS_FORCE_UTF8;
|
135
|
+
char *base = NULL;
|
136
|
+
if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
|
137
|
+
if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
|
138
|
+
if(OPT_BOOL("collate")) flags |= PARSLEY_OPTIONS_COLLATE;
|
139
|
+
if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
|
140
|
+
if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
|
141
|
+
if(OPT_BOOL("sgwrap")) flags |= PARSLEY_OPTIONS_SGWRAP;
|
142
|
+
if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
|
143
|
+
|
144
|
+
// printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
|
145
|
+
|
146
|
+
if(OPT_BOOL("is_file")) {
|
147
|
+
return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"), rb_enc_find_index("UTF-8"));
|
148
|
+
} else {
|
149
|
+
char * str = STR2CSTR(OPT("string"));
|
150
|
+
return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"), rb_enc_find_index("UTF-8"));
|
151
|
+
}
|
152
|
+
}
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# ENV["ARCHFLAGS"] ||= "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}"
|
3
|
+
|
4
|
+
require 'mkmf'
|
5
|
+
|
6
|
+
EXT = File.expand_path(File.dirname(__FILE__))
|
7
|
+
|
8
|
+
LIBDIR = Config::CONFIG['libdir']
|
9
|
+
INCLUDEDIR = Config::CONFIG['includedir']
|
10
|
+
|
11
|
+
system "cd #{EXT}/parsley && ./configure && make"
|
12
|
+
|
13
|
+
$CFLAGS << " -g -DXP_UNIX"
|
14
|
+
$CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
15
|
+
|
16
|
+
|
17
|
+
if Config::CONFIG['target_os'] =~ /mswin32/
|
18
|
+
lib_prefix = 'lib'
|
19
|
+
|
20
|
+
# There's no default include/lib dir on Windows. Let's just add the Ruby ones
|
21
|
+
# and resort on the search path specified by INCLUDE and LIB environment
|
22
|
+
# variables
|
23
|
+
HEADER_DIRS = [INCLUDEDIR]
|
24
|
+
LIB_DIRS = [LIBDIR]
|
25
|
+
XML2_HEADER_DIRS = [File.join(INCLUDEDIR, "libxml2"), INCLUDEDIR]
|
26
|
+
|
27
|
+
else
|
28
|
+
lib_prefix = ''
|
29
|
+
|
30
|
+
HEADER_DIRS = [
|
31
|
+
# First search /opt/local for macports
|
32
|
+
'/opt/local/include',
|
33
|
+
|
34
|
+
# Then search /usr/local for people that installed from source
|
35
|
+
'/usr/local/include',
|
36
|
+
|
37
|
+
# Check the ruby install locations
|
38
|
+
INCLUDEDIR,
|
39
|
+
|
40
|
+
# Finally fall back to /usr
|
41
|
+
'/usr/include',
|
42
|
+
'/usr/include/libxml2',
|
43
|
+
]
|
44
|
+
|
45
|
+
LIB_DIRS = [
|
46
|
+
# First search /opt/local for macports
|
47
|
+
'/opt/local/lib',
|
48
|
+
|
49
|
+
# Then search /usr/local for people that installed from source
|
50
|
+
'/usr/local/lib',
|
51
|
+
|
52
|
+
# Check the ruby install locations
|
53
|
+
LIBDIR,
|
54
|
+
|
55
|
+
# Finally fall back to /usr
|
56
|
+
'/usr/lib',
|
57
|
+
]
|
58
|
+
|
59
|
+
XML2_HEADER_DIRS = [
|
60
|
+
'/opt/local/include/libxml2',
|
61
|
+
'/usr/local/include/libxml2',
|
62
|
+
File.join(INCLUDEDIR, "libxml2")
|
63
|
+
] + HEADER_DIRS
|
64
|
+
end
|
65
|
+
|
66
|
+
dir_config('xml2', XML2_HEADER_DIRS, LIB_DIRS)
|
67
|
+
dir_config('xslt', HEADER_DIRS, LIB_DIRS)
|
68
|
+
|
69
|
+
find_library("#{lib_prefix}xml2", 'xmlParseDoc') or abort "need -lxml2"
|
70
|
+
find_library("#{lib_prefix}xslt", 'xsltParseStylesheetDoc') or abort "need -lxslt"
|
71
|
+
find_header('libxml/xmlversion.h') or abort "need libxml headers"
|
72
|
+
find_header('libxslt/xslt.h') or abort "need libxslt headers"
|
73
|
+
find_header('ruby.h', INCLUDEDIR) or abort "need ruby.h"
|
74
|
+
|
75
|
+
find_header("#{EXT}/parsley/json-c-0.9/json.h", INCLUDEDIR) or abort "need json/json.h"
|
76
|
+
find_library("#{lib_prefix}json", "json_object_new_string", "#{EXT}/parsley/json-c-0.9/") or abort "need libjson"
|
77
|
+
|
78
|
+
find_header("#{EXT}/parsley/parsley.h", INCLUDEDIR) or abort "need parsley.h"
|
79
|
+
find_library("#{lib_prefix}parsley", 'parsley_compile', "#{EXT}/parsley/") or abort "need libparsley"
|
80
|
+
|
81
|
+
create_makefile('cparsley')
|
82
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
.libs/
|
2
|
+
json-c-0.8/config.h
|
3
|
+
json-c-0.8/json.pc
|
4
|
+
json-c-0.8/libjson.la
|
5
|
+
json-c-0.8/libtool
|
6
|
+
json-c-0.8/stamp-h1
|
7
|
+
json-c-0.8/test1
|
8
|
+
json-c-0.8/test2
|
9
|
+
json-c-0.8/test3
|
10
|
+
printbuf.loT
|
11
|
+
*.o
|
12
|
+
*.lo
|
13
|
+
dexterc
|
14
|
+
dexter
|
15
|
+
parsleyc
|
16
|
+
parsley
|
17
|
+
.deps/
|
18
|
+
Makefile
|
19
|
+
autom4te.cache/
|
20
|
+
autoscan.log
|
21
|
+
config.log
|
22
|
+
config.status
|
23
|
+
configure.scan
|
24
|
+
libparsley.la
|
25
|
+
test.log
|
26
|
+
ruby/parsley*.gem
|
27
|
+
ruby/ext/cparsley.bundle
|
28
|
+
ruby/ext/cparsley.so
|
29
|
+
ruby/ext/Makefile
|
30
|
+
ruby/ext/conftest.dSYM/
|
31
|
+
work
|
32
|
+
ruby/ext/mkmf.log
|
data/ext/parsley/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Kyle Maxwell
|
File without changes
|
data/ext/parsley/HACKING
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
These notes are intended for people hacking on the Parsley source.
|
2
|
+
|
3
|
+
Some gotchas:
|
4
|
+
- The parser.y is known to work with Bison 2.3, and known to break on Bison 2.4.1. This has something to do with the Bison prologue being moved from the generated parser.h file to the parser.c file. I'd love to hear anyone's solution for this.
|
data/ext/parsley/INSTALL
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
Welcome to Parsley!
|
2
|
+
|
3
|
+
Parsley depends on
|
4
|
+
- argp (standard with Linux, other platforms use argp-standalone package)
|
5
|
+
- the JSON C library from http://oss.metaparadigm.com/json-c/ (I used 0.8)
|
6
|
+
- pcre (with dev headers)
|
7
|
+
- libxml2 (>= 2.7)
|
8
|
+
- libxslt (with exslt)
|
9
|
+
|
10
|
+
|
11
|
+
Here's how to install it:
|
12
|
+
|
13
|
+
1. Get the release
|
14
|
+
------------------------------------------------------------------------
|
15
|
+
Parsley is currently still being tracked in git, and isn't ready to make a
|
16
|
+
formal release. So you need to either clone the repository or download the latest tarball:
|
17
|
+
|
18
|
+
git clone git://github.com/fizx/parsley.git
|
19
|
+
or
|
20
|
+
wget http://github.com/fizx/parsley/tarball/master
|
21
|
+
|
22
|
+
|
23
|
+
2. Build for your platform
|
24
|
+
------------------------------------------------------------------------
|
25
|
+
Enter your parsley working directory (from the clone or download you
|
26
|
+
just made) and, based on your platform, do the following:
|
27
|
+
|
28
|
+
|
29
|
+
Debian/Ubuntu
|
30
|
+
------------------------------------------------------------------------
|
31
|
+
sudo apt-get install libxslt1-dev libpcre3-dev build-essential wget
|
32
|
+
wget http://oss.metaparadigm.com/json-c/json-c-0.8.tar.gz
|
33
|
+
tar -zxf json-c-0.8.tar.gz
|
34
|
+
cd json-c-0.8
|
35
|
+
./configure
|
36
|
+
make
|
37
|
+
sudo make install
|
38
|
+
cd -
|
39
|
+
./configure
|
40
|
+
make
|
41
|
+
sudo make install
|
42
|
+
|
43
|
+
|
44
|
+
Mac OS X with MacPorts:
|
45
|
+
------------------------------------------------------------------------
|
46
|
+
sudo port install argp-standalone pcre wget libxslt
|
47
|
+
wget http://oss.metaparadigm.com/json-c/json-c-0.8.tar.gz
|
48
|
+
tar -zxf json-c-0.8.tar.gz
|
49
|
+
cd json-c-0.8
|
50
|
+
./configure
|
51
|
+
make
|
52
|
+
sudo make install
|
53
|
+
cd -
|
54
|
+
./configure
|
55
|
+
make
|
56
|
+
sudo make install
|
57
|
+
|
58
|
+
If you have a few extra minutes, consider replacing the last make with a
|
59
|
+
'make check' and let us know if it reports any failures from the test
|
60
|
+
suite - thanks!
|
61
|
+
|
62
|
+
3. Ruby Binding (via Gems)
|
63
|
+
------------------------------------------------------------------------
|
64
|
+
http://github.com/fizx/parsley-ruby
|
65
|
+
|
66
|
+
4. Python Binding
|
67
|
+
------------------------------------------------------------------------
|
68
|
+
http://github.com/fizx/pyparsley
|
69
|
+
|
70
|
+
Other OS/Configurations:
|
71
|
+
------------------------------------------------------------------------
|
72
|
+
Haven't tried.
|
73
|
+
|