edge-parsley-ruby 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ .libs/
2
+ *.o
3
+ *.lo
4
+ dexterc
5
+ dexter
6
+ parsleyc
7
+ parsley
8
+ .deps/
9
+ Makefile
10
+ y.tab.c
11
+ autom4te.cache/
12
+ autoscan.log
13
+ config.log
14
+ configure.scan
15
+ parser.c
16
+ scanner.c
17
+ libparsley.la
18
+ parser.h
19
+ test.log
20
+ parsley*.gem
21
+ ext/cparsley.bundle
22
+ ext/cparsley.so
23
+ ext/Makefile
24
+ ext/conftest.dSYM/
25
+ work
26
+ ext/mkmf.log
@@ -0,0 +1,3 @@
1
+ 0.4.3
2
+ - Added CHANGELOG
3
+ - Only assigning ARCHFLAGS if not already specified.
data/README ADDED
@@ -0,0 +1,32 @@
1
+ ABOUT
2
+
3
+ Ruby bindings for Parsley.
4
+
5
+ INSTALLATION
6
+
7
+ = Get Parsley and Dependencies =
8
+
9
+ Download Parsley from http://github.com/fizx/parsley/tree/master following the installation directions located at http://github.com/fizx/parsley/blob/master/INSTALL
10
+
11
+ = Install parsley-ruby =
12
+
13
+ From source:
14
+ sudo rake install
15
+
16
+ From GitHub: DEPRECATED!
17
+
18
+ From GemCutter
19
+
20
+ Run the following if you haven't already:
21
+ gem sources -a http://gemcutter.org
22
+ Install the gem:
23
+ sudo gem install parsley-ruby
24
+
25
+ PARSLETS.COM INTEGRATION
26
+
27
+ We also recommend installing the free online_parselets rubygem in order to use other people's parselets and to share your own:
28
+ Run the following if you haven't already:
29
+ gem sources -a http://gems.github.com
30
+ Install the gem:
31
+ sudo gem install iterationlabs-online_parslets
32
+
@@ -0,0 +1,57 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks are provided by Jeweler when it is installed; without it
# the remaining tasks in this file are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "parsley-ruby"
    gem.summary = "Ruby binding for parsley"
    gem.description = "XML/HTML Parser"
    gem.email = "kyle@kylemaxwell.com"
    gem.homepage = "http://github.com/fizx/parsley-ruby"
    gem.authors = ["Kyle Maxwell"]
    gem.add_dependency("json", ["> 0.0.0"])
    gem.require_paths = ["lib", "ext"]
    gem.extensions = "ext/extconf.rb"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# The test files shipped with the gem are named test/test_*.rb
# (see test/test_parsley.rb), so the glob has to match that prefix form;
# the previous '*_test.rb' suffix glob matched none of them.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; falls back to a stub that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :test => :check_dependencies

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Use the contents of the VERSION file in the title when it is present.
  if File.exist?('VERSION')
    version = File.read('VERSION')
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  # Title names this gem (it previously said "robots").
  rdoc.title = "parsley-ruby #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.4.3
@@ -0,0 +1,140 @@
1
+ #include "ruby.h"
2
+ #include <stdio.h>
3
+ #include <libxslt/xslt.h>
4
+ #include <libexslt/exslt.h>
5
+ #include <libxslt/xsltInternals.h>
6
+ #include <libxslt/transform.h>
7
+ #include <libxml/parser.h>
8
+ #include <libxml/HTMLparser.h>
9
+ #include <libxml/HTMLtree.h>
10
+ #include <libxml/xmlwriter.h>
11
+ #include <parsley.h>
12
+ #include <json/json.h>
13
+ #include <xml2json.h>
14
+
15
+ VALUE _new(VALUE, VALUE, VALUE);
16
+ VALUE _parse(VALUE, VALUE);
17
+ VALUE _rb_set_user_agent(VALUE self, VALUE agent);
18
+ VALUE c_parsley_err;
19
+ VALUE c_parsley;
20
+
21
+ void Init_cparsley()
22
+ {
23
+ c_parsley = rb_define_class("CParsley", rb_cObject);
24
+ c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
25
+ rb_define_singleton_method(c_parsley, "new", _new, 2);
26
+ rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
27
+ rb_define_method(c_parsley, "parse", _parse, 1);
28
+ }
29
+
30
+ VALUE
31
+ _new(VALUE self, VALUE parsley, VALUE incl){
32
+ parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
33
+ if(ptr->error != NULL) {
34
+ rb_raise(c_parsley_err, ptr->error);
35
+ parsley_free(ptr);
36
+ return Qnil;
37
+ }
38
+
39
+ return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
40
+ }
41
+
42
+ VALUE
43
+ _rb_set_user_agent(VALUE self, VALUE agent) {
44
+ parsley_set_user_agent(STR2CSTR(agent));
45
+ return Qtrue;
46
+ }
47
+
48
+
49
+ static VALUE
50
+ rubify_recurse(xmlNodePtr xml) {
51
+ if(xml == NULL) return NULL;
52
+ xmlNodePtr child;
53
+ VALUE obj = Qnil;
54
+
55
+ switch(xml->type) {
56
+ case XML_ELEMENT_NODE:
57
+ child = xml->children;
58
+ if(xml->ns == NULL) {
59
+ child = xml;
60
+ obj = rb_hash_new();
61
+ while(child != NULL) {
62
+ rb_hash_aset(obj, rb_str_new2(child->name), rubify_recurse(child->children));
63
+ child = child->next;
64
+ }
65
+ } else if(!strcmp(xml->ns->prefix, "parsley")) {
66
+ if(!strcmp(xml->name, "groups")) {
67
+ obj = rb_ary_new();
68
+ while(child != NULL) {
69
+ rb_ary_push(obj, rubify_recurse(child->children));
70
+ child = child->next;
71
+ }
72
+ } else if(!strcmp(xml->name, "group")) {
73
+ // Implicitly handled by parsley:groups handler
74
+ }
75
+ }
76
+ break;
77
+ case XML_TEXT_NODE:
78
+ obj = rb_str_new2(xml->content);
79
+ break;
80
+ }
81
+ // inspect(obj);
82
+ return obj;
83
+ }
84
+
85
+ static VALUE
86
+ _parse_doc(parsedParsleyPtr ptr, VALUE type) {
87
+ if(ptr->error != NULL || ptr->xml == NULL) {
88
+ if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
89
+ rb_raise(c_parsley_err, ptr->error);
90
+ parsed_parsley_free(ptr);
91
+ return Qnil;
92
+ }
93
+
94
+ VALUE output;
95
+ if(type == ID2SYM(rb_intern("json"))) {
96
+ struct json_object *json = xml2json(ptr->xml->children->children);
97
+ char* str = json_object_to_json_string(json);
98
+ output = rb_str_new2(str);
99
+ json_object_put(json);
100
+ } else if(type == ID2SYM(rb_intern("xml"))) {
101
+ xmlChar* str;
102
+ int size;
103
+ xmlDocDumpMemory(ptr->xml, &str, &size);
104
+ output = rb_str_new(str, size);
105
+ } else {
106
+ output = rubify_recurse(ptr->xml->children->children);
107
+ if((void*)output == NULL) output = Qnil;
108
+ }
109
+
110
+ parsed_parsley_free(ptr);
111
+
112
+ return output;
113
+ }
114
+
115
+ #define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
116
+ #define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
117
+ #define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
118
+
119
+ VALUE _parse(VALUE self, VALUE options){
120
+ parsleyPtr parsley;
121
+ Data_Get_Struct(self, parsleyPtr, parsley);
122
+ int flags = 0;
123
+ char *base = NULL;
124
+ if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
125
+ if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
126
+ if(OPT_BOOL("collate")) flags |= PARSLEY_OPTIONS_COLLATE;
127
+ if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
128
+ if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
129
+ if(OPT_BOOL("sgwrap")) flags |= PARSLEY_OPTIONS_SGWRAP;
130
+ if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
131
+
132
+ // printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
133
+
134
+ if(OPT_BOOL("is_file")) {
135
+ return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
136
+ } else {
137
+ char * str = STR2CSTR(OPT("string"));
138
+ return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
139
+ }
140
+ }
@@ -0,0 +1,69 @@
1
#!/usr/bin/env ruby
# mkmf script that generates the Makefile for the cparsley C extension.
# It locates libxml2, libxslt, json-c and libparsley and aborts with a short
# message when a required header or library cannot be found.

# Respect a caller-supplied ARCHFLAGS; otherwise choose ppc or i386 from
# `uname -p`.
# NOTE(review): this fallback never selects a 64-bit architecture -- confirm
# that is still the intended default.
ENV["ARCHFLAGS"] ||= "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}"

require 'mkmf'

# RbConfig is the supported name; the old top-level Config alias is no
# longer defined by current Ruby versions.
ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
LIBDIR = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

mingw = RbConfig::CONFIG['target_os'] == 'mingw32'

$CFLAGS << " #{ENV["CFLAGS"]}"
if mingw
  $CFLAGS << " -DXP_WIN -DXP_WIN32"
else
  $CFLAGS << " -g -DXP_UNIX"
end

$CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"

# Libraries: on mingw32 use the copies under cross/, elsewhere Ruby's libdir.
if mingw
  find_library('xml2', 'xmlParseDoc',
               File.join(ROOT, 'cross', 'libxml2-2.7.2.win32', 'bin'))
  find_library('xslt', 'xsltParseStylesheetDoc',
               File.join(ROOT, 'cross', 'libxslt-1.1.24.win32', 'bin'))
else
  find_library('xml2', 'xmlParseDoc', LIBDIR)
  find_library('xslt', 'xsltParseStylesheetDoc', LIBDIR)
end

# Headers for the XML stack.
if mingw
  header = File.join(ROOT, 'cross', 'libxml2-2.7.2.win32', 'include')
  unless find_header('libxml/xmlversion.h', header)
    abort "need libxml"
  end

  header = File.join(ROOT, 'cross', 'libxslt-1.1.24.win32', 'include')
  unless find_header('libxslt/libxslt.h', header)
    abort "need libxslt"
  end

  header = File.join(ROOT, 'cross', 'iconv-1.9.2.win32', 'include')
  unless find_header('iconv.h', header)
    abort "need iconv"
  end
else
  unless find_header('libxml/xmlversion.h',
                     File.join(INCLUDEDIR, "libxml2"), '/usr/include/libxml2')
    abort "need libxml"
  end
  unless find_header('libxslt/xslt.h', INCLUDEDIR, '/usr/include')
    abort "need libxslt"
  end

  # The result is not used; the probe is kept so it still runs as before.
  try_constant('LIBXML_VERSION', 'libxml/xmlversion.h')
end

myincl = %w[/usr/local/include /opt/local/include /usr/include]
mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]

find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"

find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"

find_header('parsley.h', INCLUDEDIR, *myincl) or abort "need parsley.h"
find_library('parsley', 'parsley_compile', LIBDIR, *mylib) or abort "need libparsley"

create_makefile('cparsley')
@@ -0,0 +1,84 @@
1
+ require File.dirname(__FILE__) + "/../ext/cparsley"
2
+ require "rubygems"
3
+ require "json"
4
+ require "thread"
5
+
6
# Ruby-level wrapper around the CParsley extension class.
class Parsley

  # Parselet compilation is serialized through this mutex. It is created
  # once when the class is loaded, so two threads can never end up lazily
  # creating (and locking) two different mutexes.
  COMPILE_MUTEX = Mutex.new

  # Records the user agent and forwards it (as a String) to the extension.
  def self.user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end

  # Returns the value last assigned through user_agent=, or nil.
  def self.user_agent
    @user_agent
  end

  # parsley -- the parselet, either as a String or as a Hash. A Hash has
  #            all keys and leaf values converted to strings and is then
  #            serialized to JSON.
  # incl    -- second argument handed to CParsley.new (default "").
  def initialize(parsley, incl = "")
    if parsley.is_a?(Hash)
      parsley = recursive_stringify(parsley).to_json
    end
    COMPILE_MUTEX.synchronize do
      @parsley = CParsley.new(parsley, incl)
    end
  end

  # Valid options:
  #
  # Requires one of:
  # :file -- the input file path or url
  # :string -- the input string
  #
  # And optionally (default is the first listed value):
  # :input => [:html, :xml]
  # :output => [:ruby, :json, :xml]
  # :prune => [true, false]
  # :sgwrap => [false, true]
  # :collate => [true, false]
  # :base => "http://some/base/href"
  # :allow_net => [true, false]
  # :allow_local => [true, false]
  #
  # Raises ParsleyError when neither :file nor :string is given.
  # The hash passed in is left untouched.
  def parse(options = {})
    # Work on a copy. Normalizing in place used to write :file => "" into a
    # caller's :string-only hash; "" is truthy, so reusing that hash made
    # the next call look like a file parse.
    opts = options.dup

    unless opts[:file] || opts[:string]
      raise ParsleyError.new("must specify what to parse")
    end

    # Presence flags must be computed before the values are stringified,
    # because nil.to_s is "" and "" is truthy.
    opts[:is_file] = !!opts[:file]
    opts[:has_base] = !!opts[:base]
    [:base, :file, :string].each { |key| opts[key] = opts[key].to_s }

    opts[:input] ||= :html
    opts[:output] ||= :ruby

    # These flags default to true when absent and are otherwise coerced to
    # strict booleans.
    [:collate, :prune, :allow_net, :allow_local].each do |flag|
      opts[flag] = opts.has_key?(flag) ? !!opts[flag] : true
    end
    opts[:sgwrap] = !!opts[:sgwrap]

    @parsley.parse(opts)
  end

  private

  # Returns a copy of obj in which Hash keys and all non-container values
  # are converted with to_s; Hashes and Arrays are processed recursively.
  def recursive_stringify(obj)
    case obj
    when Hash
      obj.inject({}) do |memo, (k, v)|
        memo[k.to_s] = recursive_stringify(v)
        memo
      end
    when Array
      obj.map { |e| recursive_stringify(e) }
    else
      obj.to_s
    end
  end

end
@@ -0,0 +1,58 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-

Gem::Specification.new do |s|
  s.name = %q{parsley-ruby}
  s.version = "0.4.3"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kyle Maxwell"]
  s.date = %q{2009-12-28}
  s.description = %q{XML/HTML Parser}
  s.email = %q{kyle@kylemaxwell.com}
  s.extensions = ["ext/extconf.rb"]
  s.extra_rdoc_files = [
    "README"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "README",
    "Rakefile",
    "VERSION",
    "ext/cparsley.c",
    "ext/extconf.rb",
    "lib/parsley.rb",
    "parsley-ruby.gemspec",
    "test/test_parsley.rb",
    "test/yelp-benchmark.rb",
    "test/yelp-home.html",
    "test/yelp-home.let",
    "test/yelp.html"
  ]
  s.homepage = %q{http://github.com/fizx/parsley-ruby}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib", "ext"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Ruby binding for parsley}
  s.test_files = [
    "test/test_parsley.rb",
    "test/yelp-benchmark.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Current RubyGems releases define Gem::VERSION but no longer define
    # Gem::RubyGemsVersion; prefer the former and fall back to the latter
    # so the spec loads on both old and new RubyGems.
    installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<json>, ["> 0.0.0"])
    else
      s.add_dependency(%q<json>, ["> 0.0.0"])
    end
  else
    s.add_dependency(%q<json>, ["> 0.0.0"])
  end
end
58
+