edge-parsley-ruby 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
+ .libs/
2
+ *.o
3
+ *.lo
4
+ dexterc
5
+ dexter
6
+ parsleyc
7
+ parsley
8
+ .deps/
9
+ Makefile
10
+ y.tab.c
11
+ autom4te.cache/
12
+ autoscan.log
13
+ config.log
14
+ configure.scan
15
+ parser.c
16
+ scanner.c
17
+ libparsley.la
18
+ parser.h
19
+ test.log
20
+ parsley*.gem
21
+ ext/cparsley.bundle
22
+ ext/cparsley.so
23
+ ext/Makefile
24
+ ext/conftest.dSYM/
25
+ work
26
+ ext/mkmf.log
@@ -0,0 +1,3 @@
1
+ 0.4.3
2
+ - Added CHANGELOG
3
+ - Only assigning ARCHFLAGS if not already specified.
data/README ADDED
@@ -0,0 +1,32 @@
1
+ ABOUT
2
+
3
+ Ruby bindings for Parsley.
4
+
5
+ INSTALLATION
6
+
7
+ = Get Parsley and Dependencies =
8
+
9
+ Download Parsley from http://github.com/fizx/parsley/tree/master following the installation directions located at http://github.com/fizx/parsley/blob/master/INSTALL
10
+
11
+ = Install parsley-ruby =
12
+
13
+ From source:
14
+ sudo rake install
15
+
16
+ From GitHub: DEPRECATED!
17
+
18
+ From GemCutter:
19
+
20
+ Run the following if you haven't already:
21
+ gem sources -a http://gemcutter.org
22
+ Install the gem:
23
+ sudo gem install parsley-ruby
24
+
25
+ PARSLETS.COM INTEGRATION
26
+
27
+ We also recommend installing the free online_parslets rubygem in order to use other people's parslets and to share your own:
28
+ Run the following if you haven't already:
29
+ gem sources -a http://gems.github.com
30
+ Install the gem:
31
+ sudo gem install iterationlabs-online_parslets
32
+
@@ -0,0 +1,57 @@
1
# Rakefile for the parsley-ruby gem: packaging (Jeweler), tests, coverage (RCov)
# and RDoc generation.
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: when it cannot be loaded we only print
# a hint and carry on, so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "parsley-ruby"
    gem.summary = "Ruby binding for parsley"
    gem.description = "XML/HTML Parser"
    gem.email = "kyle@kylemaxwell.com"
    gem.homepage = "http://github.com/fizx/parsley-ruby"
    gem.authors = ["Kyle Maxwell"]
    gem.add_dependency("json", ["> 0.0.0"])
    gem.require_paths = ["lib", "ext"]
    gem.extensions = "ext/extconf.rb"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# The gem ships its tests as test/test_parsley.rb, which the previous
# '*_test.rb' glob never matched. Accept both naming conventions.
test_glob = 'test/**/{*_test,test_*}.rb'

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = test_glob
  test.verbose = true
end

# Coverage task. When rcov is missing, define a stub task that explains how to
# install it instead of failing at load time.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = test_glob
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is only wired in when such a task has actually been
# defined (it is not defined here; presumably Jeweler provides it). Depending
# on it unconditionally would break `rake test` whenever Jeweler failed to load.
task :test => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :test

# Newer Rake releases no longer ship rake/rdoctask; prefer the task class that
# comes with RDoc itself and fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # VERSION normally ends with a newline; strip it so the title stays on one line.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "parsley-ruby #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.4.3
@@ -0,0 +1,140 @@
1
+ #include "ruby.h"
2
+ #include <stdio.h>
3
+ #include <libxslt/xslt.h>
4
+ #include <libexslt/exslt.h>
5
+ #include <libxslt/xsltInternals.h>
6
+ #include <libxslt/transform.h>
7
+ #include <libxml/parser.h>
8
+ #include <libxml/HTMLparser.h>
9
+ #include <libxml/HTMLtree.h>
10
+ #include <libxml/xmlwriter.h>
11
+ #include <parsley.h>
12
+ #include <json/json.h>
13
+ #include <xml2json.h>
14
+
15
+ VALUE _new(VALUE, VALUE, VALUE);
16
+ VALUE _parse(VALUE, VALUE);
17
+ VALUE _rb_set_user_agent(VALUE self, VALUE agent);
18
+ VALUE c_parsley_err;
19
+ VALUE c_parsley;
20
+
21
+ void Init_cparsley()
22
+ {
23
+ c_parsley = rb_define_class("CParsley", rb_cObject);
24
+ c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
25
+ rb_define_singleton_method(c_parsley, "new", _new, 2);
26
+ rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
27
+ rb_define_method(c_parsley, "parse", _parse, 1);
28
+ }
29
+
30
+ VALUE
31
+ _new(VALUE self, VALUE parsley, VALUE incl){
32
+ parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
33
+ if(ptr->error != NULL) {
34
+ rb_raise(c_parsley_err, ptr->error);
35
+ parsley_free(ptr);
36
+ return Qnil;
37
+ }
38
+
39
+ return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
40
+ }
41
+
42
+ VALUE
43
+ _rb_set_user_agent(VALUE self, VALUE agent) {
44
+ parsley_set_user_agent(STR2CSTR(agent));
45
+ return Qtrue;
46
+ }
47
+
48
+
49
+ static VALUE
50
+ rubify_recurse(xmlNodePtr xml) {
51
+ if(xml == NULL) return NULL;
52
+ xmlNodePtr child;
53
+ VALUE obj = Qnil;
54
+
55
+ switch(xml->type) {
56
+ case XML_ELEMENT_NODE:
57
+ child = xml->children;
58
+ if(xml->ns == NULL) {
59
+ child = xml;
60
+ obj = rb_hash_new();
61
+ while(child != NULL) {
62
+ rb_hash_aset(obj, rb_str_new2(child->name), rubify_recurse(child->children));
63
+ child = child->next;
64
+ }
65
+ } else if(!strcmp(xml->ns->prefix, "parsley")) {
66
+ if(!strcmp(xml->name, "groups")) {
67
+ obj = rb_ary_new();
68
+ while(child != NULL) {
69
+ rb_ary_push(obj, rubify_recurse(child->children));
70
+ child = child->next;
71
+ }
72
+ } else if(!strcmp(xml->name, "group")) {
73
+ // Implicitly handled by parsley:groups handler
74
+ }
75
+ }
76
+ break;
77
+ case XML_TEXT_NODE:
78
+ obj = rb_str_new2(xml->content);
79
+ break;
80
+ }
81
+ // inspect(obj);
82
+ return obj;
83
+ }
84
+
85
+ static VALUE
86
+ _parse_doc(parsedParsleyPtr ptr, VALUE type) {
87
+ if(ptr->error != NULL || ptr->xml == NULL) {
88
+ if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
89
+ rb_raise(c_parsley_err, ptr->error);
90
+ parsed_parsley_free(ptr);
91
+ return Qnil;
92
+ }
93
+
94
+ VALUE output;
95
+ if(type == ID2SYM(rb_intern("json"))) {
96
+ struct json_object *json = xml2json(ptr->xml->children->children);
97
+ char* str = json_object_to_json_string(json);
98
+ output = rb_str_new2(str);
99
+ json_object_put(json);
100
+ } else if(type == ID2SYM(rb_intern("xml"))) {
101
+ xmlChar* str;
102
+ int size;
103
+ xmlDocDumpMemory(ptr->xml, &str, &size);
104
+ output = rb_str_new(str, size);
105
+ } else {
106
+ output = rubify_recurse(ptr->xml->children->children);
107
+ if((void*)output == NULL) output = Qnil;
108
+ }
109
+
110
+ parsed_parsley_free(ptr);
111
+
112
+ return output;
113
+ }
114
+
115
+ #define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
116
+ #define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
117
+ #define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
118
+
119
+ VALUE _parse(VALUE self, VALUE options){
120
+ parsleyPtr parsley;
121
+ Data_Get_Struct(self, parsleyPtr, parsley);
122
+ int flags = 0;
123
+ char *base = NULL;
124
+ if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
125
+ if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
126
+ if(OPT_BOOL("collate")) flags |= PARSLEY_OPTIONS_COLLATE;
127
+ if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
128
+ if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
129
+ if(OPT_BOOL("sgwrap")) flags |= PARSLEY_OPTIONS_SGWRAP;
130
+ if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
131
+
132
+ // printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
133
+
134
+ if(OPT_BOOL("is_file")) {
135
+ return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
136
+ } else {
137
+ char * str = STR2CSTR(OPT("string"));
138
+ return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
139
+ }
140
+ }
@@ -0,0 +1,69 @@
1
#!/usr/bin/env ruby
# mkmf build script for the cparsley C extension. Locates libxml2, libxslt,
# json-c and libparsley (headers and libraries) and writes the Makefile.

# Only set ARCHFLAGS when the caller has not chosen one.
# NOTE(review): every non-PowerPC host defaults to i386 here, which looks wrong
# for 64-bit machines -- confirm before relying on the default.
ENV["ARCHFLAGS"] ||= "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}"

require 'mkmf'

ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
# RbConfig replaces the obsolete Config constant, which newer Rubies no longer define.
LIBDIR = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

# Cross-compiled Windows builds use the libraries bundled under cross/.
windows = RbConfig::CONFIG['target_os'] == 'mingw32'

$CFLAGS << " #{ENV["CFLAGS"]}"
$CFLAGS << (windows ? " -DXP_WIN -DXP_WIN32" : " -g -DXP_UNIX")
$CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"

if windows
  xml2_root  = File.join(ROOT, 'cross', 'libxml2-2.7.2.win32')
  xslt_root  = File.join(ROOT, 'cross', 'libxslt-1.1.24.win32')
  iconv_root = File.join(ROOT, 'cross', 'iconv-1.9.2.win32')

  find_library('xml2', 'xmlParseDoc', File.join(xml2_root, 'bin'))
  find_library('xslt', 'xsltParseStylesheetDoc', File.join(xslt_root, 'bin'))

  find_header('libxml/xmlversion.h', File.join(xml2_root, 'include')) or abort "need libxml"
  find_header('libxslt/libxslt.h', File.join(xslt_root, 'include')) or abort "need libxslt"
  find_header('iconv.h', File.join(iconv_root, 'include')) or abort "need iconv"
else
  find_library('xml2', 'xmlParseDoc', LIBDIR)
  find_library('xslt', 'xsltParseStylesheetDoc', LIBDIR)

  find_header('libxml/xmlversion.h',
              File.join(INCLUDEDIR, "libxml2"), '/usr/include/libxml2') or abort "need libxml"
  find_header('libxslt/xslt.h', INCLUDEDIR, '/usr/include') or abort "need libxslt"
end

# Extra search locations for the remaining dependencies.
myincl = %w[/usr/local/include /opt/local/include /usr/include]
mylib  = %w[/usr/local/lib /opt/local/lib /usr/lib]

find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"

find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"

find_header('parsley.h', INCLUDEDIR, *myincl) or abort "need parsley.h"
find_library('parsley', 'parsley_compile', LIBDIR, *mylib) or abort "need libparsley"

create_makefile('cparsley')
@@ -0,0 +1,84 @@
1
+ require File.dirname(__FILE__) + "/../ext/cparsley"
2
+ require "rubygems"
3
+ require "json"
4
+ require "thread"
5
+
6
# Ruby front end for the CParsley C extension: compiles a parslet once and
# runs it against files or strings.
class Parsley

  # Serialises calls to CParsley.new. Created once at class-definition time;
  # the previous lazy `@@mutex ||= Mutex.new` inside #initialize could hand
  # two racing threads two different mutexes.
  COMPILE_MUTEX = Mutex.new

  # Sets the user agent: remembers the given object and passes its string
  # form to CParsley.set_user_agent.
  def self.user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end

  # Returns the object last assigned through Parsley.user_agent= (or nil).
  def self.user_agent
    @user_agent
  end

  # parsley -- the parslet, either as a String or as a Hash (a Hash has all
  #            keys and leaf values stringified and is serialised to JSON)
  # incl    -- include string passed through to CParsley.new
  #
  # Raises ParsleyError (from CParsley.new) when the parslet does not compile.
  def initialize(parsley, incl = "")
    parsley = recursive_stringify(parsley).to_json if parsley.is_a?(Hash)
    COMPILE_MUTEX.synchronize do
      @parsley = CParsley.new(parsley, incl)
    end
  end

  # Valid options:
  #
  # Requires one of:
  # :file -- the input file path or url
  # :string -- the input string
  #
  # And optionally (default is the first listed value):
  # :input => [:html, :xml]
  # :output => [:ruby, :json, :xml]
  # :prune => [true, false]
  # :sgwrap => [false, true]
  # :collate => [true, false]
  # :base => "http://some/base/href"
  # :allow_net => [true, false]
  # :allow_local => [true, false]
  #
  # The caller's hash is left untouched; normalisation happens on a copy.
  # Raises ParsleyError when neither :file nor :string is given.
  def parse(options = {})
    options = options.dup

    unless options[:file] || options[:string]
      raise ParsleyError, "must specify what to parse"
    end

    # Presence flags must be computed before the values are stringified:
    # nil.to_s is "", which is truthy.
    options[:sgwrap]   = !!options[:sgwrap]
    options[:is_file]  = !!options[:file]
    options[:has_base] = !!options[:base]

    [:base, :file, :string].each { |key| options[key] = options[key].to_s }

    options[:input]  ||= :html
    options[:output] ||= :ruby

    # These switches default to true when absent; otherwise coerce to a boolean.
    [:collate, :prune, :allow_net, :allow_local].each do |flag|
      options[flag] = options.has_key?(flag) ? !!options[flag] : true
    end

    @parsley.parse(options)
  end

  private

  # Returns a deep copy of obj in which Hash keys and every non-collection
  # value have been converted with to_s (Hashes and Arrays are recursed into).
  def recursive_stringify(obj)
    case obj
    when Hash
      obj.inject({}) do |memo, (k, v)|
        memo[k.to_s] = recursive_stringify(v)
        memo
      end
    when Array
      obj.map { |e| recursive_stringify(e) }
    else
      obj.to_s
    end
  end

end
@@ -0,0 +1,58 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-

# Gem specification for parsley-ruby: metadata, packaged files, the native
# extension build script and the runtime dependency on json.
Gem::Specification.new do |s|
  s.name = %q{parsley-ruby}
  s.version = "0.4.3"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kyle Maxwell"]
  s.date = %q{2009-12-28}
  s.description = %q{XML/HTML Parser}
  s.email = %q{kyle@kylemaxwell.com}
  s.extensions = ["ext/extconf.rb"]
  s.extra_rdoc_files = [
    "README"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "README",
    "Rakefile",
    "VERSION",
    "ext/cparsley.c",
    "ext/extconf.rb",
    "lib/parsley.rb",
    "parsley-ruby.gemspec",
    "test/test_parsley.rb",
    "test/yelp-benchmark.rb",
    "test/yelp-home.html",
    "test/yelp-home.let",
    "test/yelp.html"
  ]
  s.homepage = %q{http://github.com/fizx/parsley-ruby}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib", "ext"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Ruby binding for parsley}
  s.test_files = [
    "test/test_parsley.rb",
    "test/yelp-benchmark.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is absent from newer RubyGems releases, which
    # expose Gem::VERSION instead; use whichever constant is defined.
    installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<json>, ["> 0.0.0"])
    else
      s.add_dependency(%q<json>, ["> 0.0.0"])
    end
  else
    s.add_dependency(%q<json>, ["> 0.0.0"])
  end
end
58
+