fizx-parsley-ruby 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/cparsley.c +67 -47
- data/ext/extconf.rb +2 -0
- data/lib/parsley.rb +39 -15
- data/parsley-ruby.gemspec +2 -2
- data/test/test_parsley.rb +7 -0
- metadata +2 -2
data/ext/cparsley.c
CHANGED
@@ -13,10 +13,8 @@
|
|
13
13
|
#include <xml2json.h>
|
14
14
|
|
15
15
|
VALUE _new(VALUE, VALUE, VALUE);
|
16
|
-
VALUE
|
17
|
-
VALUE
|
18
|
-
VALUE _parse_doc(parsedParsleyPtr, VALUE);
|
19
|
-
VALUE rubify_recurse(xmlNodePtr xml);
|
16
|
+
VALUE _parse(VALUE, VALUE);
|
17
|
+
VALUE _rb_set_user_agent(VALUE self, VALUE agent);
|
20
18
|
VALUE c_parsley_err;
|
21
19
|
VALUE c_parsley;
|
22
20
|
|
@@ -25,11 +23,12 @@ void Init_cparsley()
|
|
25
23
|
c_parsley = rb_define_class("CParsley", rb_cObject);
|
26
24
|
c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
|
27
25
|
rb_define_singleton_method(c_parsley, "new", _new, 2);
|
28
|
-
|
29
|
-
rb_define_method(c_parsley, "
|
26
|
+
rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
|
27
|
+
rb_define_method(c_parsley, "parse", _parse, 1);
|
30
28
|
}
|
31
29
|
|
32
|
-
VALUE
|
30
|
+
VALUE
|
31
|
+
_new(VALUE self, VALUE parsley, VALUE incl){
|
33
32
|
parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
|
34
33
|
if(ptr->error != NULL) {
|
35
34
|
rb_raise(c_parsley_err, ptr->error);
|
@@ -40,49 +39,15 @@ VALUE _new(VALUE self, VALUE parsley, VALUE incl){
|
|
40
39
|
return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
|
41
40
|
}
|
42
41
|
|
43
|
-
VALUE
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
VALUE
|
43
|
+
_rb_set_user_agent(VALUE self, VALUE agent) {
|
44
|
+
parsley_set_user_agent(STR2CSTR(agent));
|
45
|
+
return Qtrue;
|
47
46
|
}
|
48
47
|
|
49
|
-
VALUE _parse_string(VALUE self, VALUE string, VALUE input, VALUE output) {
|
50
|
-
parsleyPtr parsley;
|
51
|
-
Data_Get_Struct(self, parsleyPtr, parsley);
|
52
|
-
char* cstr = STR2CSTR(string);
|
53
|
-
return _parse_doc(parsley_parse_string(parsley, cstr, strlen(cstr), input == ID2SYM(rb_intern("html")), 1), output);
|
54
|
-
}
|
55
48
|
|
56
|
-
|
57
|
-
|
58
|
-
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
59
|
-
rb_raise(c_parsley_err, ptr->error);
|
60
|
-
parsed_parsley_free(ptr);
|
61
|
-
return Qnil;
|
62
|
-
}
|
63
|
-
|
64
|
-
VALUE output;
|
65
|
-
if(type == ID2SYM(rb_intern("json"))) {
|
66
|
-
struct json_object *json = xml2json(ptr->xml->children->children);
|
67
|
-
char* str = json_object_to_json_string(json);
|
68
|
-
output = rb_str_new2(str);
|
69
|
-
json_object_put(json);
|
70
|
-
} else if(type == ID2SYM(rb_intern("xml"))) {
|
71
|
-
char* str;
|
72
|
-
int size;
|
73
|
-
xmlDocDumpMemory(ptr->xml, &str, &size);
|
74
|
-
output = rb_str_new(str, size);
|
75
|
-
} else {
|
76
|
-
output = rubify_recurse(ptr->xml->children->children);
|
77
|
-
if(output == NULL) output = Qnil;
|
78
|
-
}
|
79
|
-
|
80
|
-
parsed_parsley_free(ptr);
|
81
|
-
|
82
|
-
return output;
|
83
|
-
}
|
84
|
-
|
85
|
-
VALUE rubify_recurse(xmlNodePtr xml) {
|
49
|
+
static VALUE
|
50
|
+
rubify_recurse(xmlNodePtr xml) {
|
86
51
|
if(xml == NULL) return NULL;
|
87
52
|
xmlNodePtr child;
|
88
53
|
VALUE obj = Qnil;
|
@@ -115,4 +80,59 @@ VALUE rubify_recurse(xmlNodePtr xml) {
|
|
115
80
|
}
|
116
81
|
// inspect(obj);
|
117
82
|
return obj;
|
83
|
+
}
|
84
|
+
|
85
|
+
static VALUE
|
86
|
+
_parse_doc(parsedParsleyPtr ptr, VALUE type) {
|
87
|
+
if(ptr->error != NULL || ptr->xml == NULL) {
|
88
|
+
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
89
|
+
rb_raise(c_parsley_err, ptr->error);
|
90
|
+
parsed_parsley_free(ptr);
|
91
|
+
return Qnil;
|
92
|
+
}
|
93
|
+
|
94
|
+
VALUE output;
|
95
|
+
if(type == ID2SYM(rb_intern("json"))) {
|
96
|
+
struct json_object *json = xml2json(ptr->xml->children->children);
|
97
|
+
char* str = json_object_to_json_string(json);
|
98
|
+
output = rb_str_new2(str);
|
99
|
+
json_object_put(json);
|
100
|
+
} else if(type == ID2SYM(rb_intern("xml"))) {
|
101
|
+
xmlChar* str;
|
102
|
+
int size;
|
103
|
+
xmlDocDumpMemory(ptr->xml, &str, &size);
|
104
|
+
output = rb_str_new(str, size);
|
105
|
+
} else {
|
106
|
+
output = rubify_recurse(ptr->xml->children->children);
|
107
|
+
if((void*)output == NULL) output = Qnil;
|
108
|
+
}
|
109
|
+
|
110
|
+
parsed_parsley_free(ptr);
|
111
|
+
|
112
|
+
return output;
|
113
|
+
}
|
114
|
+
|
115
|
+
#define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
|
116
|
+
#define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
|
117
|
+
#define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
|
118
|
+
|
119
|
+
VALUE _parse(VALUE self, VALUE options){
|
120
|
+
parsleyPtr parsley;
|
121
|
+
Data_Get_Struct(self, parsleyPtr, parsley);
|
122
|
+
int flags = 0;
|
123
|
+
char *base = NULL;
|
124
|
+
if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
|
125
|
+
if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
|
126
|
+
if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
|
127
|
+
if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
|
128
|
+
if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
|
129
|
+
|
130
|
+
// printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
|
131
|
+
|
132
|
+
if(OPT_BOOL("is_file")) {
|
133
|
+
return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
|
134
|
+
} else {
|
135
|
+
char * str = STR2CSTR(OPT("string"));
|
136
|
+
return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
|
137
|
+
}
|
118
138
|
}
|
data/ext/extconf.rb
CHANGED
@@ -58,6 +58,8 @@ end
|
|
58
58
|
myincl = %w[/usr/local/include /opt/local/include /usr/include]
|
59
59
|
mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]
|
60
60
|
|
61
|
+
find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"
|
62
|
+
|
61
63
|
find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
|
62
64
|
find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"
|
63
65
|
|
data/lib/parsley.rb
CHANGED
@@ -4,6 +4,16 @@ require "json"
|
|
4
4
|
require "thread"
|
5
5
|
|
6
6
|
class Parsley
|
7
|
+
|
8
|
+
def self.user_agent=(agent)
|
9
|
+
@user_agent = agent
|
10
|
+
CParsley.set_user_agent(agent.to_s)
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.user_agent
|
14
|
+
@user_agent
|
15
|
+
end
|
16
|
+
|
7
17
|
def initialize(parsley, incl = "")
|
8
18
|
if(parsley.is_a?(Hash))
|
9
19
|
parsley = parsley.to_json
|
@@ -17,23 +27,37 @@ class Parsley
|
|
17
27
|
# Valid options:
|
18
28
|
#
|
19
29
|
# Requires one of:
|
20
|
-
# :file -- the input file path
|
30
|
+
# :file -- the input file path or url
|
21
31
|
# :string -- the input string
|
22
32
|
#
|
23
|
-
# And optionally:
|
24
|
-
# :input => [:
|
25
|
-
# :output => [:
|
26
|
-
# :
|
27
|
-
#
|
28
|
-
#
|
33
|
+
# And optionally (default is the first listed value):
|
34
|
+
# :input => [:html, :xml]
|
35
|
+
# :output => [:ruby, :json, :xml]
|
36
|
+
# :prune => [true, false]
|
37
|
+
# :base => "http://some/base/href"
|
38
|
+
# :allow_net => [true, false]
|
39
|
+
# :allow_local => [true, false]
|
29
40
|
def parse(options = {})
|
30
|
-
options[:file] || options[:string] ||
|
31
|
-
|
32
|
-
options[:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
41
|
+
options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))
|
42
|
+
|
43
|
+
options[:is_file] = !!options[:file]
|
44
|
+
options[:has_base] = !!options[:base]
|
45
|
+
|
46
|
+
options[:base] = options[:base].to_s
|
47
|
+
options[:file] = options[:file].to_s
|
48
|
+
options[:string] = options[:string].to_s
|
49
|
+
|
50
|
+
options[:input] ||= :html
|
51
|
+
options[:output] ||= :ruby
|
52
|
+
|
53
|
+
options[:prune] = true unless options.has_key?(:prune)
|
54
|
+
options[:allow_net] = true unless options.has_key?(:allow_net)
|
55
|
+
options[:allow_local] = true unless options.has_key?(:allow_local)
|
56
|
+
|
57
|
+
options[:prune] = !!options[:prune]
|
58
|
+
options[:allow_net] = !!options[:allow_net]
|
59
|
+
options[:allow_local] = !!options[:allow_local]
|
60
|
+
|
61
|
+
@parsley.parse(options)
|
38
62
|
end
|
39
63
|
end
|
data/parsley-ruby.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "parsley-ruby"
|
3
|
-
s.version = "0.
|
4
|
-
s.date = "
|
3
|
+
s.version = "0.3.0"
|
4
|
+
s.date = "2009-03-23"
|
5
5
|
s.summary = "Ruby binding for parsley"
|
6
6
|
s.email = "kyle@kylemaxwell.com"
|
7
7
|
s.homepage = "http://github.com/fizx/parsley-ruby"
|
data/test/test_parsley.rb
CHANGED
@@ -19,6 +19,13 @@ class TestParsley < Test::Unit::TestCase
|
|
19
19
|
out = @parsley.parse(:file => @home, :output => :xml)
|
20
20
|
end
|
21
21
|
|
22
|
+
def test_broken
|
23
|
+
@parsley = Parsley.new("hi" => "no-ns:match(h1)")
|
24
|
+
assert_raises(ParsleyError) {
|
25
|
+
@parsley.parse(:file => @page)
|
26
|
+
}
|
27
|
+
end
|
28
|
+
|
22
29
|
def test_simple
|
23
30
|
@parsley = Parsley.new("hi" => "h1")
|
24
31
|
assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fizx-parsley-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kyle Maxwell
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-03-23 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|