fizx-parsley-ruby 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/cparsley.c +67 -47
- data/ext/extconf.rb +2 -0
- data/lib/parsley.rb +39 -15
- data/parsley-ruby.gemspec +2 -2
- data/test/test_parsley.rb +7 -0
- metadata +2 -2
data/ext/cparsley.c
CHANGED
@@ -13,10 +13,8 @@
|
|
13
13
|
#include <xml2json.h>
|
14
14
|
|
15
15
|
VALUE _new(VALUE, VALUE, VALUE);
|
16
|
-
VALUE
|
17
|
-
VALUE
|
18
|
-
VALUE _parse_doc(parsedParsleyPtr, VALUE);
|
19
|
-
VALUE rubify_recurse(xmlNodePtr xml);
|
16
|
+
VALUE _parse(VALUE, VALUE);
|
17
|
+
VALUE _rb_set_user_agent(VALUE self, VALUE agent);
|
20
18
|
VALUE c_parsley_err;
|
21
19
|
VALUE c_parsley;
|
22
20
|
|
@@ -25,11 +23,12 @@ void Init_cparsley()
|
|
25
23
|
c_parsley = rb_define_class("CParsley", rb_cObject);
|
26
24
|
c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
|
27
25
|
rb_define_singleton_method(c_parsley, "new", _new, 2);
|
28
|
-
|
29
|
-
rb_define_method(c_parsley, "
|
26
|
+
rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
|
27
|
+
rb_define_method(c_parsley, "parse", _parse, 1);
|
30
28
|
}
|
31
29
|
|
32
|
-
VALUE
|
30
|
+
VALUE
|
31
|
+
_new(VALUE self, VALUE parsley, VALUE incl){
|
33
32
|
parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
|
34
33
|
if(ptr->error != NULL) {
|
35
34
|
rb_raise(c_parsley_err, ptr->error);
|
@@ -40,49 +39,15 @@ VALUE _new(VALUE self, VALUE parsley, VALUE incl){
|
|
40
39
|
return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
|
41
40
|
}
|
42
41
|
|
43
|
-
VALUE
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
VALUE
|
43
|
+
_rb_set_user_agent(VALUE self, VALUE agent) {
|
44
|
+
parsley_set_user_agent(STR2CSTR(agent));
|
45
|
+
return Qtrue;
|
47
46
|
}
|
48
47
|
|
49
|
-
VALUE _parse_string(VALUE self, VALUE string, VALUE input, VALUE output) {
|
50
|
-
parsleyPtr parsley;
|
51
|
-
Data_Get_Struct(self, parsleyPtr, parsley);
|
52
|
-
char* cstr = STR2CSTR(string);
|
53
|
-
return _parse_doc(parsley_parse_string(parsley, cstr, strlen(cstr), input == ID2SYM(rb_intern("html")), 1), output);
|
54
|
-
}
|
55
48
|
|
56
|
-
|
57
|
-
|
58
|
-
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
59
|
-
rb_raise(c_parsley_err, ptr->error);
|
60
|
-
parsed_parsley_free(ptr);
|
61
|
-
return Qnil;
|
62
|
-
}
|
63
|
-
|
64
|
-
VALUE output;
|
65
|
-
if(type == ID2SYM(rb_intern("json"))) {
|
66
|
-
struct json_object *json = xml2json(ptr->xml->children->children);
|
67
|
-
char* str = json_object_to_json_string(json);
|
68
|
-
output = rb_str_new2(str);
|
69
|
-
json_object_put(json);
|
70
|
-
} else if(type == ID2SYM(rb_intern("xml"))) {
|
71
|
-
char* str;
|
72
|
-
int size;
|
73
|
-
xmlDocDumpMemory(ptr->xml, &str, &size);
|
74
|
-
output = rb_str_new(str, size);
|
75
|
-
} else {
|
76
|
-
output = rubify_recurse(ptr->xml->children->children);
|
77
|
-
if(output == NULL) output = Qnil;
|
78
|
-
}
|
79
|
-
|
80
|
-
parsed_parsley_free(ptr);
|
81
|
-
|
82
|
-
return output;
|
83
|
-
}
|
84
|
-
|
85
|
-
VALUE rubify_recurse(xmlNodePtr xml) {
|
49
|
+
static VALUE
|
50
|
+
rubify_recurse(xmlNodePtr xml) {
|
86
51
|
if(xml == NULL) return NULL;
|
87
52
|
xmlNodePtr child;
|
88
53
|
VALUE obj = Qnil;
|
@@ -115,4 +80,59 @@ VALUE rubify_recurse(xmlNodePtr xml) {
|
|
115
80
|
}
|
116
81
|
// inspect(obj);
|
117
82
|
return obj;
|
83
|
+
}
|
84
|
+
|
85
|
+
static VALUE
|
86
|
+
_parse_doc(parsedParsleyPtr ptr, VALUE type) {
|
87
|
+
if(ptr->error != NULL || ptr->xml == NULL) {
|
88
|
+
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
89
|
+
rb_raise(c_parsley_err, ptr->error);
|
90
|
+
parsed_parsley_free(ptr);
|
91
|
+
return Qnil;
|
92
|
+
}
|
93
|
+
|
94
|
+
VALUE output;
|
95
|
+
if(type == ID2SYM(rb_intern("json"))) {
|
96
|
+
struct json_object *json = xml2json(ptr->xml->children->children);
|
97
|
+
char* str = json_object_to_json_string(json);
|
98
|
+
output = rb_str_new2(str);
|
99
|
+
json_object_put(json);
|
100
|
+
} else if(type == ID2SYM(rb_intern("xml"))) {
|
101
|
+
xmlChar* str;
|
102
|
+
int size;
|
103
|
+
xmlDocDumpMemory(ptr->xml, &str, &size);
|
104
|
+
output = rb_str_new(str, size);
|
105
|
+
} else {
|
106
|
+
output = rubify_recurse(ptr->xml->children->children);
|
107
|
+
if((void*)output == NULL) output = Qnil;
|
108
|
+
}
|
109
|
+
|
110
|
+
parsed_parsley_free(ptr);
|
111
|
+
|
112
|
+
return output;
|
113
|
+
}
|
114
|
+
|
115
|
+
#define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
|
116
|
+
#define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
|
117
|
+
#define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
|
118
|
+
|
119
|
+
VALUE _parse(VALUE self, VALUE options){
|
120
|
+
parsleyPtr parsley;
|
121
|
+
Data_Get_Struct(self, parsleyPtr, parsley);
|
122
|
+
int flags = 0;
|
123
|
+
char *base = NULL;
|
124
|
+
if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
|
125
|
+
if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
|
126
|
+
if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
|
127
|
+
if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
|
128
|
+
if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
|
129
|
+
|
130
|
+
// printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
|
131
|
+
|
132
|
+
if(OPT_BOOL("is_file")) {
|
133
|
+
return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
|
134
|
+
} else {
|
135
|
+
char * str = STR2CSTR(OPT("string"));
|
136
|
+
return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
|
137
|
+
}
|
118
138
|
}
|
data/ext/extconf.rb
CHANGED
@@ -58,6 +58,8 @@ end
|
|
58
58
|
myincl = %w[/usr/local/include /opt/local/include /usr/include]
|
59
59
|
mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]
|
60
60
|
|
61
|
+
find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"
|
62
|
+
|
61
63
|
find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
|
62
64
|
find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"
|
63
65
|
|
data/lib/parsley.rb
CHANGED
@@ -4,6 +4,16 @@ require "json"
|
|
4
4
|
require "thread"
|
5
5
|
|
6
6
|
class Parsley
|
7
|
+
|
8
|
+
class << self
  # Value most recently assigned through Parsley.user_agent=
  # (nil until one has been assigned).
  attr_reader :user_agent

  # Remembers +agent+ as given and passes its string form to
  # CParsley.set_user_agent.
  def user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end
end
|
16
|
+
|
7
17
|
def initialize(parsley, incl = "")
|
8
18
|
if(parsley.is_a?(Hash))
|
9
19
|
parsley = parsley.to_json
|
@@ -17,23 +27,37 @@ class Parsley
|
|
17
27
|
# Valid options:
|
18
28
|
#
|
19
29
|
# Requires one of:
|
20
|
-
# :file -- the input file path
|
30
|
+
# :file -- the input file path or url
|
21
31
|
# :string -- the input string
|
22
32
|
#
|
23
|
-
# And optionally:
|
24
|
-
# :input => [:
|
25
|
-
# :output => [:
|
26
|
-
# :
|
27
|
-
#
|
28
|
-
#
|
33
|
+
# And optionally (default is the first listed value):
|
34
|
+
# :input => [:html, :xml]
|
35
|
+
# :output => [:ruby, :json, :xml]
|
36
|
+
# :prune => [true, false]
|
37
|
+
# :base => "http://some/base/href"
|
38
|
+
# :allow_net => [true, false]
|
39
|
+
# :allow_local => [true, false]
|
29
40
|
# Runs the compiled parslet against :file or :string and returns the result
# in the requested :output format. Raises ParsleyError when neither :file
# nor :string is given.
def parse(options = {})
  options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))

  # Normalize a copy, not the caller's hash. The steps below store "" for a
  # missing :file / :base, and "" is truthy, so a reused hash would otherwise
  # be treated as a file parse with a base on its second use.
  options = options.dup

  # Presence flags are computed before the values are coerced to strings.
  options[:is_file] = !!options[:file]
  options[:has_base] = !!options[:base]

  options[:base] = options[:base].to_s
  options[:file] = options[:file].to_s
  options[:string] = options[:string].to_s

  options[:input] ||= :html
  options[:output] ||= :ruby

  # These switches default to true when the key is absent, then are forced
  # to strict booleans.
  [:prune, :allow_net, :allow_local].each do |flag|
    options[flag] = true unless options.has_key?(flag)
    options[flag] = !!options[flag]
  end

  @parsley.parse(options)
end
|
39
63
|
end
|
data/parsley-ruby.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "parsley-ruby"
|
3
|
-
s.version = "0.
|
4
|
-
s.date = "
|
3
|
+
s.version = "0.3.0"
|
4
|
+
s.date = "2009-03-23"
|
5
5
|
s.summary = "Ruby binding for parsley"
|
6
6
|
s.email = "kyle@kylemaxwell.com"
|
7
7
|
s.homepage = "http://github.com/fizx/parsley-ruby"
|
data/test/test_parsley.rb
CHANGED
@@ -19,6 +19,13 @@ class TestParsley < Test::Unit::TestCase
|
|
19
19
|
out = @parsley.parse(:file => @home, :output => :xml)
|
20
20
|
end
|
21
21
|
|
22
|
+
# Parsing with the "no-ns:match(h1)" expression is expected to raise
# ParsleyError (presumably because the expression is invalid -- confirm
# against the parsley library).
def test_broken
  @parsley = Parsley.new({"hi" => "no-ns:match(h1)"})
  assert_raises(ParsleyError) do
    @parsley.parse(:file => @page)
  end
end
|
28
|
+
|
22
29
|
def test_simple
|
23
30
|
@parsley = Parsley.new("hi" => "h1")
|
24
31
|
assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fizx-parsley-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kyle Maxwell
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-03-23 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|