fizx-parsley-ruby 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/cparsley.c CHANGED
@@ -13,10 +13,8 @@
13
13
  #include <xml2json.h>
14
14
 
15
15
  VALUE _new(VALUE, VALUE, VALUE);
16
- VALUE _parse_file(VALUE, VALUE, VALUE, VALUE);
17
- VALUE _parse_string(VALUE, VALUE, VALUE, VALUE);
18
- VALUE _parse_doc(parsedParsleyPtr, VALUE);
19
- VALUE rubify_recurse(xmlNodePtr xml);
16
+ VALUE _parse(VALUE, VALUE);
17
+ VALUE _rb_set_user_agent(VALUE self, VALUE agent);
20
18
  VALUE c_parsley_err;
21
19
  VALUE c_parsley;
22
20
 
@@ -25,11 +23,12 @@ void Init_cparsley()
25
23
  c_parsley = rb_define_class("CParsley", rb_cObject);
26
24
  c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
27
25
  rb_define_singleton_method(c_parsley, "new", _new, 2);
28
- rb_define_method(c_parsley, "parse_file", _parse_file, 3);
29
- rb_define_method(c_parsley, "parse_string", _parse_string, 3);
26
+ rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
27
+ rb_define_method(c_parsley, "parse", _parse, 1);
30
28
  }
31
29
 
32
- VALUE _new(VALUE self, VALUE parsley, VALUE incl){
30
+ VALUE
31
+ _new(VALUE self, VALUE parsley, VALUE incl){
33
32
  parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
34
33
  if(ptr->error != NULL) {
35
34
  rb_raise(c_parsley_err, ptr->error);
@@ -40,49 +39,15 @@ VALUE _new(VALUE self, VALUE parsley, VALUE incl){
40
39
  return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
41
40
  }
42
41
 
43
- VALUE _parse_file(VALUE self, VALUE name, VALUE input, VALUE output){
44
- parsleyPtr parsley;
45
- Data_Get_Struct(self, parsleyPtr, parsley);
46
- return _parse_doc(parsley_parse_file(parsley, STR2CSTR(name), input == ID2SYM(rb_intern("html")), 1), output);
42
+ VALUE
43
+ _rb_set_user_agent(VALUE self, VALUE agent) {
44
+ parsley_set_user_agent(STR2CSTR(agent));
45
+ return Qtrue;
47
46
  }
48
47
 
49
- VALUE _parse_string(VALUE self, VALUE string, VALUE input, VALUE output) {
50
- parsleyPtr parsley;
51
- Data_Get_Struct(self, parsleyPtr, parsley);
52
- char* cstr = STR2CSTR(string);
53
- return _parse_doc(parsley_parse_string(parsley, cstr, strlen(cstr), input == ID2SYM(rb_intern("html")), 1), output);
54
- }
55
48
 
56
- VALUE _parse_doc(parsedParsleyPtr ptr, VALUE type) {
57
- if(ptr->error != NULL || ptr->xml == NULL) {
58
- if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
59
- rb_raise(c_parsley_err, ptr->error);
60
- parsed_parsley_free(ptr);
61
- return Qnil;
62
- }
63
-
64
- VALUE output;
65
- if(type == ID2SYM(rb_intern("json"))) {
66
- struct json_object *json = xml2json(ptr->xml->children->children);
67
- char* str = json_object_to_json_string(json);
68
- output = rb_str_new2(str);
69
- json_object_put(json);
70
- } else if(type == ID2SYM(rb_intern("xml"))) {
71
- char* str;
72
- int size;
73
- xmlDocDumpMemory(ptr->xml, &str, &size);
74
- output = rb_str_new(str, size);
75
- } else {
76
- output = rubify_recurse(ptr->xml->children->children);
77
- if(output == NULL) output = Qnil;
78
- }
79
-
80
- parsed_parsley_free(ptr);
81
-
82
- return output;
83
- }
84
-
85
- VALUE rubify_recurse(xmlNodePtr xml) {
49
+ static VALUE
50
+ rubify_recurse(xmlNodePtr xml) {
86
51
  if(xml == NULL) return NULL;
87
52
  xmlNodePtr child;
88
53
  VALUE obj = Qnil;
@@ -115,4 +80,59 @@ VALUE rubify_recurse(xmlNodePtr xml) {
115
80
  }
116
81
  // inspect(obj);
117
82
  return obj;
83
+ }
84
+
85
+ static VALUE
86
+ _parse_doc(parsedParsleyPtr ptr, VALUE type) {
87
+ if(ptr->error != NULL || ptr->xml == NULL) {
88
+ if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
89
+ rb_raise(c_parsley_err, ptr->error);
90
+ parsed_parsley_free(ptr);
91
+ return Qnil;
92
+ }
93
+
94
+ VALUE output;
95
+ if(type == ID2SYM(rb_intern("json"))) {
96
+ struct json_object *json = xml2json(ptr->xml->children->children);
97
+ char* str = json_object_to_json_string(json);
98
+ output = rb_str_new2(str);
99
+ json_object_put(json);
100
+ } else if(type == ID2SYM(rb_intern("xml"))) {
101
+ xmlChar* str;
102
+ int size;
103
+ xmlDocDumpMemory(ptr->xml, &str, &size);
104
+ output = rb_str_new(str, size);
105
+ } else {
106
+ output = rubify_recurse(ptr->xml->children->children);
107
+ if((void*)output == NULL) output = Qnil;
108
+ }
109
+
110
+ parsed_parsley_free(ptr);
111
+
112
+ return output;
113
+ }
114
+
115
+ #define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
116
+ #define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
117
+ #define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
118
+
119
+ VALUE _parse(VALUE self, VALUE options){
120
+ parsleyPtr parsley;
121
+ Data_Get_Struct(self, parsleyPtr, parsley);
122
+ int flags = 0;
123
+ char *base = NULL;
124
+ if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
125
+ if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
126
+ if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
127
+ if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
128
+ if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
129
+
130
+ // printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
131
+
132
+ if(OPT_BOOL("is_file")) {
133
+ return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
134
+ } else {
135
+ char * str = STR2CSTR(OPT("string"));
136
+ return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
137
+ }
118
138
  }
data/ext/extconf.rb CHANGED
@@ -58,6 +58,8 @@ end
58
58
  myincl = %w[/usr/local/include /opt/local/include /usr/include]
59
59
  mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]
60
60
 
61
+ find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"
62
+
61
63
  find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
62
64
  find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"
63
65
 
data/lib/parsley.rb CHANGED
@@ -4,6 +4,16 @@ require "json"
4
4
  require "thread"
5
5
 
6
6
  class Parsley
7
+
8
# Stores +agent+ on the class, then hands its string form to
# CParsley.set_user_agent.
def self.user_agent=(agent)
  @user_agent = agent
  agent_name = agent.to_s
  CParsley.set_user_agent(agent_name)
end
12
+
13
# Returns the value held in the class-level @user_agent (nil when it has
# never been assigned).
def self.user_agent
  return @user_agent
end
16
+
7
17
  def initialize(parsley, incl = "")
8
18
  if(parsley.is_a?(Hash))
9
19
  parsley = parsley.to_json
@@ -17,23 +27,37 @@ class Parsley
17
27
  # Valid options:
18
28
  #
19
29
  # Requires one of:
20
- # :file -- the input file path
30
+ # :file -- the input file path or url
21
31
  # :string -- the input string
22
32
  #
23
- # And optionally:
24
- # :input => [:xml, :html]
25
- # :output => [:json, :xml, :ruby]
26
- # :allow_empty -- If false, throws an exception if any value is empty.
27
- #
28
- # Defaults are :input => :html, :output => :ruby, :allow_empty => false
33
+ # And optionally (default is the first listed value):
34
+ # :input => [:html, :xml]
35
+ # :output => [:ruby, :json, :xml]
36
+ # :prune => [true, false]
37
+ # :base => "http://some/base/href"
38
+ # :allow_net => [true, false]
39
+ # :allow_local => [true, false]
29
40
  def parse(options = {})
30
- options[:file] || options[:string] || throw("must specify what to parse")
31
- options[:input] ||= :html
32
- options[:output]||= :ruby
33
- if options[:file]
34
- @parsley.parse_file options[:file], options[:input], options[:output]
35
- else
36
- @parsley.parse_string options[:string], options[:input], options[:output]
37
- end
41
+ options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))
42
+
43
+ options[:is_file] = !!options[:file]
44
+ options[:has_base] = !!options[:base]
45
+
46
+ options[:base] = options[:base].to_s
47
+ options[:file] = options[:file].to_s
48
+ options[:string] = options[:string].to_s
49
+
50
+ options[:input] ||= :html
51
+ options[:output] ||= :ruby
52
+
53
+ options[:prune] = true unless options.has_key?(:prune)
54
+ options[:allow_net] = true unless options.has_key?(:allow_net)
55
+ options[:allow_local] = true unless options.has_key?(:allow_local)
56
+
57
+ options[:prune] = !!options[:prune]
58
+ options[:allow_net] = !!options[:allow_net]
59
+ options[:allow_local] = !!options[:allow_local]
60
+
61
+ @parsley.parse(options)
38
62
  end
39
63
  end
data/parsley-ruby.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "parsley-ruby"
3
- s.version = "0.2.0"
4
- s.date = "2008-08-10"
3
+ s.version = "0.3.0"
4
+ s.date = "2009-03-23"
5
5
  s.summary = "Ruby binding for parsley"
6
6
  s.email = "kyle@kylemaxwell.com"
7
7
  s.homepage = "http://github.com/fizx/parsley-ruby"
data/test/test_parsley.rb CHANGED
@@ -19,6 +19,13 @@ class TestParsley < Test::Unit::TestCase
19
19
  out = @parsley.parse(:file => @home, :output => :xml)
20
20
  end
21
21
 
22
# A parselet built from the expression "no-ns:match(h1)" is expected to raise
# ParsleyError when it is run against the @page fixture.
def test_broken
  @parsley = Parsley.new("hi" => "no-ns:match(h1)")
  assert_raises(ParsleyError) do
    @parsley.parse(:file => @page)
  end
end
28
+
22
29
  def test_simple
23
30
  @parsley = Parsley.new("hi" => "h1")
24
31
  assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-parsley-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-10 00:00:00 -07:00
12
+ date: 2009-03-23 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency