fizx-parsley-ruby 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/cparsley.c CHANGED
@@ -13,10 +13,8 @@
13
13
  #include <xml2json.h>
14
14
 
15
15
  VALUE _new(VALUE, VALUE, VALUE);
16
- VALUE _parse_file(VALUE, VALUE, VALUE, VALUE);
17
- VALUE _parse_string(VALUE, VALUE, VALUE, VALUE);
18
- VALUE _parse_doc(parsedParsleyPtr, VALUE);
19
- VALUE rubify_recurse(xmlNodePtr xml);
16
+ VALUE _parse(VALUE, VALUE);
17
+ VALUE _rb_set_user_agent(VALUE self, VALUE agent);
20
18
  VALUE c_parsley_err;
21
19
  VALUE c_parsley;
22
20
 
@@ -25,11 +23,12 @@ void Init_cparsley()
25
23
  c_parsley = rb_define_class("CParsley", rb_cObject);
26
24
  c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
27
25
  rb_define_singleton_method(c_parsley, "new", _new, 2);
28
- rb_define_method(c_parsley, "parse_file", _parse_file, 3);
29
- rb_define_method(c_parsley, "parse_string", _parse_string, 3);
26
+ rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
27
+ rb_define_method(c_parsley, "parse", _parse, 1);
30
28
  }
31
29
 
32
- VALUE _new(VALUE self, VALUE parsley, VALUE incl){
30
+ VALUE
31
+ _new(VALUE self, VALUE parsley, VALUE incl){
33
32
  parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
34
33
  if(ptr->error != NULL) {
35
34
  rb_raise(c_parsley_err, ptr->error);
@@ -40,49 +39,15 @@ VALUE _new(VALUE self, VALUE parsley, VALUE incl){
40
39
  return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
41
40
  }
42
41
 
43
- VALUE _parse_file(VALUE self, VALUE name, VALUE input, VALUE output){
44
- parsleyPtr parsley;
45
- Data_Get_Struct(self, parsleyPtr, parsley);
46
- return _parse_doc(parsley_parse_file(parsley, STR2CSTR(name), input == ID2SYM(rb_intern("html")), 1), output);
42
+ VALUE
43
+ _rb_set_user_agent(VALUE self, VALUE agent) {
44
+ parsley_set_user_agent(STR2CSTR(agent));
45
+ return Qtrue;
47
46
  }
48
47
 
49
- VALUE _parse_string(VALUE self, VALUE string, VALUE input, VALUE output) {
50
- parsleyPtr parsley;
51
- Data_Get_Struct(self, parsleyPtr, parsley);
52
- char* cstr = STR2CSTR(string);
53
- return _parse_doc(parsley_parse_string(parsley, cstr, strlen(cstr), input == ID2SYM(rb_intern("html")), 1), output);
54
- }
55
48
 
56
- VALUE _parse_doc(parsedParsleyPtr ptr, VALUE type) {
57
- if(ptr->error != NULL || ptr->xml == NULL) {
58
- if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
59
- rb_raise(c_parsley_err, ptr->error);
60
- parsed_parsley_free(ptr);
61
- return Qnil;
62
- }
63
-
64
- VALUE output;
65
- if(type == ID2SYM(rb_intern("json"))) {
66
- struct json_object *json = xml2json(ptr->xml->children->children);
67
- char* str = json_object_to_json_string(json);
68
- output = rb_str_new2(str);
69
- json_object_put(json);
70
- } else if(type == ID2SYM(rb_intern("xml"))) {
71
- char* str;
72
- int size;
73
- xmlDocDumpMemory(ptr->xml, &str, &size);
74
- output = rb_str_new(str, size);
75
- } else {
76
- output = rubify_recurse(ptr->xml->children->children);
77
- if(output == NULL) output = Qnil;
78
- }
79
-
80
- parsed_parsley_free(ptr);
81
-
82
- return output;
83
- }
84
-
85
- VALUE rubify_recurse(xmlNodePtr xml) {
49
+ static VALUE
50
+ rubify_recurse(xmlNodePtr xml) {
86
51
  if(xml == NULL) return NULL;
87
52
  xmlNodePtr child;
88
53
  VALUE obj = Qnil;
@@ -115,4 +80,59 @@ VALUE rubify_recurse(xmlNodePtr xml) {
115
80
  }
116
81
  // inspect(obj);
117
82
  return obj;
83
+ }
84
+
85
+ static VALUE
86
+ _parse_doc(parsedParsleyPtr ptr, VALUE type) {
87
+ if(ptr->error != NULL || ptr->xml == NULL) {
88
+ if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
89
+ rb_raise(c_parsley_err, ptr->error);
90
+ parsed_parsley_free(ptr);
91
+ return Qnil;
92
+ }
93
+
94
+ VALUE output;
95
+ if(type == ID2SYM(rb_intern("json"))) {
96
+ struct json_object *json = xml2json(ptr->xml->children->children);
97
+ char* str = json_object_to_json_string(json);
98
+ output = rb_str_new2(str);
99
+ json_object_put(json);
100
+ } else if(type == ID2SYM(rb_intern("xml"))) {
101
+ xmlChar* str;
102
+ int size;
103
+ xmlDocDumpMemory(ptr->xml, &str, &size);
104
+ output = rb_str_new(str, size);
105
+ } else {
106
+ output = rubify_recurse(ptr->xml->children->children);
107
+ if((void*)output == NULL) output = Qnil;
108
+ }
109
+
110
+ parsed_parsley_free(ptr);
111
+
112
+ return output;
113
+ }
114
+
115
+ #define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
116
+ #define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
117
+ #define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
118
+
119
+ VALUE _parse(VALUE self, VALUE options){
120
+ parsleyPtr parsley;
121
+ Data_Get_Struct(self, parsleyPtr, parsley);
122
+ int flags = 0;
123
+ char *base = NULL;
124
+ if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
125
+ if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
126
+ if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
127
+ if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
128
+ if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
129
+
130
+ // printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
131
+
132
+ if(OPT_BOOL("is_file")) {
133
+ return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
134
+ } else {
135
+ char * str = STR2CSTR(OPT("string"));
136
+ return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
137
+ }
118
138
  }
data/ext/extconf.rb CHANGED
@@ -58,6 +58,8 @@ end
58
58
  myincl = %w[/usr/local/include /opt/local/include /usr/include]
59
59
  mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]
60
60
 
61
+ find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"
62
+
61
63
  find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
62
64
  find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"
63
65
 
data/lib/parsley.rb CHANGED
@@ -4,6 +4,16 @@ require "json"
4
4
  require "thread"
5
5
 
6
6
  class Parsley
7
+
8
+ def self.user_agent=(agent)
9
+ @user_agent = agent
10
+ CParsley.set_user_agent(agent.to_s)
11
+ end
12
+
13
+ def self.user_agent
14
+ @user_agent
15
+ end
16
+
7
17
  def initialize(parsley, incl = "")
8
18
  if(parsley.is_a?(Hash))
9
19
  parsley = parsley.to_json
@@ -17,23 +27,37 @@ class Parsley
17
27
  # Valid options:
18
28
  #
19
29
  # Requires one of:
20
- # :file -- the input file path
30
+ # :file -- the input file path or url
21
31
  # :string -- the input string
22
32
  #
23
- # And optionally:
24
- # :input => [:xml, :html]
25
- # :output => [:json, :xml, :ruby]
26
- # :allow_empty -- If false, throws an exception if any value is empty.
27
- #
28
- # Defaults are :input => :html, :output => :ruby, :allow_empty => false
33
+ # And optionally (default is the first listed value):
34
+ # :input => [:html, :xml]
35
+ # :output => [:ruby, :json, :xml]
36
+ # :prune => [true, false]
37
+ # :base => "http://some/base/href"
38
+ # :allow_net => [true, false]
39
+ # :allow_local => [true, false]
29
40
  def parse(options = {})
30
- options[:file] || options[:string] || throw("must specify what to parse")
31
- options[:input] ||= :html
32
- options[:output]||= :ruby
33
- if options[:file]
34
- @parsley.parse_file options[:file], options[:input], options[:output]
35
- else
36
- @parsley.parse_string options[:string], options[:input], options[:output]
37
- end
41
+ options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))
42
+
43
+ options[:is_file] = !!options[:file]
44
+ options[:has_base] = !!options[:base]
45
+
46
+ options[:base] = options[:base].to_s
47
+ options[:file] = options[:file].to_s
48
+ options[:string] = options[:string].to_s
49
+
50
+ options[:input] ||= :html
51
+ options[:output] ||= :ruby
52
+
53
+ options[:prune] = true unless options.has_key?(:prune)
54
+ options[:allow_net] = true unless options.has_key?(:allow_net)
55
+ options[:allow_local] = true unless options.has_key?(:allow_local)
56
+
57
+ options[:prune] = !!options[:prune]
58
+ options[:allow_net] = !!options[:allow_net]
59
+ options[:allow_local] = !!options[:allow_local]
60
+
61
+ @parsley.parse(options)
38
62
  end
39
63
  end
data/parsley-ruby.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "parsley-ruby"
3
- s.version = "0.2.0"
4
- s.date = "2008-08-10"
3
+ s.version = "0.3.0"
4
+ s.date = "2009-03-23"
5
5
  s.summary = "Ruby binding for parsley"
6
6
  s.email = "kyle@kylemaxwell.com"
7
7
  s.homepage = "http://github.com/fizx/parsley-ruby"
data/test/test_parsley.rb CHANGED
@@ -19,6 +19,13 @@ class TestParsley < Test::Unit::TestCase
19
19
  out = @parsley.parse(:file => @home, :output => :xml)
20
20
  end
21
21
 
22
+ def test_broken
23
+ @parsley = Parsley.new("hi" => "no-ns:match(h1)")
24
+ assert_raises(ParsleyError) {
25
+ @parsley.parse(:file => @page)
26
+ }
27
+ end
28
+
22
29
  def test_simple
23
30
  @parsley = Parsley.new("hi" => "h1")
24
31
  assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-parsley-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-10 00:00:00 -07:00
12
+ date: 2009-03-23 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency