le1t0-parsley-ruby 0.4.5.001
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +28 -0
- data/CHANGELOG +8 -0
- data/README +32 -0
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/ext/cparsley.c +140 -0
- data/ext/extconf.rb +8 -0
- data/lib/parsley.rb +84 -0
- data/test/test_parsley.rb +116 -0
- data/test/yelp-benchmark.rb +53 -0
- data/test/yelp-home.html +1004 -0
- data/test/yelp-home.let +6 -0
- data/test/yelp.html +2329 -0
- metadata +376 -0
data/.gitignore
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
.libs/
|
|
2
|
+
*.o
|
|
3
|
+
*.lo
|
|
4
|
+
dexterc
|
|
5
|
+
dexter
|
|
6
|
+
parsleyc
|
|
7
|
+
parsley
|
|
8
|
+
.deps/
|
|
9
|
+
Makefile
|
|
10
|
+
y.tab.c
|
|
11
|
+
autom4te.cache/
|
|
12
|
+
autoscan.log
|
|
13
|
+
config.log
|
|
14
|
+
configure.scan
|
|
15
|
+
parser.c
|
|
16
|
+
scanner.c
|
|
17
|
+
libparsley.la
|
|
18
|
+
parser.h
|
|
19
|
+
test.log
|
|
20
|
+
parsley*.gem
|
|
21
|
+
ext/cparsley.bundle
|
|
22
|
+
ext/cparsley.so
|
|
23
|
+
ext/Makefile
|
|
24
|
+
ext/conftest.dSYM/
|
|
25
|
+
work
|
|
26
|
+
ext/mkmf.log
|
|
27
|
+
pkg
|
|
28
|
+
le1t0-parsley-ruby.gemspec
|
data/CHANGELOG
ADDED
data/README
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
ABOUT
|
|
2
|
+
|
|
3
|
+
Ruby bindings for Parsley.
|
|
4
|
+
|
|
5
|
+
INSTALLATION
|
|
6
|
+
|
|
7
|
+
= Get Parsley and Dependencies =
|
|
8
|
+
|
|
9
|
+
Download Parsley from http://github.com/fizx/parsley/tree/master following the installation directions located at http://github.com/fizx/parsley/blob/master/INSTALL
|
|
10
|
+
|
|
11
|
+
= Install parsley-ruby =
|
|
12
|
+
|
|
13
|
+
From source:
|
|
14
|
+
sudo rake install
|
|
15
|
+
|
|
16
|
+
From GitHub: DEPRECATED!
|
|
17
|
+
|
|
18
|
+
From GemCutter
|
|
19
|
+
|
|
20
|
+
Run the following if you haven't already:
|
|
21
|
+
gem sources -a http://gemcutter.org
|
|
22
|
+
Install the gem:
|
|
23
|
+
sudo gem install parsley-ruby
|
|
24
|
+
|
|
25
|
+
PARSLETS.COM INTEGRATION
|
|
26
|
+
|
|
27
|
+
We also recommend installing the free online_parselets rubygem in order to use other people's parselets and to share your own:
|
|
28
|
+
Run the following if you haven't already:
|
|
29
|
+
gem sources -a http://gems.github.com
|
|
30
|
+
Install the gem:
|
|
31
|
+
sudo gem install iterationlabs-online_parslets
|
|
32
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging/release tasks. Jeweler is optional: when it cannot be loaded
# a hint is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "le1t0-parsley-ruby"
    gem.summary = "Ruby binding for parsley"
    gem.description = "XML/HTML Parser"
    gem.email = "dev@ewout.to"
    gem.homepage = "http://github.com/le1t0/parsley-ruby"
    gem.authors = ["Le1t0"]
    gem.add_dependency("json", ["> 0.0.0"])
    gem.require_paths = ["lib", "ext"]
    gem.extensions = "ext/extconf.rb"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  # The suite is named test/test_parsley.rb, so match the "test_" prefix;
  # the previous "*_test.rb" glob matched no file and ran no tests.
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. Falls back to a stub that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    # Same naming convention as the :test task above.
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (presumably it comes from
# the Jeweler tasks). Only depend on it when it exists, so `rake test` keeps
# working when Jeweler failed to load above.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# NOTE(review): 'rake/rdoctask' is the legacy location of the RDoc task;
# confirm it is still available on the Rake version used for releases.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so the title stays on one line.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "parsley-ruby #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.4.5.001
|
data/ext/cparsley.c
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#include "ruby.h"
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <libxslt/xslt.h>
|
|
4
|
+
#include <libexslt/exslt.h>
|
|
5
|
+
#include <libxslt/xsltInternals.h>
|
|
6
|
+
#include <libxslt/transform.h>
|
|
7
|
+
#include <libxml/parser.h>
|
|
8
|
+
#include <libxml/HTMLparser.h>
|
|
9
|
+
#include <libxml/HTMLtree.h>
|
|
10
|
+
#include <libxml/xmlwriter.h>
|
|
11
|
+
#include <parsley.h>
|
|
12
|
+
#include <json/json.h>
|
|
13
|
+
#include <xml2json.h>
|
|
14
|
+
|
|
15
|
+
VALUE _new(VALUE, VALUE, VALUE);
|
|
16
|
+
VALUE _parse(VALUE, VALUE);
|
|
17
|
+
VALUE _rb_set_user_agent(VALUE self, VALUE agent);
|
|
18
|
+
VALUE c_parsley_err;
|
|
19
|
+
VALUE c_parsley;
|
|
20
|
+
|
|
21
|
+
void Init_cparsley()
|
|
22
|
+
{
|
|
23
|
+
c_parsley = rb_define_class("CParsley", rb_cObject);
|
|
24
|
+
c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
|
|
25
|
+
rb_define_singleton_method(c_parsley, "new", _new, 2);
|
|
26
|
+
rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
|
|
27
|
+
rb_define_method(c_parsley, "parse", _parse, 1);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
VALUE
|
|
31
|
+
_new(VALUE self, VALUE parsley, VALUE incl){
|
|
32
|
+
parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
|
|
33
|
+
if(ptr->error != NULL) {
|
|
34
|
+
rb_raise(c_parsley_err, ptr->error);
|
|
35
|
+
parsley_free(ptr);
|
|
36
|
+
return Qnil;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
VALUE
|
|
43
|
+
_rb_set_user_agent(VALUE self, VALUE agent) {
|
|
44
|
+
parsley_set_user_agent(STR2CSTR(agent));
|
|
45
|
+
return Qtrue;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
static VALUE
|
|
50
|
+
rubify_recurse(xmlNodePtr xml) {
|
|
51
|
+
if(xml == NULL) return NULL;
|
|
52
|
+
xmlNodePtr child;
|
|
53
|
+
VALUE obj = Qnil;
|
|
54
|
+
|
|
55
|
+
switch(xml->type) {
|
|
56
|
+
case XML_ELEMENT_NODE:
|
|
57
|
+
child = xml->children;
|
|
58
|
+
if(xml->ns == NULL) {
|
|
59
|
+
child = xml;
|
|
60
|
+
obj = rb_hash_new();
|
|
61
|
+
while(child != NULL) {
|
|
62
|
+
rb_hash_aset(obj, rb_str_new2(child->name), rubify_recurse(child->children));
|
|
63
|
+
child = child->next;
|
|
64
|
+
}
|
|
65
|
+
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
|
66
|
+
if(!strcmp(xml->name, "groups")) {
|
|
67
|
+
obj = rb_ary_new();
|
|
68
|
+
while(child != NULL) {
|
|
69
|
+
rb_ary_push(obj, rubify_recurse(child->children));
|
|
70
|
+
child = child->next;
|
|
71
|
+
}
|
|
72
|
+
} else if(!strcmp(xml->name, "group")) {
|
|
73
|
+
// Implicitly handled by parsley:groups handler
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
break;
|
|
77
|
+
case XML_TEXT_NODE:
|
|
78
|
+
obj = rb_str_new2(xml->content);
|
|
79
|
+
break;
|
|
80
|
+
}
|
|
81
|
+
// inspect(obj);
|
|
82
|
+
return obj;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
static VALUE
|
|
86
|
+
_parse_doc(parsedParsleyPtr ptr, VALUE type) {
|
|
87
|
+
if(ptr->error != NULL || ptr->xml == NULL) {
|
|
88
|
+
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
|
89
|
+
rb_raise(c_parsley_err, ptr->error);
|
|
90
|
+
parsed_parsley_free(ptr);
|
|
91
|
+
return Qnil;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
VALUE output;
|
|
95
|
+
if(type == ID2SYM(rb_intern("json"))) {
|
|
96
|
+
struct json_object *json = xml2json(ptr->xml->children->children);
|
|
97
|
+
char* str = json_object_to_json_string(json);
|
|
98
|
+
output = rb_str_new2(str);
|
|
99
|
+
json_object_put(json);
|
|
100
|
+
} else if(type == ID2SYM(rb_intern("xml"))) {
|
|
101
|
+
xmlChar* str;
|
|
102
|
+
int size;
|
|
103
|
+
xmlDocDumpMemory(ptr->xml, &str, &size);
|
|
104
|
+
output = rb_str_new(str, size);
|
|
105
|
+
} else {
|
|
106
|
+
output = rubify_recurse(ptr->xml->children->children);
|
|
107
|
+
if((void*)output == NULL) output = Qnil;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
parsed_parsley_free(ptr);
|
|
111
|
+
|
|
112
|
+
return output;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
#define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
|
|
116
|
+
#define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
|
|
117
|
+
#define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
|
|
118
|
+
|
|
119
|
+
VALUE _parse(VALUE self, VALUE options){
|
|
120
|
+
parsleyPtr parsley;
|
|
121
|
+
Data_Get_Struct(self, parsleyPtr, parsley);
|
|
122
|
+
int flags = 0;
|
|
123
|
+
char *base = NULL;
|
|
124
|
+
if(OPT_MATCH("input", "html")) flags |= PARSLEY_OPTIONS_HTML;
|
|
125
|
+
if(OPT_BOOL("prune")) flags |= PARSLEY_OPTIONS_PRUNE;
|
|
126
|
+
if(OPT_BOOL("collate")) flags |= PARSLEY_OPTIONS_COLLATE;
|
|
127
|
+
if(OPT_BOOL("allow_net")) flags |= PARSLEY_OPTIONS_ALLOW_NET;
|
|
128
|
+
if(OPT_BOOL("allow_local")) flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
|
|
129
|
+
if(OPT_BOOL("sgwrap")) flags |= PARSLEY_OPTIONS_SGWRAP;
|
|
130
|
+
if(OPT_BOOL("has_base")) base = STR2CSTR(OPT("base"));
|
|
131
|
+
|
|
132
|
+
// printf("prune: %d\nallow_net: %d\nallow_local: %d\nhas_base: %d\nflags: %d\n", OPT_BOOL("prune"), OPT_BOOL("allow_net"), OPT_BOOL("allow_local"), OPT_BOOL("has_base"), flags);
|
|
133
|
+
|
|
134
|
+
if(OPT_BOOL("is_file")) {
|
|
135
|
+
return _parse_doc(parsley_parse_file(parsley, STR2CSTR(OPT("file")), flags), OPT("output"));
|
|
136
|
+
} else {
|
|
137
|
+
char * str = STR2CSTR(OPT("string"));
|
|
138
|
+
return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), OPT("output"));
|
|
139
|
+
}
|
|
140
|
+
}
|
data/ext/extconf.rb
ADDED
data/lib/parsley.rb
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../ext/cparsley"
|
|
2
|
+
require "rubygems"
|
|
3
|
+
require "json"
|
|
4
|
+
require "thread"
|
|
5
|
+
|
|
6
|
+
# Ruby front end for the CParsley native extension: wraps a compiled parselet
# and normalizes the options passed to it.
class Parsley

  # Lock held while a parselet is compiled. It is created when the class is
  # loaded, so concurrent first calls to Parsley.new cannot race on creating
  # the lock itself.
  COMPILE_MUTEX = Mutex.new

  # Stores +agent+ and forwards its string form to the native extension.
  def self.user_agent=(agent)
    @user_agent = agent
    CParsley.set_user_agent(agent.to_s)
  end

  # Returns the value last assigned through Parsley.user_agent=.
  def self.user_agent
    @user_agent
  end

  # parsley -- the parselet; a Hash is stringified recursively and converted
  #            to JSON, anything else is passed to CParsley.new unchanged
  # incl    -- second argument passed to CParsley.new
  def initialize(parsley, incl = "")
    parsley = recursive_stringify(parsley).to_json if parsley.is_a?(Hash)
    COMPILE_MUTEX.synchronize do
      @parsley = CParsley.new(parsley, incl)
    end
  end

  # Valid options:
  #
  # Requires one of:
  # :file -- the input file path or url
  # :string -- the input string
  #
  # And optionally (default is the first listed value):
  # :input => [:html, :xml]
  # :output => [:ruby, :json, :xml]
  # :prune => [true, false]
  # :sgwrap => [false, true]
  # :collate => [true, false]
  # :base => "http://some/base/href"
  # :allow_net => [true, false]
  # :allow_local => [true, false]
  #
  # Raises ParsleyError when neither :file nor :string is given.
  # The hash passed in is not modified.
  def parse(options = {})
    options[:file] || options[:string] || (raise ParsleyError, "must specify what to parse")

    # Normalize a copy. Writing the normalized values back into the caller's
    # hash would leave :file and :base as "" (truthy), so reusing that hash
    # for a second call would be treated as a file parse with a base.
    options = options.dup

    options[:sgwrap] = !!options[:sgwrap]
    options[:is_file] = !!options[:file]
    options[:has_base] = !!options[:base]

    options[:base] = options[:base].to_s
    options[:file] = options[:file].to_s
    options[:string] = options[:string].to_s

    options[:input] ||= :html
    options[:output] ||= :ruby

    options[:collate] = true unless options.has_key?(:collate)
    options[:prune] = true unless options.has_key?(:prune)
    options[:allow_net] = true unless options.has_key?(:allow_net)
    options[:allow_local] = true unless options.has_key?(:allow_local)

    options[:collate] = !!options[:collate]
    options[:prune] = !!options[:prune]
    options[:allow_net] = !!options[:allow_net]
    options[:allow_local] = !!options[:allow_local]

    @parsley.parse(options)
  end

  private

  # Returns a copy of +obj+ in which every Hash key and every leaf value is a
  # String; Hashes and Arrays are walked recursively.
  def recursive_stringify(obj)
    case obj
    when Hash
      obj.inject({}) do |memo, (k, v)|
        memo[k.to_s] = recursive_stringify(v)
        memo
      end
    when Array
      obj.map { |e| recursive_stringify(e) }
    else
      obj.to_s
    end
  end

end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
require "test/unit"
|
|
2
|
+
require File.dirname(__FILE__) + "/../lib/parsley"
|
|
3
|
+
|
|
4
|
+
# Tests for the Parsley Ruby binding. Only the segfault regression test is
# active; the remaining tests are kept below, commented out.
class TestParsley < Test::Unit::TestCase
  # Resolves absolute paths to the fixture files that sit next to this test.
  def setup
    here = File.dirname(__FILE__)
    @page = File.expand_path(here + "/yelp.html")
    @home = File.expand_path(here + "/yelp-home.html")
    @let = File.expand_path(here + "/yelp-home.let")
  end

  # A parselet with several optional keys (trailing "?") applied to a tiny
  # document: the title is extracted and the optional description is absent.
  def test_segfault_regression
    simple_html = <<-HTML
      <html>
        <body>
          <h1 class="iCIMS_Header_JobTitle">CEO</h1>
        </body>
      </html>
    HTML

    job_spec = {
      'title' => ".iCIMS_Header_JobTitle",
      'description?' => "blah",
      'location?' => "blah",
      'experience?' => "blah",
      'education?' => "blah"
    }
    struct = { 'jobs' => [job_spec] }

    parselet = Parsley.new(struct)
    result = parselet.parse(:string => simple_html)

    assert_equal "CEO", result['jobs'].first['title']
    assert result['jobs'].first['description'].nil?
  end
  #
  # def test_yelp
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home)
  #   assert_equal "/c/sf/shopping", out["categories"][0]["href"]
  # end
  #
  # def test_parsley_should_raise_if_value_syntax_error
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => nil})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => ""})
  #   end
  #
  #   assert_raises(ParsleyError) do
  #     Parsley.new({"foo" => "<<<<<<<<<<<"})
  #   end
  # end
  #
  # def test_yelp_xml
  #   @parsley = Parsley.new(File.read(@let))
  #   out = @parsley.parse(:file => @home, :output => :xml)
  # end
  #
  # def test_broken
  #   @parsley = Parsley.new("hi" => "no-ns:match(h1)")
  #   assert_raises(ParsleyError) {
  #     @parsley.parse(:file => @page)
  #   }
  # end
  #
  # def test_simple
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @page))
  # end
  #
  # def test_simple_string
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:string => "<html><body><h1>Nick's Crispy Tacos</h1></body></html>"))
  # end
  #
  # def test_xml
  #   @parsley = Parsley.new("hi" => "h1")
  #   xml = "<?xml version=\"1.0\"?>\n<parsley:root xmlns:parsley=\"http://parselets.com/json\"><hi position=\"63\">Nick's Crispy Tacos</hi></parsley:root>\n"
  #   assert_equal(xml, @parsley.parse(:file => @page, :output => :xml))
  # end
  #
  # def test_sgwrap
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_equal({"hi" => "world"}, @parsley.parse(:string => html, :sgwrap => true))
  # end
  #
  # def test_sgwrap_off
  #   @parsley = Parsley.new("hi" => "p sg_wrap")
  #   html = "<p><b>hi</b>world</p>"
  #   assert_raises(ParsleyError) do
  #     @parsley.parse(:string => html, :sgwrap => false)
  #   end
  # end
  #
  #
  # def test_json
  #   @parsley = Parsley.new("hi" => "h1")
  #   assert_equal('{ "hi": "Nick\'s Crispy Tacos" }', @parsley.parse(:file => @page, :output => :json))
  # end
  #
  # def test_rescuable_file_error
  #   @parsley = Parsley.new("hi" => "h1")
  #   @nonexistant_file = File.dirname(__FILE__) + "/../fixtures/yelp.html"
  #   assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @nonexistant_file)) rescue nil
  # end
  #
  # def test_array_string
  #   @parsley = Parsley.new({"foo" => ["li"]})
  #   out = @parsley.parse(:file => @page)
  #   assert_kind_of Hash, out
  #   assert_kind_of Array, out["foo"], out.inspect
  #   assert out["foo"].length > 1
  # end
end
|