wombat 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +13 -30
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/follow_links.yml +2143 -0
- data/lib/wombat/crawler.rb +7 -17
- data/lib/wombat/dsl/follower.rb +19 -0
- data/lib/wombat/dsl/iterator.rb +19 -0
- data/lib/wombat/dsl/metadata.rb +27 -0
- data/lib/wombat/dsl/property.rb +27 -0
- data/lib/wombat/dsl/property_group.rb +48 -0
- data/lib/wombat/processing/node_selector.rb +12 -0
- data/lib/wombat/processing/parser.rb +48 -0
- data/lib/wombat/property/locators/base.rb +33 -0
- data/lib/wombat/property/locators/factory.rb +39 -0
- data/lib/wombat/property/locators/follow.rb +25 -0
- data/lib/wombat/property/locators/html.rb +14 -0
- data/lib/wombat/property/locators/iterator.rb +23 -0
- data/lib/wombat/property/locators/list.rb +17 -0
- data/lib/wombat/property/locators/property_group.rb +20 -0
- data/lib/wombat/property/locators/text.rb +22 -0
- data/lib/wombat.rb +8 -4
- data/spec/crawler_spec.rb +38 -48
- data/spec/dsl/property_spec.rb +12 -0
- data/spec/helpers/sample_crawler.rb +2 -15
- data/spec/integration/integration_spec.rb +61 -33
- data/spec/processing/parser_spec.rb +32 -0
- data/spec/property/locators/factory_spec.rb +18 -0
- data/spec/property/locators/follow_spec.rb +4 -0
- data/spec/property/locators/html_spec.rb +15 -0
- data/spec/property/locators/iterator_spec.rb +4 -0
- data/spec/property/locators/list_spec.rb +13 -0
- data/spec/property/locators/text_spec.rb +49 -0
- data/spec/sample_crawler_spec.rb +7 -11
- data/spec/wombat_spec.rb +13 -1
- data/wombat.gemspec +27 -16
- metadata +27 -16
- data/lib/wombat/iterator.rb +0 -38
- data/lib/wombat/metadata.rb +0 -24
- data/lib/wombat/node_selector.rb +0 -10
- data/lib/wombat/parser.rb +0 -59
- data/lib/wombat/property.rb +0 -21
- data/lib/wombat/property_container.rb +0 -70
- data/lib/wombat/property_locator.rb +0 -20
- data/spec/iterator_spec.rb +0 -52
- data/spec/metadata_spec.rb +0 -20
- data/spec/parser_spec.rb +0 -125
- data/spec/property_container_spec.rb +0 -62
- data/spec/property_locator_spec.rb +0 -75
- data/spec/property_spec.rb +0 -16
data/lib/wombat/metadata.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/property_container'
|
3
|
-
require 'wombat/iterator'
|
4
|
-
|
5
|
-
module Wombat
|
6
|
-
class Metadata < PropertyContainer
|
7
|
-
def initialize
|
8
|
-
self[:document_format] = :html
|
9
|
-
super
|
10
|
-
end
|
11
|
-
|
12
|
-
def base_url(url)
|
13
|
-
self[:base_url] = url
|
14
|
-
end
|
15
|
-
|
16
|
-
def list_page(url)
|
17
|
-
self[:list_page] = url
|
18
|
-
end
|
19
|
-
|
20
|
-
def document_format(format)
|
21
|
-
self[:document_format] = format
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
data/lib/wombat/node_selector.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
module Wombat
|
2
|
-
module NodeSelector
|
3
|
-
def select_nodes(selector, namespaces = nil)
|
4
|
-
return [selector.to_s] if selector.is_a? Symbol
|
5
|
-
return context.xpath selector[6..-1], namespaces if selector.start_with? "xpath="
|
6
|
-
return context.css selector[4..-1] if selector.start_with? "css="
|
7
|
-
[selector]
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
data/lib/wombat/parser.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/property_locator'
|
3
|
-
require 'mechanize'
|
4
|
-
require 'restclient'
|
5
|
-
|
6
|
-
module Wombat
|
7
|
-
module Parser
|
8
|
-
include PropertyLocator
|
9
|
-
attr_accessor :mechanize, :context, :response_code
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@mechanize = Mechanize.new
|
13
|
-
end
|
14
|
-
|
15
|
-
def parse(metadata)
|
16
|
-
self.context = parser_for metadata
|
17
|
-
original_context = self.context
|
18
|
-
|
19
|
-
metadata.iterators.each do |it|
|
20
|
-
it.reset # Clean up iterator results before starting
|
21
|
-
select_nodes(it.selector).each do |node|
|
22
|
-
self.context = node
|
23
|
-
it.parse { |p| locate p }
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
self.context = original_context
|
28
|
-
|
29
|
-
metadata.parse { |p| locate p }
|
30
|
-
|
31
|
-
metadata.flatten
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
def parser_for(metadata)
|
36
|
-
url = "#{metadata[:base_url]}#{metadata[:list_page]}"
|
37
|
-
page = nil
|
38
|
-
parser = nil
|
39
|
-
begin
|
40
|
-
if metadata[:document_format] == :html
|
41
|
-
page = @mechanize.get(url)
|
42
|
-
parser = page.parser
|
43
|
-
else
|
44
|
-
page = RestClient.get(url)
|
45
|
-
parser = Nokogiri::XML page
|
46
|
-
end
|
47
|
-
self.response_code = page.code.to_i if page.respond_to? :code
|
48
|
-
parser
|
49
|
-
rescue
|
50
|
-
if $!.respond_to? :http_code
|
51
|
-
self.response_code = $!.http_code.to_i
|
52
|
-
elsif $!.respond_to? :response_code
|
53
|
-
self.response_code = $!.response_code.to_i
|
54
|
-
end
|
55
|
-
raise $!
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
data/lib/wombat/property.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
module Wombat
|
2
|
-
class Property
|
3
|
-
attr_accessor :name, :selector, :format, :namespaces, :callback, :result
|
4
|
-
|
5
|
-
def initialize(options)
|
6
|
-
@name = options[:name]
|
7
|
-
@selector = options[:selector]
|
8
|
-
@format = options[:format]
|
9
|
-
@namespaces = options[:namespaces]
|
10
|
-
@callback = options[:callback]
|
11
|
-
end
|
12
|
-
|
13
|
-
def flatten(depth = nil)
|
14
|
-
depth ? result[depth] : result
|
15
|
-
end
|
16
|
-
|
17
|
-
def reset
|
18
|
-
self.result = nil
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
data/lib/wombat/property_container.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
|
3
|
-
module Wombat
|
4
|
-
class PropertyContainer < Hash
|
5
|
-
attr_accessor :iterators
|
6
|
-
|
7
|
-
def initialize
|
8
|
-
@iterators = []
|
9
|
-
end
|
10
|
-
|
11
|
-
def method_missing(method, *args, &block)
|
12
|
-
if args.empty? && block
|
13
|
-
self["#{method.to_s}"] = PropertyContainer.new unless self["#{method.to_s}"]
|
14
|
-
block.call(self["#{method.to_s}"])
|
15
|
-
else
|
16
|
-
self[method.to_s] = Property.new(
|
17
|
-
name: method.to_s,
|
18
|
-
selector: args.first,
|
19
|
-
format: args[1],
|
20
|
-
namespaces: args[2],
|
21
|
-
callback: block)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_ary
|
26
|
-
end
|
27
|
-
|
28
|
-
def all_properties
|
29
|
-
values.flat_map { |v|
|
30
|
-
if v.kind_of? PropertyContainer
|
31
|
-
v.all_properties
|
32
|
-
elsif v.kind_of? Property
|
33
|
-
v
|
34
|
-
else
|
35
|
-
nil
|
36
|
-
end
|
37
|
-
}.compact
|
38
|
-
end
|
39
|
-
|
40
|
-
def parse
|
41
|
-
all_properties.each do |p|
|
42
|
-
result = yield p if block_given?
|
43
|
-
p.result = p.callback ? p.callback.call(result) : result
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def flatten(depth = nil)
|
48
|
-
properties = Hash.new.tap do |h|
|
49
|
-
keys.map do |k|
|
50
|
-
val = self[k]
|
51
|
-
if val.is_a?(PropertyContainer) || val.is_a?(Property)
|
52
|
-
h[k] = val.flatten depth
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
iters = iterators.reduce({}) do |memo, i|
|
58
|
-
memo.merge("iterator#{iterators.index(i)}" => i.flatten)
|
59
|
-
end
|
60
|
-
|
61
|
-
properties.merge iters
|
62
|
-
end
|
63
|
-
|
64
|
-
def for_each(selector)
|
65
|
-
Iterator.new(selector).tap do |i|
|
66
|
-
iterators << i
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
data/lib/wombat/property_locator.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/node_selector'
|
3
|
-
|
4
|
-
module Wombat
|
5
|
-
module PropertyLocator
|
6
|
-
include NodeSelector
|
7
|
-
|
8
|
-
def locate(property)
|
9
|
-
props = _locate property
|
10
|
-
property.format != :list ? props.first : props
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
def _locate(property)
|
15
|
-
result = select_nodes(property.selector, property.namespaces).to_a
|
16
|
-
result.map! {|r| r.inner_html.strip } if property.format == :html
|
17
|
-
result.map {|r| r.kind_of?(String) ? r : r.inner_text }.map(&:strip)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
data/spec/iterator_spec.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Iterator do
|
4
|
-
let(:it) { Wombat::Iterator.new "it_selector" }
|
5
|
-
|
6
|
-
context 'parse' do
|
7
|
-
it 'should iterate in for_each properties' do
|
8
|
-
it.prop_1 "some_selector"
|
9
|
-
it.prop_2 "another_selector"
|
10
|
-
|
11
|
-
it['prop_1'].should_receive(:result).twice.and_return([])
|
12
|
-
it['prop_2'].should_receive(:result).twice.and_return([])
|
13
|
-
|
14
|
-
parser = double :parser
|
15
|
-
parser.should_receive(:locate).with(it['prop_1']).twice
|
16
|
-
parser.should_receive(:locate).with(it['prop_2']).twice
|
17
|
-
|
18
|
-
it.parse { |p| parser.locate p }
|
19
|
-
it.parse { |p| parser.locate p }
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should raise if no block given' do
|
23
|
-
expect{
|
24
|
-
it.parse
|
25
|
-
}.to raise_error(ArgumentError)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
context 'reset' do
|
30
|
-
it 'should clean up properties results' do
|
31
|
-
it.prop_1 'some_selector'
|
32
|
-
it['prop_1'].result = [1, 2]
|
33
|
-
it.reset
|
34
|
-
it['prop_1'].result.should be_nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
it 'should flatten properties to plain hash format' do
|
39
|
-
it.prop_1 "some_selector"
|
40
|
-
it.prop_2 "another_selector"
|
41
|
-
|
42
|
-
it.parse {|p| }
|
43
|
-
it.parse {|p| }
|
44
|
-
it['prop_1'].result = ['result 1', 'result 2']
|
45
|
-
it['prop_2'].result = ['result 3', 'result 4']
|
46
|
-
|
47
|
-
it.flatten.should == [
|
48
|
-
{ "prop_1" => "result 1", "prop_2" => "result 3" },
|
49
|
-
{ "prop_1" => "result 2", "prop_2" => "result 4" }
|
50
|
-
]
|
51
|
-
end
|
52
|
-
end
|
data/spec/metadata_spec.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Metadata do
|
4
|
-
before(:each) do
|
5
|
-
@metadata = Wombat::Metadata.new
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'should not include non-properties in all properties list' do
|
9
|
-
@metadata.another_property "/some/selector", :text
|
10
|
-
@metadata.base_url "felipecsl.com"
|
11
|
-
@metadata.list_page "/yeah"
|
12
|
-
@metadata.all_properties.should == [@metadata['another_property']]
|
13
|
-
end
|
14
|
-
|
15
|
-
it 'should store iterators' do
|
16
|
-
@metadata.for_each("some_selector").kind_of?(Wombat::Iterator).should be_true
|
17
|
-
@metadata.iterators.size.should == 1
|
18
|
-
@metadata.iterators.first.selector.should == "some_selector"
|
19
|
-
end
|
20
|
-
end
|
data/spec/parser_spec.rb
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Parser do
|
4
|
-
before(:each) do
|
5
|
-
crawler = Class.new
|
6
|
-
crawler.send(:include, Wombat::Parser)
|
7
|
-
@parser = crawler.new
|
8
|
-
@metadata = Wombat::Metadata.new
|
9
|
-
end
|
10
|
-
|
11
|
-
it 'should request page document with correct url' do
|
12
|
-
@metadata.base_url "http://www.google.com"
|
13
|
-
@metadata.list_page "/search"
|
14
|
-
fake_document = double :document
|
15
|
-
fake_parser = double :parser
|
16
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
17
|
-
@parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
|
18
|
-
|
19
|
-
@parser.parse @metadata
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should send correct data to locate method' do
|
23
|
-
fake_document = double :document
|
24
|
-
fake_parser = double :parser
|
25
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
26
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
27
|
-
@parser.should_not_receive :locate
|
28
|
-
@parser.parse @metadata
|
29
|
-
end
|
30
|
-
|
31
|
-
it 'should invoke metadata callbacks' do
|
32
|
-
fake_document = double :document
|
33
|
-
fake_parser = double :parser
|
34
|
-
property = double :property
|
35
|
-
block_called = false
|
36
|
-
block = lambda { |p| block_called = true }
|
37
|
-
|
38
|
-
property.stub(:result)
|
39
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
40
|
-
property.should_receive(:callback).twice.and_return(block)
|
41
|
-
property.should_receive(:result=).with(true)
|
42
|
-
|
43
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
44
|
-
@metadata.stub(:all_properties).and_return [property]
|
45
|
-
@parser.should_receive(:locate).with(property)
|
46
|
-
|
47
|
-
@parser.parse @metadata
|
48
|
-
|
49
|
-
block_called.should be_true
|
50
|
-
end
|
51
|
-
|
52
|
-
it 'should invoke callback with parsed data' do
|
53
|
-
fake_document = double :document
|
54
|
-
fake_parser = double :parser
|
55
|
-
property = double :property
|
56
|
-
block_called = false
|
57
|
-
block = lambda { |p|
|
58
|
-
block_called = true
|
59
|
-
p.should == "blah"
|
60
|
-
}
|
61
|
-
|
62
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
63
|
-
property.should_receive(:callback).twice.and_return(block)
|
64
|
-
property.should_receive(:result=).with(true)
|
65
|
-
|
66
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
67
|
-
@metadata.stub(:all_properties).and_return [property]
|
68
|
-
@parser.should_receive(:locate).with(property).and_return("blah")
|
69
|
-
|
70
|
-
@parser.parse @metadata
|
71
|
-
|
72
|
-
block_called.should be_true
|
73
|
-
end
|
74
|
-
|
75
|
-
it 'should return hash with requested properties' do
|
76
|
-
hash = double :results
|
77
|
-
fake_parser = double :parser
|
78
|
-
fake_document = double :document
|
79
|
-
|
80
|
-
fake_document.should_receive(:parser).and_return fake_parser
|
81
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
82
|
-
@metadata.should_receive(:flatten).and_return hash
|
83
|
-
|
84
|
-
@parser.parse(@metadata).should == hash
|
85
|
-
end
|
86
|
-
|
87
|
-
it 'should not include null results in iterated block' do
|
88
|
-
fake_parser = double :parser
|
89
|
-
fake_document = double :document
|
90
|
-
c1 = double :context
|
91
|
-
c2 = double :context
|
92
|
-
it = Wombat::Iterator.new "it_selector"
|
93
|
-
it.prop_1 "some_selector"
|
94
|
-
|
95
|
-
@parser.should_receive(:context=).ordered
|
96
|
-
@metadata.should_receive(:iterators).and_return [it]
|
97
|
-
@metadata.should_receive(:flatten)
|
98
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
99
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
100
|
-
@parser.should_receive(:select_nodes).with("it_selector").and_return [c1, c2]
|
101
|
-
@parser.should_receive(:context=).with(c1).ordered
|
102
|
-
@parser.should_receive(:context=).with(c2).ordered
|
103
|
-
@parser.should_receive(:context=).ordered
|
104
|
-
@parser.should_receive(:locate).with(it['prop_1']).and_return(12)
|
105
|
-
@parser.should_receive(:locate).with(it['prop_1']).and_return(nil)
|
106
|
-
@parser.stub(:locate)
|
107
|
-
|
108
|
-
@parser.parse(@metadata)
|
109
|
-
|
110
|
-
it["prop_1"].result.should == [12]
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'should correctly parse xml documents' do
|
114
|
-
fake_document = double :xml
|
115
|
-
fake_parser = double :parser
|
116
|
-
@metadata.document_format :xml
|
117
|
-
@parser.mechanize.should_not_receive(:get)
|
118
|
-
RestClient.should_receive(:get).and_return fake_document
|
119
|
-
Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
|
120
|
-
@parser.should_receive(:context=).with(fake_parser)
|
121
|
-
@parser.should_receive(:context=)
|
122
|
-
|
123
|
-
@parser.parse @metadata
|
124
|
-
end
|
125
|
-
end
|
data/spec/property_container_spec.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::PropertyContainer do
|
4
|
-
before(:each) do
|
5
|
-
@metadata = Wombat::PropertyContainer.new
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'should return an array with all the metadata properties excluding iterators' do
|
9
|
-
@metadata["event"] = Wombat::PropertyContainer.new
|
10
|
-
@metadata["venue"] = Wombat::PropertyContainer.new
|
11
|
-
@metadata.another_property "/some/selector", :text
|
12
|
-
@metadata["event"]["something"] = Wombat::PropertyContainer.new
|
13
|
-
@metadata["event"]["something"].else "Wohooo"
|
14
|
-
@metadata["venue"].awesome "whooea"
|
15
|
-
it = Wombat::Iterator.new "it_selector"
|
16
|
-
it.felipe "lima"
|
17
|
-
@metadata.iterators << it
|
18
|
-
|
19
|
-
all_propes = @metadata.all_properties
|
20
|
-
|
21
|
-
all_propes.should =~ [
|
22
|
-
@metadata["another_property"],
|
23
|
-
@metadata["event"]["something"]["else"],
|
24
|
-
@metadata["venue"]["awesome"]
|
25
|
-
]
|
26
|
-
end
|
27
|
-
|
28
|
-
it 'should be able to change properties via all_properties' do
|
29
|
-
@metadata.another_property "/some/selector", :text
|
30
|
-
@metadata.all_properties.first.selector = "abc"
|
31
|
-
@metadata["another_property"].selector.should == "abc"
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should return metadata in plain hash format including iterators' do
|
35
|
-
@metadata.title "/some/selector"
|
36
|
-
@metadata["title"].result = "Gogobot Inc."
|
37
|
-
@metadata["holder"] = Wombat::PropertyContainer.new
|
38
|
-
@metadata["holder"].heading "css=.heading"
|
39
|
-
@metadata["holder"]["heading"].result = 123456
|
40
|
-
@metadata["holder"]["subheader"] = Wombat::PropertyContainer.new
|
41
|
-
@metadata["holder"]["subheader"].section "/blah"
|
42
|
-
@metadata["holder"]["subheader"]["section"].result = "Lorem Ipsum"
|
43
|
-
it = Wombat::Iterator.new "it_selector"
|
44
|
-
it.felipe "lima"
|
45
|
-
it["felipe"].result = ["correa", "de souza", "lima"]
|
46
|
-
@metadata.iterators = [it]
|
47
|
-
@metadata.footer("another thing", :html) { |a| true }
|
48
|
-
@metadata["footer"].result = "bla bla bla"
|
49
|
-
|
50
|
-
@metadata.flatten.should == {
|
51
|
-
"title" => "Gogobot Inc.",
|
52
|
-
"holder" => {
|
53
|
-
"heading" => 123456,
|
54
|
-
"subheader" => {
|
55
|
-
"section" => "Lorem Ipsum"
|
56
|
-
}
|
57
|
-
},
|
58
|
-
"iterator0"=>[{"felipe"=>"correa"}, {"felipe"=>"de souza"}, {"felipe"=>"lima"}],
|
59
|
-
"footer" => "bla bla bla"
|
60
|
-
}
|
61
|
-
end
|
62
|
-
end
|
data/spec/property_locator_spec.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::PropertyLocator do
|
4
|
-
before(:each) do
|
5
|
-
@locator = Class.new
|
6
|
-
@locator.send(:include, Wombat::PropertyLocator)
|
7
|
-
@locator_instance = @locator.new
|
8
|
-
@metadata = Wombat::Metadata.new
|
9
|
-
@metadata["event"] = Wombat::PropertyContainer.new
|
10
|
-
@metadata["venue"] = Wombat::PropertyContainer.new
|
11
|
-
@metadata["location"] = Wombat::PropertyContainer.new
|
12
|
-
end
|
13
|
-
|
14
|
-
it 'should locate metadata properties' do
|
15
|
-
context = double :context
|
16
|
-
abc = double :abc
|
17
|
-
|
18
|
-
abc.stub(:inner_text).and_return("Something cool")
|
19
|
-
|
20
|
-
context.stub(:xpath).with("/abc", nil).and_return([abc])
|
21
|
-
context.stub(:xpath).with("/bah", nil).and_return(["abc"])
|
22
|
-
context.stub(:css).with("/ghi").and_return(["Another stuff"])
|
23
|
-
|
24
|
-
@metadata["event"].data1 "xpath=/abc"
|
25
|
-
@metadata["venue"].data2 :farms
|
26
|
-
@metadata["location"].data3 "css=/ghi"
|
27
|
-
@metadata.blah "xpath=/bah"
|
28
|
-
|
29
|
-
@locator_instance.stub(:context).and_return context
|
30
|
-
|
31
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
32
|
-
|
33
|
-
@metadata["blah"].result.should == "abc"
|
34
|
-
@metadata["event"]["data1"].result.should == "Something cool"
|
35
|
-
@metadata["venue"]["data2"].result.should == "farms"
|
36
|
-
@metadata["location"]["data3"].result.should == "Another stuff"
|
37
|
-
end
|
38
|
-
|
39
|
-
it 'should support properties with html format' do
|
40
|
-
context = double :context
|
41
|
-
html_info = double :html_info
|
42
|
-
|
43
|
-
html_info.should_receive(:inner_html).and_return("some another info ")
|
44
|
-
context.should_receive(:xpath).with("/anotherData", nil).and_return([html_info])
|
45
|
-
|
46
|
-
@locator_instance.stub(:context).and_return context
|
47
|
-
|
48
|
-
@metadata["event"].another_info "xpath=/anotherData", :html
|
49
|
-
|
50
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
51
|
-
|
52
|
-
@metadata["event"]["another_info"].result.should == "some another info"
|
53
|
-
end
|
54
|
-
|
55
|
-
it 'should trim property contents and use namespaces if present' do
|
56
|
-
context = double :context
|
57
|
-
context.should_receive(:xpath).with("/event/some/description", "blah").and_return([" awesome event "])
|
58
|
-
|
59
|
-
@locator_instance.stub(:context).and_return context
|
60
|
-
@metadata["event"].description "xpath=/event/some/description", :text, "blah"
|
61
|
-
|
62
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
63
|
-
|
64
|
-
@metadata["event"]["description"].result.should == "awesome event"
|
65
|
-
end
|
66
|
-
|
67
|
-
it 'should return array of matching nodes for list properties' do
|
68
|
-
context = double :context
|
69
|
-
@metadata.list_prop "css=.selector", :list
|
70
|
-
@locator_instance.stub(:context).and_return context
|
71
|
-
@locator_instance.should_receive(:select_nodes).with("css=.selector", nil).and_return %w(1 2 3 4 5)
|
72
|
-
|
73
|
-
@locator_instance.locate(@metadata["list_prop"]).should == %w(1 2 3 4 5)
|
74
|
-
end
|
75
|
-
end
|
data/spec/property_spec.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Property do
|
4
|
-
it 'should store property data' do
|
5
|
-
property = Wombat::Property.new(
|
6
|
-
name: "title",
|
7
|
-
selector: "/some/selector",
|
8
|
-
format: :html,
|
9
|
-
callback: lambda {})
|
10
|
-
|
11
|
-
property.name.should == "title"
|
12
|
-
property.selector.should == "/some/selector"
|
13
|
-
property.format.should == :html
|
14
|
-
property.callback.should == lambda {}
|
15
|
-
end
|
16
|
-
end
|