wombat 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +13 -30
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/follow_links.yml +2143 -0
- data/lib/wombat/crawler.rb +7 -17
- data/lib/wombat/dsl/follower.rb +19 -0
- data/lib/wombat/dsl/iterator.rb +19 -0
- data/lib/wombat/dsl/metadata.rb +27 -0
- data/lib/wombat/dsl/property.rb +27 -0
- data/lib/wombat/dsl/property_group.rb +48 -0
- data/lib/wombat/processing/node_selector.rb +12 -0
- data/lib/wombat/processing/parser.rb +48 -0
- data/lib/wombat/property/locators/base.rb +33 -0
- data/lib/wombat/property/locators/factory.rb +39 -0
- data/lib/wombat/property/locators/follow.rb +25 -0
- data/lib/wombat/property/locators/html.rb +14 -0
- data/lib/wombat/property/locators/iterator.rb +23 -0
- data/lib/wombat/property/locators/list.rb +17 -0
- data/lib/wombat/property/locators/property_group.rb +20 -0
- data/lib/wombat/property/locators/text.rb +22 -0
- data/lib/wombat.rb +8 -4
- data/spec/crawler_spec.rb +38 -48
- data/spec/dsl/property_spec.rb +12 -0
- data/spec/helpers/sample_crawler.rb +2 -15
- data/spec/integration/integration_spec.rb +61 -33
- data/spec/processing/parser_spec.rb +32 -0
- data/spec/property/locators/factory_spec.rb +18 -0
- data/spec/property/locators/follow_spec.rb +4 -0
- data/spec/property/locators/html_spec.rb +15 -0
- data/spec/property/locators/iterator_spec.rb +4 -0
- data/spec/property/locators/list_spec.rb +13 -0
- data/spec/property/locators/text_spec.rb +49 -0
- data/spec/sample_crawler_spec.rb +7 -11
- data/spec/wombat_spec.rb +13 -1
- data/wombat.gemspec +27 -16
- metadata +27 -16
- data/lib/wombat/iterator.rb +0 -38
- data/lib/wombat/metadata.rb +0 -24
- data/lib/wombat/node_selector.rb +0 -10
- data/lib/wombat/parser.rb +0 -59
- data/lib/wombat/property.rb +0 -21
- data/lib/wombat/property_container.rb +0 -70
- data/lib/wombat/property_locator.rb +0 -20
- data/spec/iterator_spec.rb +0 -52
- data/spec/metadata_spec.rb +0 -20
- data/spec/parser_spec.rb +0 -125
- data/spec/property_container_spec.rb +0 -62
- data/spec/property_locator_spec.rb +0 -75
- data/spec/property_spec.rb +0 -16
data/lib/wombat/metadata.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/property_container'
|
3
|
-
require 'wombat/iterator'
|
4
|
-
|
5
|
-
module Wombat
|
6
|
-
class Metadata < PropertyContainer
|
7
|
-
def initialize
|
8
|
-
self[:document_format] = :html
|
9
|
-
super
|
10
|
-
end
|
11
|
-
|
12
|
-
def base_url(url)
|
13
|
-
self[:base_url] = url
|
14
|
-
end
|
15
|
-
|
16
|
-
def list_page(url)
|
17
|
-
self[:list_page] = url
|
18
|
-
end
|
19
|
-
|
20
|
-
def document_format(format)
|
21
|
-
self[:document_format] = format
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
data/lib/wombat/node_selector.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
module Wombat
|
2
|
-
module NodeSelector
|
3
|
-
def select_nodes(selector, namespaces = nil)
|
4
|
-
return [selector.to_s] if selector.is_a? Symbol
|
5
|
-
return context.xpath selector[6..-1], namespaces if selector.start_with? "xpath="
|
6
|
-
return context.css selector[4..-1] if selector.start_with? "css="
|
7
|
-
[selector]
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
data/lib/wombat/parser.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/property_locator'
|
3
|
-
require 'mechanize'
|
4
|
-
require 'restclient'
|
5
|
-
|
6
|
-
module Wombat
|
7
|
-
module Parser
|
8
|
-
include PropertyLocator
|
9
|
-
attr_accessor :mechanize, :context, :response_code
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@mechanize = Mechanize.new
|
13
|
-
end
|
14
|
-
|
15
|
-
def parse(metadata)
|
16
|
-
self.context = parser_for metadata
|
17
|
-
original_context = self.context
|
18
|
-
|
19
|
-
metadata.iterators.each do |it|
|
20
|
-
it.reset # Clean up iterator results before starting
|
21
|
-
select_nodes(it.selector).each do |node|
|
22
|
-
self.context = node
|
23
|
-
it.parse { |p| locate p }
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
self.context = original_context
|
28
|
-
|
29
|
-
metadata.parse { |p| locate p }
|
30
|
-
|
31
|
-
metadata.flatten
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
def parser_for(metadata)
|
36
|
-
url = "#{metadata[:base_url]}#{metadata[:list_page]}"
|
37
|
-
page = nil
|
38
|
-
parser = nil
|
39
|
-
begin
|
40
|
-
if metadata[:document_format] == :html
|
41
|
-
page = @mechanize.get(url)
|
42
|
-
parser = page.parser
|
43
|
-
else
|
44
|
-
page = RestClient.get(url)
|
45
|
-
parser = Nokogiri::XML page
|
46
|
-
end
|
47
|
-
self.response_code = page.code.to_i if page.respond_to? :code
|
48
|
-
parser
|
49
|
-
rescue
|
50
|
-
if $!.respond_to? :http_code
|
51
|
-
self.response_code = $!.http_code.to_i
|
52
|
-
elsif $!.respond_to? :response_code
|
53
|
-
self.response_code = $!.response_code.to_i
|
54
|
-
end
|
55
|
-
raise $!
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
data/lib/wombat/property.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
module Wombat
|
2
|
-
class Property
|
3
|
-
attr_accessor :name, :selector, :format, :namespaces, :callback, :result
|
4
|
-
|
5
|
-
def initialize(options)
|
6
|
-
@name = options[:name]
|
7
|
-
@selector = options[:selector]
|
8
|
-
@format = options[:format]
|
9
|
-
@namespaces = options[:namespaces]
|
10
|
-
@callback = options[:callback]
|
11
|
-
end
|
12
|
-
|
13
|
-
def flatten(depth = nil)
|
14
|
-
depth ? result[depth] : result
|
15
|
-
end
|
16
|
-
|
17
|
-
def reset
|
18
|
-
self.result = nil
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
data/lib/wombat/property_container.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
|
3
|
-
module Wombat
|
4
|
-
class PropertyContainer < Hash
|
5
|
-
attr_accessor :iterators
|
6
|
-
|
7
|
-
def initialize
|
8
|
-
@iterators = []
|
9
|
-
end
|
10
|
-
|
11
|
-
def method_missing(method, *args, &block)
|
12
|
-
if args.empty? && block
|
13
|
-
self["#{method.to_s}"] = PropertyContainer.new unless self["#{method.to_s}"]
|
14
|
-
block.call(self["#{method.to_s}"])
|
15
|
-
else
|
16
|
-
self[method.to_s] = Property.new(
|
17
|
-
name: method.to_s,
|
18
|
-
selector: args.first,
|
19
|
-
format: args[1],
|
20
|
-
namespaces: args[2],
|
21
|
-
callback: block)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_ary
|
26
|
-
end
|
27
|
-
|
28
|
-
def all_properties
|
29
|
-
values.flat_map { |v|
|
30
|
-
if v.kind_of? PropertyContainer
|
31
|
-
v.all_properties
|
32
|
-
elsif v.kind_of? Property
|
33
|
-
v
|
34
|
-
else
|
35
|
-
nil
|
36
|
-
end
|
37
|
-
}.compact
|
38
|
-
end
|
39
|
-
|
40
|
-
def parse
|
41
|
-
all_properties.each do |p|
|
42
|
-
result = yield p if block_given?
|
43
|
-
p.result = p.callback ? p.callback.call(result) : result
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def flatten(depth = nil)
|
48
|
-
properties = Hash.new.tap do |h|
|
49
|
-
keys.map do |k|
|
50
|
-
val = self[k]
|
51
|
-
if val.is_a?(PropertyContainer) || val.is_a?(Property)
|
52
|
-
h[k] = val.flatten depth
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
iters = iterators.reduce({}) do |memo, i|
|
58
|
-
memo.merge("iterator#{iterators.index(i)}" => i.flatten)
|
59
|
-
end
|
60
|
-
|
61
|
-
properties.merge iters
|
62
|
-
end
|
63
|
-
|
64
|
-
def for_each(selector)
|
65
|
-
Iterator.new(selector).tap do |i|
|
66
|
-
iterators << i
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
data/lib/wombat/property_locator.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
#coding: utf-8
|
2
|
-
require 'wombat/node_selector'
|
3
|
-
|
4
|
-
module Wombat
|
5
|
-
module PropertyLocator
|
6
|
-
include NodeSelector
|
7
|
-
|
8
|
-
def locate(property)
|
9
|
-
props = _locate property
|
10
|
-
property.format != :list ? props.first : props
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
def _locate(property)
|
15
|
-
result = select_nodes(property.selector, property.namespaces).to_a
|
16
|
-
result.map! {|r| r.inner_html.strip } if property.format == :html
|
17
|
-
result.map {|r| r.kind_of?(String) ? r : r.inner_text }.map(&:strip)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
data/spec/iterator_spec.rb
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Iterator do
|
4
|
-
let(:it) { Wombat::Iterator.new "it_selector" }
|
5
|
-
|
6
|
-
context 'parse' do
|
7
|
-
it 'should iterate in for_each properties' do
|
8
|
-
it.prop_1 "some_selector"
|
9
|
-
it.prop_2 "another_selector"
|
10
|
-
|
11
|
-
it['prop_1'].should_receive(:result).twice.and_return([])
|
12
|
-
it['prop_2'].should_receive(:result).twice.and_return([])
|
13
|
-
|
14
|
-
parser = double :parser
|
15
|
-
parser.should_receive(:locate).with(it['prop_1']).twice
|
16
|
-
parser.should_receive(:locate).with(it['prop_2']).twice
|
17
|
-
|
18
|
-
it.parse { |p| parser.locate p }
|
19
|
-
it.parse { |p| parser.locate p }
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should raise if no block given' do
|
23
|
-
expect{
|
24
|
-
it.parse
|
25
|
-
}.to raise_error(ArgumentError)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
context 'reset' do
|
30
|
-
it 'should clean up properties results' do
|
31
|
-
it.prop_1 'some_selector'
|
32
|
-
it['prop_1'].result = [1, 2]
|
33
|
-
it.reset
|
34
|
-
it['prop_1'].result.should be_nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
it 'should flatten properties to plain hash format' do
|
39
|
-
it.prop_1 "some_selector"
|
40
|
-
it.prop_2 "another_selector"
|
41
|
-
|
42
|
-
it.parse {|p| }
|
43
|
-
it.parse {|p| }
|
44
|
-
it['prop_1'].result = ['result 1', 'result 2']
|
45
|
-
it['prop_2'].result = ['result 3', 'result 4']
|
46
|
-
|
47
|
-
it.flatten.should == [
|
48
|
-
{ "prop_1" => "result 1", "prop_2" => "result 3" },
|
49
|
-
{ "prop_1" => "result 2", "prop_2" => "result 4" }
|
50
|
-
]
|
51
|
-
end
|
52
|
-
end
|
data/spec/metadata_spec.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Metadata do
|
4
|
-
before(:each) do
|
5
|
-
@metadata = Wombat::Metadata.new
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'should not include non-properties in all properties list' do
|
9
|
-
@metadata.another_property "/some/selector", :text
|
10
|
-
@metadata.base_url "felipecsl.com"
|
11
|
-
@metadata.list_page "/yeah"
|
12
|
-
@metadata.all_properties.should == [@metadata['another_property']]
|
13
|
-
end
|
14
|
-
|
15
|
-
it 'should store iterators' do
|
16
|
-
@metadata.for_each("some_selector").kind_of?(Wombat::Iterator).should be_true
|
17
|
-
@metadata.iterators.size.should == 1
|
18
|
-
@metadata.iterators.first.selector.should == "some_selector"
|
19
|
-
end
|
20
|
-
end
|
data/spec/parser_spec.rb
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Parser do
|
4
|
-
before(:each) do
|
5
|
-
crawler = Class.new
|
6
|
-
crawler.send(:include, Wombat::Parser)
|
7
|
-
@parser = crawler.new
|
8
|
-
@metadata = Wombat::Metadata.new
|
9
|
-
end
|
10
|
-
|
11
|
-
it 'should request page document with correct url' do
|
12
|
-
@metadata.base_url "http://www.google.com"
|
13
|
-
@metadata.list_page "/search"
|
14
|
-
fake_document = double :document
|
15
|
-
fake_parser = double :parser
|
16
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
17
|
-
@parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
|
18
|
-
|
19
|
-
@parser.parse @metadata
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should send correct data to locate method' do
|
23
|
-
fake_document = double :document
|
24
|
-
fake_parser = double :parser
|
25
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
26
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
27
|
-
@parser.should_not_receive :locate
|
28
|
-
@parser.parse @metadata
|
29
|
-
end
|
30
|
-
|
31
|
-
it 'should invoke metadata callbacks' do
|
32
|
-
fake_document = double :document
|
33
|
-
fake_parser = double :parser
|
34
|
-
property = double :property
|
35
|
-
block_called = false
|
36
|
-
block = lambda { |p| block_called = true }
|
37
|
-
|
38
|
-
property.stub(:result)
|
39
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
40
|
-
property.should_receive(:callback).twice.and_return(block)
|
41
|
-
property.should_receive(:result=).with(true)
|
42
|
-
|
43
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
44
|
-
@metadata.stub(:all_properties).and_return [property]
|
45
|
-
@parser.should_receive(:locate).with(property)
|
46
|
-
|
47
|
-
@parser.parse @metadata
|
48
|
-
|
49
|
-
block_called.should be_true
|
50
|
-
end
|
51
|
-
|
52
|
-
it 'should invoke callback with parsed data' do
|
53
|
-
fake_document = double :document
|
54
|
-
fake_parser = double :parser
|
55
|
-
property = double :property
|
56
|
-
block_called = false
|
57
|
-
block = lambda { |p|
|
58
|
-
block_called = true
|
59
|
-
p.should == "blah"
|
60
|
-
}
|
61
|
-
|
62
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
63
|
-
property.should_receive(:callback).twice.and_return(block)
|
64
|
-
property.should_receive(:result=).with(true)
|
65
|
-
|
66
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
67
|
-
@metadata.stub(:all_properties).and_return [property]
|
68
|
-
@parser.should_receive(:locate).with(property).and_return("blah")
|
69
|
-
|
70
|
-
@parser.parse @metadata
|
71
|
-
|
72
|
-
block_called.should be_true
|
73
|
-
end
|
74
|
-
|
75
|
-
it 'should return hash with requested properties' do
|
76
|
-
hash = double :results
|
77
|
-
fake_parser = double :parser
|
78
|
-
fake_document = double :document
|
79
|
-
|
80
|
-
fake_document.should_receive(:parser).and_return fake_parser
|
81
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
82
|
-
@metadata.should_receive(:flatten).and_return hash
|
83
|
-
|
84
|
-
@parser.parse(@metadata).should == hash
|
85
|
-
end
|
86
|
-
|
87
|
-
it 'should not include null results in iterated block' do
|
88
|
-
fake_parser = double :parser
|
89
|
-
fake_document = double :document
|
90
|
-
c1 = double :context
|
91
|
-
c2 = double :context
|
92
|
-
it = Wombat::Iterator.new "it_selector"
|
93
|
-
it.prop_1 "some_selector"
|
94
|
-
|
95
|
-
@parser.should_receive(:context=).ordered
|
96
|
-
@metadata.should_receive(:iterators).and_return [it]
|
97
|
-
@metadata.should_receive(:flatten)
|
98
|
-
fake_document.should_receive(:parser).and_return(fake_parser)
|
99
|
-
@parser.mechanize.stub(:get).and_return fake_document
|
100
|
-
@parser.should_receive(:select_nodes).with("it_selector").and_return [c1, c2]
|
101
|
-
@parser.should_receive(:context=).with(c1).ordered
|
102
|
-
@parser.should_receive(:context=).with(c2).ordered
|
103
|
-
@parser.should_receive(:context=).ordered
|
104
|
-
@parser.should_receive(:locate).with(it['prop_1']).and_return(12)
|
105
|
-
@parser.should_receive(:locate).with(it['prop_1']).and_return(nil)
|
106
|
-
@parser.stub(:locate)
|
107
|
-
|
108
|
-
@parser.parse(@metadata)
|
109
|
-
|
110
|
-
it["prop_1"].result.should == [12]
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'should correctly parse xml documents' do
|
114
|
-
fake_document = double :xml
|
115
|
-
fake_parser = double :parser
|
116
|
-
@metadata.document_format :xml
|
117
|
-
@parser.mechanize.should_not_receive(:get)
|
118
|
-
RestClient.should_receive(:get).and_return fake_document
|
119
|
-
Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
|
120
|
-
@parser.should_receive(:context=).with(fake_parser)
|
121
|
-
@parser.should_receive(:context=)
|
122
|
-
|
123
|
-
@parser.parse @metadata
|
124
|
-
end
|
125
|
-
end
|
data/spec/property_container_spec.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::PropertyContainer do
|
4
|
-
before(:each) do
|
5
|
-
@metadata = Wombat::PropertyContainer.new
|
6
|
-
end
|
7
|
-
|
8
|
-
it 'should return an array with all the metadata properties excluding iterators' do
|
9
|
-
@metadata["event"] = Wombat::PropertyContainer.new
|
10
|
-
@metadata["venue"] = Wombat::PropertyContainer.new
|
11
|
-
@metadata.another_property "/some/selector", :text
|
12
|
-
@metadata["event"]["something"] = Wombat::PropertyContainer.new
|
13
|
-
@metadata["event"]["something"].else "Wohooo"
|
14
|
-
@metadata["venue"].awesome "whooea"
|
15
|
-
it = Wombat::Iterator.new "it_selector"
|
16
|
-
it.felipe "lima"
|
17
|
-
@metadata.iterators << it
|
18
|
-
|
19
|
-
all_propes = @metadata.all_properties
|
20
|
-
|
21
|
-
all_propes.should =~ [
|
22
|
-
@metadata["another_property"],
|
23
|
-
@metadata["event"]["something"]["else"],
|
24
|
-
@metadata["venue"]["awesome"]
|
25
|
-
]
|
26
|
-
end
|
27
|
-
|
28
|
-
it 'should be able to change properties via all_properties' do
|
29
|
-
@metadata.another_property "/some/selector", :text
|
30
|
-
@metadata.all_properties.first.selector = "abc"
|
31
|
-
@metadata["another_property"].selector.should == "abc"
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should return metadata in plain hash format including iterators' do
|
35
|
-
@metadata.title "/some/selector"
|
36
|
-
@metadata["title"].result = "Gogobot Inc."
|
37
|
-
@metadata["holder"] = Wombat::PropertyContainer.new
|
38
|
-
@metadata["holder"].heading "css=.heading"
|
39
|
-
@metadata["holder"]["heading"].result = 123456
|
40
|
-
@metadata["holder"]["subheader"] = Wombat::PropertyContainer.new
|
41
|
-
@metadata["holder"]["subheader"].section "/blah"
|
42
|
-
@metadata["holder"]["subheader"]["section"].result = "Lorem Ipsum"
|
43
|
-
it = Wombat::Iterator.new "it_selector"
|
44
|
-
it.felipe "lima"
|
45
|
-
it["felipe"].result = ["correa", "de souza", "lima"]
|
46
|
-
@metadata.iterators = [it]
|
47
|
-
@metadata.footer("another thing", :html) { |a| true }
|
48
|
-
@metadata["footer"].result = "bla bla bla"
|
49
|
-
|
50
|
-
@metadata.flatten.should == {
|
51
|
-
"title" => "Gogobot Inc.",
|
52
|
-
"holder" => {
|
53
|
-
"heading" => 123456,
|
54
|
-
"subheader" => {
|
55
|
-
"section" => "Lorem Ipsum"
|
56
|
-
}
|
57
|
-
},
|
58
|
-
"iterator0"=>[{"felipe"=>"correa"}, {"felipe"=>"de souza"}, {"felipe"=>"lima"}],
|
59
|
-
"footer" => "bla bla bla"
|
60
|
-
}
|
61
|
-
end
|
62
|
-
end
|
data/spec/property_locator_spec.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::PropertyLocator do
|
4
|
-
before(:each) do
|
5
|
-
@locator = Class.new
|
6
|
-
@locator.send(:include, Wombat::PropertyLocator)
|
7
|
-
@locator_instance = @locator.new
|
8
|
-
@metadata = Wombat::Metadata.new
|
9
|
-
@metadata["event"] = Wombat::PropertyContainer.new
|
10
|
-
@metadata["venue"] = Wombat::PropertyContainer.new
|
11
|
-
@metadata["location"] = Wombat::PropertyContainer.new
|
12
|
-
end
|
13
|
-
|
14
|
-
it 'should locate metadata properties' do
|
15
|
-
context = double :context
|
16
|
-
abc = double :abc
|
17
|
-
|
18
|
-
abc.stub(:inner_text).and_return("Something cool")
|
19
|
-
|
20
|
-
context.stub(:xpath).with("/abc", nil).and_return([abc])
|
21
|
-
context.stub(:xpath).with("/bah", nil).and_return(["abc"])
|
22
|
-
context.stub(:css).with("/ghi").and_return(["Another stuff"])
|
23
|
-
|
24
|
-
@metadata["event"].data1 "xpath=/abc"
|
25
|
-
@metadata["venue"].data2 :farms
|
26
|
-
@metadata["location"].data3 "css=/ghi"
|
27
|
-
@metadata.blah "xpath=/bah"
|
28
|
-
|
29
|
-
@locator_instance.stub(:context).and_return context
|
30
|
-
|
31
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
32
|
-
|
33
|
-
@metadata["blah"].result.should == "abc"
|
34
|
-
@metadata["event"]["data1"].result.should == "Something cool"
|
35
|
-
@metadata["venue"]["data2"].result.should == "farms"
|
36
|
-
@metadata["location"]["data3"].result.should == "Another stuff"
|
37
|
-
end
|
38
|
-
|
39
|
-
it 'should support properties with html format' do
|
40
|
-
context = double :context
|
41
|
-
html_info = double :html_info
|
42
|
-
|
43
|
-
html_info.should_receive(:inner_html).and_return("some another info ")
|
44
|
-
context.should_receive(:xpath).with("/anotherData", nil).and_return([html_info])
|
45
|
-
|
46
|
-
@locator_instance.stub(:context).and_return context
|
47
|
-
|
48
|
-
@metadata["event"].another_info "xpath=/anotherData", :html
|
49
|
-
|
50
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
51
|
-
|
52
|
-
@metadata["event"]["another_info"].result.should == "some another info"
|
53
|
-
end
|
54
|
-
|
55
|
-
it 'should trim property contents and use namespaces if present' do
|
56
|
-
context = double :context
|
57
|
-
context.should_receive(:xpath).with("/event/some/description", "blah").and_return([" awesome event "])
|
58
|
-
|
59
|
-
@locator_instance.stub(:context).and_return context
|
60
|
-
@metadata["event"].description "xpath=/event/some/description", :text, "blah"
|
61
|
-
|
62
|
-
@metadata.all_properties.each { |p| p.result = @locator_instance.locate p }
|
63
|
-
|
64
|
-
@metadata["event"]["description"].result.should == "awesome event"
|
65
|
-
end
|
66
|
-
|
67
|
-
it 'should return array of matching nodes for list properties' do
|
68
|
-
context = double :context
|
69
|
-
@metadata.list_prop "css=.selector", :list
|
70
|
-
@locator_instance.stub(:context).and_return context
|
71
|
-
@locator_instance.should_receive(:select_nodes).with("css=.selector", nil).and_return %w(1 2 3 4 5)
|
72
|
-
|
73
|
-
@locator_instance.locate(@metadata["list_prop"]).should == %w(1 2 3 4 5)
|
74
|
-
end
|
75
|
-
end
|
data/spec/property_spec.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wombat::Property do
|
4
|
-
it 'should store property data' do
|
5
|
-
property = Wombat::Property.new(
|
6
|
-
name: "title",
|
7
|
-
selector: "/some/selector",
|
8
|
-
format: :html,
|
9
|
-
callback: lambda {})
|
10
|
-
|
11
|
-
property.name.should == "title"
|
12
|
-
property.selector.should == "/some/selector"
|
13
|
-
property.format.should == :html
|
14
|
-
property.callback.should == lambda {}
|
15
|
-
end
|
16
|
-
end
|