wombat 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/CHANGELOG.md +67 -0
- data/Gemfile.lock +52 -50
- data/README.md +4 -4
- data/VERSION +1 -1
- data/examples/iterator.rb +25 -25
- data/examples/list.rb +22 -22
- data/examples/xml.rb +15 -15
- data/fixtures/vcr_cassettes/make_post_request.yml +69 -0
- data/lib/wombat/crawler.rb +13 -7
- data/lib/wombat/dsl/metadata.rb +8 -0
- data/lib/wombat/processing/parser.rb +22 -7
- data/lib/wombat/property/locators/factory.rb +28 -28
- data/lib/wombat/property/locators/follow.rb +15 -38
- data/lib/wombat/property/locators/list.rb +13 -13
- data/lib/wombat.rb +8 -8
- data/spec/crawler_spec.rb +64 -42
- data/spec/integration/crawler_inheritance_spec.rb +63 -0
- data/spec/integration/integration_spec.rb +30 -0
- data/spec/processing/parser_spec.rb +35 -0
- data/spec/property/locators/factory_spec.rb +14 -14
- data/spec/property/locators/html_spec.rb +18 -18
- data/spec/property/locators/list_spec.rb +8 -8
- data/spec/property/locators/text_spec.rb +44 -44
- data/spec/wombat_spec.rb +36 -36
- data/wombat.gemspec +7 -3
- metadata +6 -2
@@ -1,16 +1,16 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::Text do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
4
|
+
it 'should locate text property with xpath selector and namespaces' do
|
5
|
+
fake_elem = double :element
|
6
|
+
context = double :context
|
7
|
+
fake_elem.stub inner_text: "Something cool "
|
8
|
+
context.stub(:xpath).with("/abc", 'boom').and_return [fake_elem]
|
9
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :text, 'boom')
|
10
|
+
|
11
|
+
locator = Wombat::Property::Locators::Text.new(property)
|
12
|
+
|
13
|
+
locator.locate(context).should == { "data1" => "Something cool" }
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'should locate text property with xpath selector using xpath functions' do
|
@@ -23,37 +23,37 @@ describe Wombat::Property::Locators::Text do
|
|
23
23
|
locator.locate(context).should == { "data1" => "Something" }
|
24
24
|
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
end
|
26
|
+
it 'should locate text property with css selector' do
|
27
|
+
fake_elem = double :element
|
28
|
+
context = double :context
|
29
|
+
fake_elem.stub inner_text: "My name"
|
30
|
+
context.stub(:css).with("/def").and_return [fake_elem]
|
31
|
+
property = Wombat::DSL::Property.new('data1', 'css=/def', :text)
|
32
|
+
|
33
|
+
locator = Wombat::Property::Locators::Text.new(property)
|
34
|
+
|
35
|
+
locator.locate(context).should == { "data1" => "My name" }
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should return plain symbols as strings' do
|
39
|
+
fake_elem = double :element
|
40
|
+
context = double :context
|
41
|
+
property = Wombat::DSL::Property.new('data_2', :hardcoded_value, :text)
|
42
|
+
|
43
|
+
locator = Wombat::Property::Locators::Text.new(property)
|
44
|
+
|
45
|
+
locator.locate(context).should == { "data_2" => "hardcoded_value" }
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should invoke property callback' do
|
49
|
+
fake_elem = double :element
|
50
|
+
context = double :context
|
51
|
+
fake_elem.stub inner_text: "My name"
|
52
|
+
context.stub(:css).with("/def").and_return [fake_elem]
|
53
|
+
property = Wombat::DSL::Property.new('data1', 'css=/def', :text) { |s| s.gsub(/name/, 'ass') }
|
54
|
+
|
55
|
+
locator = Wombat::Property::Locators::Text.new(property)
|
56
|
+
|
57
|
+
locator.locate(context).should == { "data1" => "My ass" }
|
58
|
+
end
|
59
|
+
end
|
data/spec/wombat_spec.rb
CHANGED
@@ -1,45 +1,45 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat do
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
it 'should provide syntactic sugar method Wombat.crawl' do
|
5
|
+
Wombat.should respond_to(:crawl)
|
6
|
+
end
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
it 'should provide syntactic sugar method Wombat.scrape' do
|
9
|
+
Wombat.should respond_to(:scrape)
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
12
|
+
it 'should redirect .scrape to .crawl' do
|
13
|
+
fake_class = double :fake
|
14
|
+
fake_class.stub :include
|
15
|
+
fake_class.should_receive(:new).and_return(double(crawl: nil))
|
16
|
+
Class.stub :new => fake_class
|
17
|
+
Wombat.scrape
|
18
|
+
end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
it 'should provide configuration method with block' do
|
21
|
+
Wombat.configure do |config|
|
22
|
+
config.set_proxy "10.0.0.1", 8080
|
23
|
+
config.set_user_agent "Wombat"
|
24
|
+
config.set_user_agent_alias 'Mac Safari'
|
25
|
+
end
|
26
|
+
Wombat.proxy_args.should == ["10.0.0.1", 8080]
|
27
|
+
Wombat.user_agent.should == 'Wombat'
|
28
|
+
Wombat.user_agent_alias.should == 'Mac Safari'
|
29
|
+
end
|
30
30
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
it 'should accept regular properties (non-selectors)' do
|
32
|
+
VCR.use_cassette('broken_selector') do
|
33
|
+
lambda {
|
34
|
+
Wombat.crawl do
|
35
|
+
base_url "http://www.github.com"
|
36
|
+
path "/"
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
38
|
+
source :obaoba
|
39
|
+
description 'Oba Oba'
|
40
|
+
website 'http://obaoba.com.br'
|
41
|
+
end
|
42
|
+
}.should_not raise_error
|
43
|
+
end
|
44
|
+
end
|
45
45
|
end
|
data/wombat.gemspec
CHANGED
@@ -2,19 +2,20 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: wombat 2.4.0 ruby lib
|
5
|
+
# stub: wombat 2.5.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "wombat"
|
9
|
-
s.version = "2.4.0"
|
9
|
+
s.version = "2.5.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Felipe Lima"]
|
14
|
-
s.date = "
|
14
|
+
s.date = "2016-01-27"
|
15
15
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
16
16
|
s.email = "felipe.lima@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
18
|
+
"CHANGELOG.md",
|
18
19
|
"LICENSE.txt",
|
19
20
|
"README.md"
|
20
21
|
]
|
@@ -22,6 +23,7 @@ Gem::Specification.new do |s|
|
|
22
23
|
".document",
|
23
24
|
".rspec",
|
24
25
|
".travis.yml",
|
26
|
+
"CHANGELOG.md",
|
25
27
|
"Gemfile",
|
26
28
|
"Gemfile.lock",
|
27
29
|
"Guardfile",
|
@@ -41,6 +43,7 @@ Gem::Specification.new do |s|
|
|
41
43
|
"fixtures/vcr_cassettes/follow_relative_links.yml",
|
42
44
|
"fixtures/vcr_cassettes/for_each_page.yml",
|
43
45
|
"fixtures/vcr_cassettes/headers_selector.yml",
|
46
|
+
"fixtures/vcr_cassettes/make_post_request.yml",
|
44
47
|
"fixtures/vcr_cassettes/xml_with_namespace.yml",
|
45
48
|
"lib/wombat.rb",
|
46
49
|
"lib/wombat/crawler.rb",
|
@@ -64,6 +67,7 @@ Gem::Specification.new do |s|
|
|
64
67
|
"spec/crawler_spec.rb",
|
65
68
|
"spec/dsl/property_spec.rb",
|
66
69
|
"spec/helpers/sample_crawler.rb",
|
70
|
+
"spec/integration/crawler_inheritance_spec.rb",
|
67
71
|
"spec/integration/follow_relative_links_spec.rb",
|
68
72
|
"spec/integration/integration_spec.rb",
|
69
73
|
"spec/processing/parser_spec.rb",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.0
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felipe Lima
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -169,12 +169,14 @@ email: felipe.lima@gmail.com
|
|
169
169
|
executables: []
|
170
170
|
extensions: []
|
171
171
|
extra_rdoc_files:
|
172
|
+
- CHANGELOG.md
|
172
173
|
- LICENSE.txt
|
173
174
|
- README.md
|
174
175
|
files:
|
175
176
|
- ".document"
|
176
177
|
- ".rspec"
|
177
178
|
- ".travis.yml"
|
179
|
+
- CHANGELOG.md
|
178
180
|
- Gemfile
|
179
181
|
- Gemfile.lock
|
180
182
|
- Guardfile
|
@@ -194,6 +196,7 @@ files:
|
|
194
196
|
- fixtures/vcr_cassettes/follow_relative_links.yml
|
195
197
|
- fixtures/vcr_cassettes/for_each_page.yml
|
196
198
|
- fixtures/vcr_cassettes/headers_selector.yml
|
199
|
+
- fixtures/vcr_cassettes/make_post_request.yml
|
197
200
|
- fixtures/vcr_cassettes/xml_with_namespace.yml
|
198
201
|
- lib/wombat.rb
|
199
202
|
- lib/wombat/crawler.rb
|
@@ -217,6 +220,7 @@ files:
|
|
217
220
|
- spec/crawler_spec.rb
|
218
221
|
- spec/dsl/property_spec.rb
|
219
222
|
- spec/helpers/sample_crawler.rb
|
223
|
+
- spec/integration/crawler_inheritance_spec.rb
|
220
224
|
- spec/integration/follow_relative_links_spec.rb
|
221
225
|
- spec/integration/integration_spec.rb
|
222
226
|
- spec/processing/parser_spec.rb
|