wombat 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +49 -47
- data/README.md +9 -12
- data/VERSION +1 -1
- data/lib/wombat.rb +10 -2
- data/lib/wombat/processing/parser.rb +2 -0
- data/spec/wombat_spec.rb +5 -1
- data/wombat.gemspec +4 -4
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d495fd45c7c451739a77c6f9474b2eafd5b8324a
|
|
4
|
+
data.tar.gz: d665211fe3918cdf30b46c5baba237b6ba12d788
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7245007b0fddde801a764d1777a041a25e3453972edbc9bcf710876804bd5d4b23f24a5c2b477798ad4886f660b28591f230c544b1e5c3ee90a859bf855eafb9
|
|
7
|
+
data.tar.gz: 442634497971982fb0142540d6cd2ddc7b2224e1f78ed032eae448d72a2db8e73ac88d6841965c6d6d4c4350c2ea274bc2f6c377dfe0f9021ab238af159e0340
|
data/Gemfile.lock
CHANGED
|
@@ -1,31 +1,31 @@
|
|
|
1
1
|
GEM
|
|
2
2
|
remote: http://rubygems.org/
|
|
3
3
|
specs:
|
|
4
|
-
activesupport (4.
|
|
5
|
-
i18n (~> 0.
|
|
4
|
+
activesupport (4.2.0)
|
|
5
|
+
i18n (~> 0.7)
|
|
6
6
|
json (~> 1.7, >= 1.7.7)
|
|
7
7
|
minitest (~> 5.1)
|
|
8
|
-
thread_safe (~> 0.
|
|
8
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
|
9
9
|
tzinfo (~> 1.1)
|
|
10
|
-
addressable (2.3.
|
|
10
|
+
addressable (2.3.7)
|
|
11
11
|
builder (3.2.2)
|
|
12
|
-
coveralls (0.7.
|
|
13
|
-
multi_json (~> 1.
|
|
14
|
-
rest-client (
|
|
15
|
-
simplecov (
|
|
16
|
-
term-ansicolor (
|
|
17
|
-
thor (
|
|
12
|
+
coveralls (0.7.11)
|
|
13
|
+
multi_json (~> 1.10)
|
|
14
|
+
rest-client (>= 1.6.8, < 2)
|
|
15
|
+
simplecov (~> 0.9.1)
|
|
16
|
+
term-ansicolor (~> 1.3)
|
|
17
|
+
thor (~> 0.19.1)
|
|
18
18
|
descendants_tracker (0.0.4)
|
|
19
19
|
thread_safe (~> 0.3, >= 0.3.1)
|
|
20
20
|
diff-lcs (1.2.5)
|
|
21
21
|
docile (1.1.5)
|
|
22
|
-
domain_name (0.5.
|
|
22
|
+
domain_name (0.5.23)
|
|
23
23
|
unf (>= 0.0.5, < 1.0.0)
|
|
24
24
|
fakeweb (1.3.0)
|
|
25
|
-
faraday (0.9.
|
|
25
|
+
faraday (0.9.1)
|
|
26
26
|
multipart-post (>= 1.2, < 3)
|
|
27
|
-
git (1.2.
|
|
28
|
-
github_api (0.12.
|
|
27
|
+
git (1.2.9.1)
|
|
28
|
+
github_api (0.12.3)
|
|
29
29
|
addressable (~> 2.3)
|
|
30
30
|
descendants_tracker (~> 0.0.4)
|
|
31
31
|
faraday (~> 0.8, < 0.10)
|
|
@@ -33,11 +33,11 @@ GEM
|
|
|
33
33
|
multi_json (>= 1.7.5, < 2.0)
|
|
34
34
|
nokogiri (~> 1.6.3)
|
|
35
35
|
oauth2
|
|
36
|
-
hashie (3.
|
|
37
|
-
highline (1.
|
|
36
|
+
hashie (3.4.0)
|
|
37
|
+
highline (1.7.1)
|
|
38
38
|
http-cookie (1.0.2)
|
|
39
39
|
domain_name (~> 0.5)
|
|
40
|
-
i18n (0.
|
|
40
|
+
i18n (0.7.0)
|
|
41
41
|
jeweler (2.0.1)
|
|
42
42
|
builder
|
|
43
43
|
bundler (>= 1.0)
|
|
@@ -47,8 +47,8 @@ GEM
|
|
|
47
47
|
nokogiri (>= 1.5.10)
|
|
48
48
|
rake
|
|
49
49
|
rdoc
|
|
50
|
-
json (1.8.
|
|
51
|
-
jwt (1.
|
|
50
|
+
json (1.8.2)
|
|
51
|
+
jwt (1.3.0)
|
|
52
52
|
mechanize (2.7.3)
|
|
53
53
|
domain_name (~> 0.5, >= 0.5.1)
|
|
54
54
|
http-cookie (~> 1.0)
|
|
@@ -59,14 +59,15 @@ GEM
|
|
|
59
59
|
ntlm-http (~> 0.1, >= 0.1.1)
|
|
60
60
|
webrobots (>= 0.0.9, < 0.2)
|
|
61
61
|
mime-types (2.4.3)
|
|
62
|
-
mini_portile (0.6.
|
|
63
|
-
minitest (5.
|
|
64
|
-
multi_json (1.
|
|
62
|
+
mini_portile (0.6.2)
|
|
63
|
+
minitest (5.5.1)
|
|
64
|
+
multi_json (1.11.0)
|
|
65
65
|
multi_xml (0.5.5)
|
|
66
66
|
multipart-post (2.0.0)
|
|
67
67
|
net-http-digest_auth (1.4)
|
|
68
68
|
net-http-persistent (2.9.4)
|
|
69
|
-
|
|
69
|
+
netrc (0.10.3)
|
|
70
|
+
nokogiri (1.6.6.2)
|
|
70
71
|
mini_portile (~> 0.6.0)
|
|
71
72
|
ntlm-http (0.1.1)
|
|
72
73
|
oauth2 (1.0.0)
|
|
@@ -75,34 +76,35 @@ GEM
|
|
|
75
76
|
multi_json (~> 1.3)
|
|
76
77
|
multi_xml (~> 0.5)
|
|
77
78
|
rack (~> 1.2)
|
|
78
|
-
rack (1.
|
|
79
|
-
rake (10.4.
|
|
80
|
-
rdoc (4.
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
rspec (3.
|
|
85
|
-
rspec-core (~> 3.
|
|
86
|
-
rspec-expectations (~> 3.
|
|
87
|
-
rspec-mocks (~> 3.
|
|
88
|
-
rspec-core (3.1
|
|
89
|
-
rspec-support (~> 3.
|
|
90
|
-
rspec-expectations (3.
|
|
79
|
+
rack (1.6.0)
|
|
80
|
+
rake (10.4.2)
|
|
81
|
+
rdoc (4.2.0)
|
|
82
|
+
rest-client (1.7.3)
|
|
83
|
+
mime-types (>= 1.16, < 3.0)
|
|
84
|
+
netrc (~> 0.7)
|
|
85
|
+
rspec (3.2.0)
|
|
86
|
+
rspec-core (~> 3.2.0)
|
|
87
|
+
rspec-expectations (~> 3.2.0)
|
|
88
|
+
rspec-mocks (~> 3.2.0)
|
|
89
|
+
rspec-core (3.2.1)
|
|
90
|
+
rspec-support (~> 3.2.0)
|
|
91
|
+
rspec-expectations (3.2.0)
|
|
92
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
93
|
+
rspec-support (~> 3.2.0)
|
|
94
|
+
rspec-mocks (3.2.1)
|
|
91
95
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
92
|
-
rspec-support (~> 3.
|
|
93
|
-
rspec-
|
|
94
|
-
|
|
95
|
-
rspec-support (3.1.2)
|
|
96
|
-
simplecov (0.9.1)
|
|
96
|
+
rspec-support (~> 3.2.0)
|
|
97
|
+
rspec-support (3.2.2)
|
|
98
|
+
simplecov (0.9.2)
|
|
97
99
|
docile (~> 1.1.0)
|
|
98
100
|
multi_json (~> 1.0)
|
|
99
|
-
simplecov-html (~> 0.
|
|
100
|
-
simplecov-html (0.
|
|
101
|
-
term-ansicolor (1.
|
|
102
|
-
tins (~> 0
|
|
103
|
-
thor (0.
|
|
101
|
+
simplecov-html (~> 0.9.0)
|
|
102
|
+
simplecov-html (0.9.0)
|
|
103
|
+
term-ansicolor (1.3.0)
|
|
104
|
+
tins (~> 1.0)
|
|
105
|
+
thor (0.19.1)
|
|
104
106
|
thread_safe (0.3.4)
|
|
105
|
-
tins (
|
|
107
|
+
tins (1.3.5)
|
|
106
108
|
tzinfo (1.2.2)
|
|
107
109
|
thread_safe (~> 0.1)
|
|
108
110
|
unf (0.1.4)
|
data/README.md
CHANGED
|
@@ -14,8 +14,6 @@ Web scraper with an elegant DSL that parses structured data from web pages.
|
|
|
14
14
|
|
|
15
15
|
``gem install wombat``
|
|
16
16
|
|
|
17
|
-
Obs: Requires ruby 1.9.3 (activesupport requires Ruby version >= 1.9.3)
|
|
18
|
-
|
|
19
17
|
## Scraping a page:
|
|
20
18
|
|
|
21
19
|
The simplest way to use Wombat is by calling ``Wombat.crawl`` and passing it a block:
|
|
@@ -48,18 +46,18 @@ end
|
|
|
48
46
|
|
|
49
47
|
```ruby
|
|
50
48
|
{
|
|
51
|
-
"headline"=>"Build software better, together.",
|
|
52
|
-
"subheading"=>"Powerful collaboration, code review, and code management for open source and private projects. Need private repositories? Upgraded plans start at $7/mo.",
|
|
49
|
+
"headline"=>"Build software better, together.",
|
|
50
|
+
"subheading"=>"Powerful collaboration, code review, and code management for open source and private projects. Need private repositories? Upgraded plans start at $7/mo.",
|
|
53
51
|
"what_is"=>[
|
|
54
|
-
"Great collaboration starts with communication.",
|
|
55
|
-
"Friction-less development across teams.",
|
|
56
|
-
"World's largest open source community.",
|
|
52
|
+
"Great collaboration starts with communication.",
|
|
53
|
+
"Friction-less development across teams.",
|
|
54
|
+
"World's largest open source community.",
|
|
57
55
|
"Do more with powerful integrations."
|
|
58
|
-
],
|
|
56
|
+
],
|
|
59
57
|
"links"=>{
|
|
60
|
-
"explore"=>"Love",
|
|
61
|
-
"features"=>"Features",
|
|
62
|
-
"enterprise"=>"Enterprise",
|
|
58
|
+
"explore"=>"Love",
|
|
59
|
+
"features"=>"Features",
|
|
60
|
+
"enterprise"=>"Enterprise",
|
|
63
61
|
"blog"=>"Blog"
|
|
64
62
|
}
|
|
65
63
|
}
|
|
@@ -89,4 +87,3 @@ end
|
|
|
89
87
|
## Copyright
|
|
90
88
|
|
|
91
89
|
Copyright (c) 2012 Felipe Lima. See LICENSE.txt for further details.
|
|
92
|
-
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.3.0
|
|
1
|
+
2.4.0
|
data/lib/wombat.rb
CHANGED
|
@@ -5,7 +5,7 @@ require 'wombat/crawler'
|
|
|
5
5
|
module Wombat
|
|
6
6
|
class << self
|
|
7
7
|
|
|
8
|
-
attr_reader :proxy_args
|
|
8
|
+
attr_reader :proxy_args, :user_agent, :user_agent_alias
|
|
9
9
|
|
|
10
10
|
def crawl(&block)
|
|
11
11
|
klass = Class.new
|
|
@@ -21,6 +21,14 @@ module Wombat
|
|
|
21
21
|
@proxy_args = args
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
+
def set_user_agent(user_agent)
|
|
25
|
+
@user_agent = user_agent
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def set_user_agent_alias(user_agent_alias)
|
|
29
|
+
@user_agent_alias = user_agent_alias
|
|
30
|
+
end
|
|
31
|
+
|
|
24
32
|
alias_method :scrape, :crawl
|
|
25
33
|
end
|
|
26
|
-
end
|
|
34
|
+
end
|
data/lib/wombat/processing/parser.rb
CHANGED
|
@@ -27,6 +27,8 @@ module Wombat
|
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
@mechanize.set_proxy(*Wombat.proxy_args) if Wombat.proxy_args
|
|
30
|
+
@mechanize.user_agent = Wombat.user_agent if Wombat.user_agent
|
|
31
|
+
@mechanize.user_agent_alias = Wombat.user_agent_alias if Wombat.user_agent_alias
|
|
30
32
|
end
|
|
31
33
|
|
|
32
34
|
def parse(metadata)
|
data/spec/wombat_spec.rb
CHANGED
|
@@ -20,8 +20,12 @@ describe Wombat do
|
|
|
20
20
|
it 'should provide configuration method with block' do
|
|
21
21
|
Wombat.configure do |config|
|
|
22
22
|
config.set_proxy "10.0.0.1", 8080
|
|
23
|
+
config.set_user_agent "Wombat"
|
|
24
|
+
config.set_user_agent_alias 'Mac Safari'
|
|
23
25
|
end
|
|
24
26
|
Wombat.proxy_args.should == ["10.0.0.1", 8080]
|
|
27
|
+
Wombat.user_agent.should == 'Wombat'
|
|
28
|
+
Wombat.user_agent_alias.should == 'Mac Safari'
|
|
25
29
|
end
|
|
26
30
|
|
|
27
31
|
it 'should accept regular properties (non-selectors)' do
|
|
@@ -38,4 +42,4 @@ describe Wombat do
|
|
|
38
42
|
}.should_not raise_error
|
|
39
43
|
end
|
|
40
44
|
end
|
|
41
|
-
end
|
|
45
|
+
end
|
data/wombat.gemspec
CHANGED
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: wombat 2.3.0 ruby lib
|
|
5
|
+
# stub: wombat 2.4.0 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "wombat"
|
|
9
|
-
s.version = "2.3.0"
|
|
9
|
+
s.version = "2.4.0"
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib"]
|
|
13
13
|
s.authors = ["Felipe Lima"]
|
|
14
|
-
s.date = "
|
|
14
|
+
s.date = "2015-03-05"
|
|
15
15
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
|
16
16
|
s.email = "felipe.lima@gmail.com"
|
|
17
17
|
s.extra_rdoc_files = [
|
|
@@ -82,7 +82,7 @@ Gem::Specification.new do |s|
|
|
|
82
82
|
s.homepage = "http://felipecsl.github.com/wombat"
|
|
83
83
|
s.licenses = ["MIT"]
|
|
84
84
|
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
|
|
85
|
-
s.rubygems_version = "2.
|
|
85
|
+
s.rubygems_version = "2.4.6"
|
|
86
86
|
s.summary = "Ruby DSL to scrape web pages"
|
|
87
87
|
|
|
88
88
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wombat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.3.0
|
|
4
|
+
version: 2.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Felipe Lima
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2015-03-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mechanize
|
|
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
251
251
|
version: '0'
|
|
252
252
|
requirements: []
|
|
253
253
|
rubyforge_project:
|
|
254
|
-
rubygems_version: 2.
|
|
254
|
+
rubygems_version: 2.4.6
|
|
255
255
|
signing_key:
|
|
256
256
|
specification_version: 4
|
|
257
257
|
summary: Ruby DSL to scrape web pages
|