wombat 2.3.0 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +49 -47
- data/README.md +9 -12
- data/VERSION +1 -1
- data/lib/wombat.rb +10 -2
- data/lib/wombat/processing/parser.rb +2 -0
- data/spec/wombat_spec.rb +5 -1
- data/wombat.gemspec +4 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d495fd45c7c451739a77c6f9474b2eafd5b8324a
|
4
|
+
data.tar.gz: d665211fe3918cdf30b46c5baba237b6ba12d788
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7245007b0fddde801a764d1777a041a25e3453972edbc9bcf710876804bd5d4b23f24a5c2b477798ad4886f660b28591f230c544b1e5c3ee90a859bf855eafb9
|
7
|
+
data.tar.gz: 442634497971982fb0142540d6cd2ddc7b2224e1f78ed032eae448d72a2db8e73ac88d6841965c6d6d4c4350c2ea274bc2f6c377dfe0f9021ab238af159e0340
|
data/Gemfile.lock
CHANGED
@@ -1,31 +1,31 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (4.
|
5
|
-
i18n (~> 0.
|
4
|
+
activesupport (4.2.0)
|
5
|
+
i18n (~> 0.7)
|
6
6
|
json (~> 1.7, >= 1.7.7)
|
7
7
|
minitest (~> 5.1)
|
8
|
-
thread_safe (~> 0.
|
8
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
9
9
|
tzinfo (~> 1.1)
|
10
|
-
addressable (2.3.
|
10
|
+
addressable (2.3.7)
|
11
11
|
builder (3.2.2)
|
12
|
-
coveralls (0.7.
|
13
|
-
multi_json (~> 1.
|
14
|
-
rest-client (
|
15
|
-
simplecov (
|
16
|
-
term-ansicolor (
|
17
|
-
thor (
|
12
|
+
coveralls (0.7.11)
|
13
|
+
multi_json (~> 1.10)
|
14
|
+
rest-client (>= 1.6.8, < 2)
|
15
|
+
simplecov (~> 0.9.1)
|
16
|
+
term-ansicolor (~> 1.3)
|
17
|
+
thor (~> 0.19.1)
|
18
18
|
descendants_tracker (0.0.4)
|
19
19
|
thread_safe (~> 0.3, >= 0.3.1)
|
20
20
|
diff-lcs (1.2.5)
|
21
21
|
docile (1.1.5)
|
22
|
-
domain_name (0.5.
|
22
|
+
domain_name (0.5.23)
|
23
23
|
unf (>= 0.0.5, < 1.0.0)
|
24
24
|
fakeweb (1.3.0)
|
25
|
-
faraday (0.9.
|
25
|
+
faraday (0.9.1)
|
26
26
|
multipart-post (>= 1.2, < 3)
|
27
|
-
git (1.2.
|
28
|
-
github_api (0.12.
|
27
|
+
git (1.2.9.1)
|
28
|
+
github_api (0.12.3)
|
29
29
|
addressable (~> 2.3)
|
30
30
|
descendants_tracker (~> 0.0.4)
|
31
31
|
faraday (~> 0.8, < 0.10)
|
@@ -33,11 +33,11 @@ GEM
|
|
33
33
|
multi_json (>= 1.7.5, < 2.0)
|
34
34
|
nokogiri (~> 1.6.3)
|
35
35
|
oauth2
|
36
|
-
hashie (3.
|
37
|
-
highline (1.
|
36
|
+
hashie (3.4.0)
|
37
|
+
highline (1.7.1)
|
38
38
|
http-cookie (1.0.2)
|
39
39
|
domain_name (~> 0.5)
|
40
|
-
i18n (0.
|
40
|
+
i18n (0.7.0)
|
41
41
|
jeweler (2.0.1)
|
42
42
|
builder
|
43
43
|
bundler (>= 1.0)
|
@@ -47,8 +47,8 @@ GEM
|
|
47
47
|
nokogiri (>= 1.5.10)
|
48
48
|
rake
|
49
49
|
rdoc
|
50
|
-
json (1.8.
|
51
|
-
jwt (1.
|
50
|
+
json (1.8.2)
|
51
|
+
jwt (1.3.0)
|
52
52
|
mechanize (2.7.3)
|
53
53
|
domain_name (~> 0.5, >= 0.5.1)
|
54
54
|
http-cookie (~> 1.0)
|
@@ -59,14 +59,15 @@ GEM
|
|
59
59
|
ntlm-http (~> 0.1, >= 0.1.1)
|
60
60
|
webrobots (>= 0.0.9, < 0.2)
|
61
61
|
mime-types (2.4.3)
|
62
|
-
mini_portile (0.6.
|
63
|
-
minitest (5.
|
64
|
-
multi_json (1.
|
62
|
+
mini_portile (0.6.2)
|
63
|
+
minitest (5.5.1)
|
64
|
+
multi_json (1.11.0)
|
65
65
|
multi_xml (0.5.5)
|
66
66
|
multipart-post (2.0.0)
|
67
67
|
net-http-digest_auth (1.4)
|
68
68
|
net-http-persistent (2.9.4)
|
69
|
-
|
69
|
+
netrc (0.10.3)
|
70
|
+
nokogiri (1.6.6.2)
|
70
71
|
mini_portile (~> 0.6.0)
|
71
72
|
ntlm-http (0.1.1)
|
72
73
|
oauth2 (1.0.0)
|
@@ -75,34 +76,35 @@ GEM
|
|
75
76
|
multi_json (~> 1.3)
|
76
77
|
multi_xml (~> 0.5)
|
77
78
|
rack (~> 1.2)
|
78
|
-
rack (1.
|
79
|
-
rake (10.4.
|
80
|
-
rdoc (4.
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
rspec (3.
|
85
|
-
rspec-core (~> 3.
|
86
|
-
rspec-expectations (~> 3.
|
87
|
-
rspec-mocks (~> 3.
|
88
|
-
rspec-core (3.1
|
89
|
-
rspec-support (~> 3.
|
90
|
-
rspec-expectations (3.
|
79
|
+
rack (1.6.0)
|
80
|
+
rake (10.4.2)
|
81
|
+
rdoc (4.2.0)
|
82
|
+
rest-client (1.7.3)
|
83
|
+
mime-types (>= 1.16, < 3.0)
|
84
|
+
netrc (~> 0.7)
|
85
|
+
rspec (3.2.0)
|
86
|
+
rspec-core (~> 3.2.0)
|
87
|
+
rspec-expectations (~> 3.2.0)
|
88
|
+
rspec-mocks (~> 3.2.0)
|
89
|
+
rspec-core (3.2.1)
|
90
|
+
rspec-support (~> 3.2.0)
|
91
|
+
rspec-expectations (3.2.0)
|
92
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
93
|
+
rspec-support (~> 3.2.0)
|
94
|
+
rspec-mocks (3.2.1)
|
91
95
|
diff-lcs (>= 1.2.0, < 2.0)
|
92
|
-
rspec-support (~> 3.
|
93
|
-
rspec-
|
94
|
-
|
95
|
-
rspec-support (3.1.2)
|
96
|
-
simplecov (0.9.1)
|
96
|
+
rspec-support (~> 3.2.0)
|
97
|
+
rspec-support (3.2.2)
|
98
|
+
simplecov (0.9.2)
|
97
99
|
docile (~> 1.1.0)
|
98
100
|
multi_json (~> 1.0)
|
99
|
-
simplecov-html (~> 0.
|
100
|
-
simplecov-html (0.
|
101
|
-
term-ansicolor (1.
|
102
|
-
tins (~> 0
|
103
|
-
thor (0.
|
101
|
+
simplecov-html (~> 0.9.0)
|
102
|
+
simplecov-html (0.9.0)
|
103
|
+
term-ansicolor (1.3.0)
|
104
|
+
tins (~> 1.0)
|
105
|
+
thor (0.19.1)
|
104
106
|
thread_safe (0.3.4)
|
105
|
-
tins (
|
107
|
+
tins (1.3.5)
|
106
108
|
tzinfo (1.2.2)
|
107
109
|
thread_safe (~> 0.1)
|
108
110
|
unf (0.1.4)
|
data/README.md
CHANGED
@@ -14,8 +14,6 @@ Web scraper with an elegant DSL that parses structured data from web pages.
|
|
14
14
|
|
15
15
|
``gem install wombat``
|
16
16
|
|
17
|
-
Obs: Requires ruby 1.9.3 (activesupport requires Ruby version >= 1.9.3)
|
18
|
-
|
19
17
|
## Scraping a page:
|
20
18
|
|
21
19
|
The simplest way to use Wombat is by calling ``Wombat.crawl`` and passing it a block:
|
@@ -48,18 +46,18 @@ end
|
|
48
46
|
|
49
47
|
```ruby
|
50
48
|
{
|
51
|
-
"headline"=>"Build software better, together.",
|
52
|
-
"subheading"=>"Powerful collaboration, code review, and code management for open source and private projects. Need private repositories? Upgraded plans start at $7/mo.",
|
49
|
+
"headline"=>"Build software better, together.",
|
50
|
+
"subheading"=>"Powerful collaboration, code review, and code management for open source and private projects. Need private repositories? Upgraded plans start at $7/mo.",
|
53
51
|
"what_is"=>[
|
54
|
-
"Great collaboration starts with communication.",
|
55
|
-
"Friction-less development across teams.",
|
56
|
-
"World's largest open source community.",
|
52
|
+
"Great collaboration starts with communication.",
|
53
|
+
"Friction-less development across teams.",
|
54
|
+
"World's largest open source community.",
|
57
55
|
"Do more with powerful integrations."
|
58
|
-
],
|
56
|
+
],
|
59
57
|
"links"=>{
|
60
|
-
"explore"=>"Love",
|
61
|
-
"features"=>"Features",
|
62
|
-
"enterprise"=>"Enterprise",
|
58
|
+
"explore"=>"Love",
|
59
|
+
"features"=>"Features",
|
60
|
+
"enterprise"=>"Enterprise",
|
63
61
|
"blog"=>"Blog"
|
64
62
|
}
|
65
63
|
}
|
@@ -89,4 +87,3 @@ end
|
|
89
87
|
## Copyright
|
90
88
|
|
91
89
|
Copyright (c) 2012 Felipe Lima. See LICENSE.txt for further details.
|
92
|
-
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.3.0
|
1
|
+
2.4.0
|
data/lib/wombat.rb
CHANGED
@@ -5,7 +5,7 @@ require 'wombat/crawler'
|
|
5
5
|
module Wombat
|
6
6
|
class << self
|
7
7
|
|
8
|
-
attr_reader :proxy_args
|
8
|
+
attr_reader :proxy_args, :user_agent, :user_agent_alias
|
9
9
|
|
10
10
|
def crawl(&block)
|
11
11
|
klass = Class.new
|
@@ -21,6 +21,14 @@ module Wombat
|
|
21
21
|
@proxy_args = args
|
22
22
|
end
|
23
23
|
|
24
|
+
def set_user_agent(user_agent)
|
25
|
+
@user_agent = user_agent
|
26
|
+
end
|
27
|
+
|
28
|
+
def set_user_agent_alias(user_agent_alias)
|
29
|
+
@user_agent_alias = user_agent_alias
|
30
|
+
end
|
31
|
+
|
24
32
|
alias_method :scrape, :crawl
|
25
33
|
end
|
26
|
-
end
|
34
|
+
end
|
@@ -27,6 +27,8 @@ module Wombat
|
|
27
27
|
}
|
28
28
|
}
|
29
29
|
@mechanize.set_proxy(*Wombat.proxy_args) if Wombat.proxy_args
|
30
|
+
@mechanize.user_agent = Wombat.user_agent if Wombat.user_agent
|
31
|
+
@mechanize.user_agent_alias = Wombat.user_agent_alias if Wombat.user_agent_alias
|
30
32
|
end
|
31
33
|
|
32
34
|
def parse(metadata)
|
data/spec/wombat_spec.rb
CHANGED
@@ -20,8 +20,12 @@ describe Wombat do
|
|
20
20
|
it 'should provide configuration method with block' do
|
21
21
|
Wombat.configure do |config|
|
22
22
|
config.set_proxy "10.0.0.1", 8080
|
23
|
+
config.set_user_agent "Wombat"
|
24
|
+
config.set_user_agent_alias 'Mac Safari'
|
23
25
|
end
|
24
26
|
Wombat.proxy_args.should == ["10.0.0.1", 8080]
|
27
|
+
Wombat.user_agent.should == 'Wombat'
|
28
|
+
Wombat.user_agent_alias.should == 'Mac Safari'
|
25
29
|
end
|
26
30
|
|
27
31
|
it 'should accept regular properties (non-selectors)' do
|
@@ -38,4 +42,4 @@ describe Wombat do
|
|
38
42
|
}.should_not raise_error
|
39
43
|
end
|
40
44
|
end
|
41
|
-
end
|
45
|
+
end
|
data/wombat.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: wombat 2.3.0 ruby lib
|
5
|
+
# stub: wombat 2.4.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "wombat"
|
9
|
-
s.version = "2.3.0"
|
9
|
+
s.version = "2.4.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Felipe Lima"]
|
14
|
-
s.date = "
|
14
|
+
s.date = "2015-03-05"
|
15
15
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
16
16
|
s.email = "felipe.lima@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -82,7 +82,7 @@ Gem::Specification.new do |s|
|
|
82
82
|
s.homepage = "http://felipecsl.github.com/wombat"
|
83
83
|
s.licenses = ["MIT"]
|
84
84
|
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
|
85
|
-
s.rubygems_version = "2.
|
85
|
+
s.rubygems_version = "2.4.6"
|
86
86
|
s.summary = "Ruby DSL to scrape web pages"
|
87
87
|
|
88
88
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.0
|
4
|
+
version: 2.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felipe Lima
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
251
251
|
version: '0'
|
252
252
|
requirements: []
|
253
253
|
rubyforge_project:
|
254
|
-
rubygems_version: 2.
|
254
|
+
rubygems_version: 2.4.6
|
255
255
|
signing_key:
|
256
256
|
specification_version: 4
|
257
257
|
summary: Ruby DSL to scrape web pages
|