web_crawler 0.3.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/README +22 -1
- data/lib/web_crawler.rb +2 -0
- data/lib/web_crawler/application.rb +33 -2
- data/lib/web_crawler/base.rb +113 -0
- data/lib/web_crawler/batch_request.rb +10 -4
- data/lib/web_crawler/cached_request.rb +16 -7
- data/lib/web_crawler/configuration.rb +5 -5
- data/lib/web_crawler/factory_url.rb +27 -7
- data/lib/web_crawler/follower.rb +11 -9
- data/lib/web_crawler/parsers.rb +1 -0
- data/lib/web_crawler/parsers/mapper.rb +114 -0
- data/lib/web_crawler/parsers/url.rb +3 -5
- data/lib/web_crawler/request.rb +14 -2
- data/lib/web_crawler/response.rb +2 -2
- data/lib/web_crawler/version.rb +2 -2
- data/lib/web_crawler/view.rb +1 -1
- data/lib/web_crawler/view/csv.rb +1 -1
- data/lib/web_crawler/view/json.rb +1 -1
- data/lib/web_crawler/view/yaml.rb +1 -1
- data/spec/fixtures/example.xml +171 -0
- data/spec/fixtures/my_crawler.rb +82 -0
- data/spec/fixtures/test_crawler.rb +108 -0
- data/spec/fixtures/test_crawler2.rb +77 -0
- data/spec/spec_helper.rb +8 -3
- data/spec/web_crawler/batch_request_spec.rb +0 -11
- data/spec/web_crawler/cached_request_spec.rb +17 -11
- data/spec/web_crawler/factory_url_spec.rb +19 -6
- data/spec/web_crawler/follow_spec.rb +11 -4
- data/spec/web_crawler/view_spec.rb +10 -10
- data/spec/web_crawler/web_crawler_api_base_class_spec.rb +143 -0
- data/web_crawler.gemspec +2 -0
- metadata +43 -8
data/web_crawler.gemspec
CHANGED
@@ -25,6 +25,8 @@ Gem::Specification.new do |s|
|
|
25
25
|
|
26
26
|
s.add_dependency 'thor', '>=0.14.6'
|
27
27
|
s.add_dependency 'mime-types', '>=1.16'
|
28
|
+
s.add_dependency 'parallel', '>=0.5.5'
|
29
|
+
s.add_dependency 'activesupport'
|
28
30
|
|
29
31
|
s.add_development_dependency(%q<rspec>, [">=2.6"])
|
30
32
|
s.add_development_dependency(%q<fakeweb>)
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 3
|
8
|
-
- 1
|
9
|
-
version: 0.3.1
|
7
|
+
- 5
|
8
|
+
- 0
|
9
|
+
version: 0.5.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Anton Sozontov
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-06-
|
17
|
+
date: 2011-06-14 00:00:00 +04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -47,9 +47,37 @@ dependencies:
|
|
47
47
|
type: :runtime
|
48
48
|
version_requirements: *id002
|
49
49
|
- !ruby/object:Gem::Dependency
|
50
|
-
name: rspec
|
50
|
+
name: parallel
|
51
51
|
prerelease: false
|
52
52
|
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
- 5
|
60
|
+
- 5
|
61
|
+
version: 0.5.5
|
62
|
+
type: :runtime
|
63
|
+
version_requirements: *id003
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: activesupport
|
66
|
+
prerelease: false
|
67
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :runtime
|
76
|
+
version_requirements: *id004
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: rspec
|
79
|
+
prerelease: false
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
53
81
|
none: false
|
54
82
|
requirements:
|
55
83
|
- - ">="
|
@@ -59,11 +87,11 @@ dependencies:
|
|
59
87
|
- 6
|
60
88
|
version: "2.6"
|
61
89
|
type: :development
|
62
|
-
version_requirements: *id003
|
90
|
+
version_requirements: *id005
|
63
91
|
- !ruby/object:Gem::Dependency
|
64
92
|
name: fakeweb
|
65
93
|
prerelease: false
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
67
95
|
none: false
|
68
96
|
requirements:
|
69
97
|
- - ">="
|
@@ -72,7 +100,7 @@ dependencies:
|
|
72
100
|
- 0
|
73
101
|
version: "0"
|
74
102
|
type: :development
|
75
|
-
version_requirements: *id004
|
103
|
+
version_requirements: *id006
|
76
104
|
description: Web crawler help you with parse and collect data from the web
|
77
105
|
email:
|
78
106
|
- a.sozontov@gmail.com
|
@@ -94,6 +122,7 @@ files:
|
|
94
122
|
- lib/ext/http_response.rb
|
95
123
|
- lib/web_crawler.rb
|
96
124
|
- lib/web_crawler/application.rb
|
125
|
+
- lib/web_crawler/base.rb
|
97
126
|
- lib/web_crawler/batch_request.rb
|
98
127
|
- lib/web_crawler/cache_adapter.rb
|
99
128
|
- lib/web_crawler/cache_adapter/base.rb
|
@@ -108,6 +137,7 @@ files:
|
|
108
137
|
- lib/web_crawler/follower.rb
|
109
138
|
- lib/web_crawler/handler.rb
|
110
139
|
- lib/web_crawler/parsers.rb
|
140
|
+
- lib/web_crawler/parsers/mapper.rb
|
111
141
|
- lib/web_crawler/parsers/url.rb
|
112
142
|
- lib/web_crawler/request.rb
|
113
143
|
- lib/web_crawler/response.rb
|
@@ -122,6 +152,10 @@ files:
|
|
122
152
|
- lib/web_crawler/view/xml.rb
|
123
153
|
- lib/web_crawler/view/yaml.rb
|
124
154
|
- spec/fake_web_generator.rb
|
155
|
+
- spec/fixtures/example.xml
|
156
|
+
- spec/fixtures/my_crawler.rb
|
157
|
+
- spec/fixtures/test_crawler.rb
|
158
|
+
- spec/fixtures/test_crawler2.rb
|
125
159
|
- spec/spec_helper.rb
|
126
160
|
- spec/web_crawler/batch_request_spec.rb
|
127
161
|
- spec/web_crawler/cached_request_spec.rb
|
@@ -131,6 +165,7 @@ files:
|
|
131
165
|
- spec/web_crawler/response_spec.rb
|
132
166
|
- spec/web_crawler/url_parser_spec.rb
|
133
167
|
- spec/web_crawler/view_spec.rb
|
168
|
+
- spec/web_crawler/web_crawler_api_base_class_spec.rb
|
134
169
|
- web_crawler.gemspec
|
135
170
|
has_rdoc: false
|
136
171
|
homepage: ""
|