scruber 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/core_ext/const_missing.rb +9 -0
- data/lib/scruber/cli/project_generator.rb +2 -0
- data/lib/scruber/cli/templates/application.tt +1 -0
- data/lib/scruber/cli.rb +2 -0
- data/lib/scruber/core/configuration.rb +24 -1
- data/lib/scruber/core/crawler.rb +165 -7
- data/lib/scruber/core/extensions/base.rb +10 -0
- data/lib/scruber/core/extensions/csv_output.rb +21 -0
- data/lib/scruber/core/extensions/log.rb +39 -0
- data/lib/scruber/core/extensions/loop.rb +34 -0
- data/lib/scruber/core/extensions/parser_aliases.rb +24 -0
- data/lib/scruber/core/extensions/queue_aliases.rb +15 -0
- data/lib/scruber/core/extensions/seed.rb +23 -0
- data/lib/scruber/fetcher_adapters/abstract_adapter.rb +14 -14
- data/lib/scruber/fetcher_adapters/typhoeus_fetcher.rb +1 -1
- data/lib/scruber/queue_adapters/abstract_adapter.rb +149 -10
- data/lib/scruber/queue_adapters/memory.rb +139 -9
- data/lib/scruber/version.rb +1 -1
- data/lib/scruber.rb +23 -8
- data/scruber.gemspec +6 -4
- metadata +71 -10
data/lib/scruber/queue_adapters/abstract_adapter.rb
CHANGED
@@ -1,12 +1,44 @@
 module Scruber
   module QueueAdapters
+    #
+    # Abstract Queue Adapter
+    #
+    # @abstract
+    # @author Ivan Goncharov
+    #
     class AbstractAdapter
-
+      #
+      # Queue page wrapper
+      #
+      # @author Ivan Goncharov
+      #
+      # @attr [Object] id ID of page. Will be autogenerated if not passed
+      # @attr [String] url URL of page
+      # @attr [String] method Request method, post, get, head
+      # @attr [String] user_agent Fixed User-Agent for requesting this page
+      # @attr [Hash] headers Headers for requesting this page
+      # @attr [Object] fetcher_agent_id ID of FetcherAgent, assigned to this page
+      # @attr [Object] proxy_id ID of proxy, assigned to this page
+      # @attr [String] response_body Response body
+      # @attr [Integer] response_code Response code
+      # @attr [Hash] response_headers Response headers
+      # @attr [Float] response_total_time Response total time
+      # @attr [Integer] retry_at Minimal timestamp of next retry
+      # @attr [Integer] fetched_at Download completion timestamp
+      # @attr [Integer] retry_count Number of download attempts
+      # @attr [Integer] max_retry_times Max number of download attempts
+      # @attr [Integer] enqueued_at Timestamp added to the queue
+      # @attr [String] page_type Page type
+      # @attr [Scruber::QueueAdapters::AbstractAdapter::Page] queue Queue object
+      # @attr [Integer] priority Priority of page in queue for fetcher
+      # @attr [Integer] processed_at Processed by parser timestamp
+      # @attr [Hash] options All options
       class Page
-        attr_accessor :
+        attr_accessor :id,
+                      :url,
                       :method,
                       :user_agent,
-                      :
+                      :body,
                       :headers,
                       :fetcher_agent_id,
                       :proxy_id,
@@ -25,14 +57,16 @@ module Scruber
                       :processed_at,
                       :options

-        def initialize(queue,
+        def initialize(queue, options={})
           @queue = queue
-          @url = url

           options = options.with_indifferent_access
+          @options = options
+          @id = options.fetch(:id) { generate_page_id }
+          @url = options.fetch(:url) { raise "URL not provided" }
           @method = options.fetch(:method) { :get }
           @user_agent = options.fetch(:user_agent) { nil }
-          @
+          @body = options.fetch(:body) { nil }
           @headers = options.fetch(:headers) { {} }
           @fetcher_agent_id = options.fetch(:fetcher_agent_id) { nil }
           @proxy_id = options.fetch(:proxy_id) { nil }
@@ -49,13 +83,16 @@ module Scruber
           # @queue = options.fetch(:queue) { 'default' }
           @priority = options.fetch(:priority) { 0 }
           @processed_at = options.fetch(:processed_at) { 0 }
-          @options = options

           @_fetcher_agent = false
           @_proxy = false
           @_redownload = false
         end

+        #
+        # Returns assigned to this page FetcherAgent
+        #
+        # @return [Scruber::Helpers::FetcherAgent] Agent object
         def fetcher_agent
           if @_fetcher_agent == false
             @_fetcher_agent = (@fetcher_agent_id ? Scruber::Helpers::FetcherAgent.find(@fetcher_agent_id) : nil)
@@ -64,6 +101,10 @@ module Scruber
           end
         end

+        #
+        # Returns assigned to this page proxy
+        #
+        # @return [Proxy] proxy object
         def proxy
           if @_proxy == false
             @_proxy = (@proxy_id ? Scruber::Helpers::ProxyRotator.find(@proxy_id) : nil)
@@ -72,6 +113,10 @@ module Scruber
           end
         end

+        #
+        # Returns cookies from response headers
+        #
+        # @return [Array] array of cookies
         def response_cookies
           cookies = self.response_headers['Set-Cookie']
           if cookies.blank?
@@ -93,50 +138,144 @@ module Scruber
           instance_variable_get("@#{k.to_s}")
         end

+        #
+        # Delete page from queue
+        #
+        # @return [void]
         def delete
           raise NotImplementedError
         end

+        #
+        # Mark page as processed by parser and save it
+        #
+        # @return [void]
         def processed!
           @processed_at = Time.now.to_i
           @_redownload = false
           save
         end

-
+        #
+        # Mark page as pending and return to queue
+        #
+        # @param new_retry_count [Integer] new count of reties. Allows to reset retries count
+        #
+        # @return [void]
+        def redownload!(new_retry_count=nil)
           @_redownload = true

-          @processed_at =
-
+          @processed_at = 0
+          if new_retry_count
+            @retry_count = new_retry_count
+          else
+            @retry_count += 1
+          end
           @fetched_at = 0
           @response_body = nil
           save
         end

+        #
+        # Marked as page for redownloading
+        #
+        # @return [Boolean] true if need to redownload
         def sent_to_redownload?
           @_redownload
         end
+
+        private
+
+        def generate_page_id
+          Digest::MD5.hexdigest @options.slice(:method, :url, :headers, :body).to_json
+        end
       end

       def initialize(options={})
         @options = options
       end

+      #
+      # Add page to queue
+      # @param url [String] URL of page
+      # @param options [Hash] Other options, see {Scruber::QueueAdapters::AbstractAdapter::Page}
+      #
+      # @return [void]
       def add(url, options={})
         raise NotImplementedError
       end

+      #
+      # Search page by id
+      # @param id [Object] id of page
+      #
+      # @return [Page] page object
+      def find(id)
+        raise NotImplementedError
+      end
+
+      #
+      # Size of queue
+      #
+      # @return [Integer] count of pages in queue
+      def size
+        raise NotImplementedError
+      end
+
+      #
+      # Fetch pending page for fetching
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
       def fetch_pending(count=nil)
         raise NotImplementedError
       end

+      #
+      # Fetch downloaded and not processed pages for feching
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
       def fetch_downloaded(count=nil)
         raise NotImplementedError
       end

+      #
+      # Fetch error page
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
+      def fetch_error(count=nil)
+        raise NotImplementedError
+      end
+
+      #
+      # Count of downloaded pages
+      # Using to show downloading progress.
+      #
+      # @return [Integer] count of downloaded pages
+      def downloaded_count
+        raise NotImplementedError
+      end
+
+      #
+      # Check if queue was initialized.
+      # Using for `seed` method. If queue was initialized,
+      # then no need to run seed block.
+      #
+      # @return [Boolean] true if queue already was initialized
       def initialized?
         raise NotImplementedError
       end
+
+      #
+      # Used by Core. It checks for pages that are
+      # not downloaded or not parsed yet.
+      #
+      # @return [Boolean] true if queue still has work for scraper
+      def has_work?
+        raise NotImplementedError
+      end
     end
   end
 end
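The additions above give Page an options-hash constructor and an auto-generated id: when no :id option is passed, generate_page_id takes the MD5 of the request-defining options (:method, :url, :headers, :body), so enqueuing the same request twice produces the same id. A minimal sketch of that behaviour, assuming require 'scruber' still loads the bundled Memory adapter as it did in 0.1.4; the URL is a placeholder:

require 'scruber'

queue = Scruber::QueueAdapters::Memory.new
a = Scruber::QueueAdapters::Memory::Page.new(queue, url: 'http://example.com/', method: :get)
b = Scruber::QueueAdapters::Memory::Page.new(queue, url: 'http://example.com/', method: :get)

a.id == b.id # => true, both ids are the MD5 of the same method/url/headers/body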
data/lib/scruber/queue_adapters/memory.rb
CHANGED
@@ -1,46 +1,118 @@
 module Scruber
   module QueueAdapters
+    #
+    # Memory Queue Adapter
+    #
+    # Simple queue adapted which stores pages in memory.
+    # Nice solution for small scrapes.
+    # Easy to use. No need to setup any database, but
+    # no ability to reparse pages if something went wrong.
+    #
+    # @author Ivan Goncharov
+    #
     class Memory < AbstractAdapter
       attr_reader :error_pages

+      #
+      # Queue item class
+      #
+      # @author Ivan Goncharov
+      #
+      # @attr (see Scruber::QueueAdapters::AbstractAdapter::Page)
+      #
       class Page < Scruber::QueueAdapters::AbstractAdapter::Page
+
+        #
+        # Save page
+        #
+        # Depends on page attributes it push page
+        # to pending, downloaded or error queue.
+        #
+        # @return [void]
         def save
           if self.processed_at.to_i > 0
-
+            @queue.add_processed_page self
           elsif self.fetched_at > 0
             @queue.add_downloaded self
           elsif self.max_retry_times && self.retry_count >= self.max_retry_times.to_i
             @queue.add_error_page self
           else
-            @queue.
+            @queue.add self
           end
         end

+        #
+        # Delete page from all queues
+        #
+        # @return [void]
        def delete
          @queue.delete self
        end
      end

+      #
+      # Queue initializer
+      # @param options [Hash] See {Scruber::QueueAdapters::AbstractAdapter#initializer}
+      #
+      # @return [Scruber::QueueAdapters::Memory] class instance
       def initialize(options={})
         super(options)
+        @processed_ids = []
         @queue = []
         @downloaded_pages = []
         @error_pages = []
       end

-
-
-
-
-
+      #
+      # Add page to queue
+      # @param url_or_page [String|Page] URL of page or Page object
+      # @param options [Hash] Other options, see {Scruber::QueueAdapters::AbstractAdapter::Page}
+      #
+      # @return [void]
+      def add(url_or_page, options={})
+        unless url_or_page.is_a?(Page)
+          url_or_page = Page.new(self, options.merge(url: url_or_page))
+        end
+        @queue.push(url_or_page) unless @processed_ids.include?(url_or_page.id) || find(url_or_page.id)
+      end
+      alias_method :push, :add
+
+      #
+      # Search page by id
+      # @param id [Object] id of page
+      #
+      # @return [Page] page
+      def find(id)
+        [@queue, @downloaded_pages, @error_pages].each do |q|
+          q.each do |i|
+            return i if i.id == id
+          end
         end
+        nil
       end
-      alias_method :add, :push

-
+      #
+      # Size of queue
+      #
+      # @return [Integer] count of pages in queue
+      def size
         @queue.count
       end

+      #
+      # Count of downloaded pages
+      # Using to show downloading progress.
+      #
+      # @return [Integer] count of downloaded pages
+      def downloaded_count
+        @downloaded_pages.count
+      end
+
+      #
+      # Fetch downloaded and not processed pages for feching
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
       def fetch_downloaded(count=nil)
         if count.nil?
           @downloaded_pages.shift
@@ -49,6 +121,24 @@ module Scruber
         end
       end

+      #
+      # Fetch error page
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
+      def fetch_error(count=nil)
+        if count.nil?
+          @error_pages.shift
+        else
+          @error_pages.shift(count)
+        end
+      end
+
+      #
+      # Fetch pending page for fetching
+      # @param count [Integer] count of pages to fetch
+      #
+      # @return [Scruber::QueueAdapters::AbstractAdapter::Page|Array<Scruber::QueueAdapters::AbstractAdapter::Page>] page of count = nil, or array of pages of count > 0
       def fetch_pending(count=nil)
         if count.nil?
           @queue.shift
@@ -57,24 +147,64 @@ module Scruber
         end
       end

+      #
+      # Internal method to add page to downloaded queue
+      #
+      # @param page [Scruber::QueueAdapters::Memory::Page] page
+      #
+      # @return [void]
       def add_downloaded(page)
         @downloaded_pages.push page
       end

+      #
+      # Internal method to add page to error queue
+      #
+      # @param page [Scruber::QueueAdapters::Memory::Page] page
+      #
+      # @return [void]
       def add_error_page(page)
         @error_pages.push page
       end

+      #
+      # Saving processed page id to prevent
+      # adding identical pages to queue
+      #
+      # @param page [Page] page
+      #
+      # @return [void]
+      def add_processed_page(page)
+        @processed_ids.push page.id
+      end
+
+      #
+      # Used by Core. It checks for pages that are
+      # not downloaded or not parsed yet.
+      #
+      # @return [Boolean] true if queue still has work for scraper
       def has_work?
         @queue.count > 0 || @downloaded_pages.count > 0
       end

+      #
+      # Delete page from all internal queues
+      #
+      # @param page [Scruber::QueueAdapters::Memory::Page] page
+      #
+      # @return [void]
       def delete(page)
         @queue -= [page]
         @downloaded_pages -= [page]
         @error_pages -= [page]
       end

+      #
+      # Check if queue was initialized.
+      # Using for `seed` method. If queue was initialized,
+      # then no need to run seed block.
+      #
+      # @return [Boolean] true if queue already was initialized
       def initialized?
         @queue.present? || @downloaded_pages.present? || @error_pages.present?
       end
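With Page#save routing pages between the adapter's pending, downloaded and error arrays, and processed ids remembered to block re-enqueuing, a typical lifecycle looks roughly like the sketch below. It is illustrative only: the fetch results are set by hand here, whereas in the gem the fetcher fills them in.

require 'scruber'

queue = Scruber::QueueAdapters::Memory.new
queue.add 'http://example.com/'    # wrapped into a Memory::Page and pushed to the pending queue

page = queue.fetch_pending         # shifted out of the pending queue
page.redownload!                   # retry_count += 1, response cleared, saved back to pending

page = queue.fetch_pending
page.fetched_at = Time.now.to_i    # pretend the download succeeded
page.save                          # fetched_at > 0, so it lands in the downloaded queue

queue.fetch_downloaded.processed!  # add_processed_page records the id
queue.has_work?                    # => false once nothing is pending or downloaded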
data/lib/scruber/version.rb
CHANGED
data/lib/scruber.rb
CHANGED
@@ -3,7 +3,11 @@ require 'nokogiri'
 require 'http-cookie'
 require 'pickup'
 require 'csv'
+require 'paint'
+require 'powerbar'
+require 'core_ext/const_missing'
 require 'active_support'
+require 'active_support/dependencies'
 require 'active_support/core_ext/object'
 require 'active_support/core_ext/hash'

@@ -22,6 +26,8 @@ require "scruber/core/page_format/html"

 require "scruber/core/extensions/base"
 require "scruber/core/extensions/loop"
+require "scruber/core/extensions/log"
+require "scruber/core/extensions/seed"
 require "scruber/core/extensions/csv_output"
 require "scruber/core/extensions/queue_aliases"
 require "scruber/core/extensions/parser_aliases"
@@ -30,16 +36,13 @@ require "scruber/helpers/dictionary_reader"
 require "scruber/helpers/dictionary_reader/xml"
 require "scruber/helpers/dictionary_reader/csv"

-# require "scruber/core/configuration"
-# require "scruber/core/configuration"
-
 module Scruber
   class ArgumentError < ::ArgumentError; end
+
   module Core
-    autoload :Configuration,
-    autoload :Crawler,
+    autoload :Configuration, "scruber/core/configuration"
+    autoload :Crawler, "scruber/core/crawler"
   end
-
   module Helpers
     autoload :UserAgentRotator, "scruber/helpers/user_agent_rotator"
     autoload :ProxyRotator, "scruber/helpers/proxy_rotator"
@@ -51,11 +54,11 @@ module Scruber
   end

   class << self
-    attr_writer :configuration
+    attr_writer :configuration, :logger

     def run(*args, &block)
       raise "You need a block to build!" unless block_given?
-
+
       Core::Crawler.new(*args).run(&block)
     end

@@ -63,8 +66,20 @@ module Scruber
       @configuration ||= Core::Configuration.new
     end

+    def logger
+      @logger ||= Scruber.root.nil? ? nil : Logger.new(Scruber.root.join('log', 'crawler.log'))
+    end
+
     def configure(&block)
       yield configuration
     end
+
+    def root
+      if defined?(APP_PATH)
+        Pathname.new(File.expand_path('../../', APP_PATH))
+      else
+        nil
+      end
+    end
   end
 end
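scruber.rb also becomes project-aware: Scruber.root resolves the project directory from APP_PATH (presumably defined by the boot file of a generated project, in line with the CLI and application.tt changes in this release) and Scruber.logger lazily opens log/crawler.log under that root. A hedged sketch of the resulting behaviour:

# Inside a generated project, where APP_PATH is defined:
Scruber.root     # => Pathname of the project root (two levels above APP_PATH)
Scruber.logger   # => Logger writing to log/crawler.log under Scruber.root

# In a plain script without APP_PATH:
Scruber.root     # => nil
Scruber.logger   # => nil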
data/scruber.gemspec
CHANGED
@@ -30,11 +30,13 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]

-  spec.add_dependency "typhoeus",
-  spec.add_dependency "pickup", "0.0.11"
-  spec.add_dependency "nokogiri",
+  spec.add_dependency "typhoeus", '~> 1.1', '>= 1.1.2'
+  spec.add_dependency "pickup", "~> 0.0.11"
+  spec.add_dependency "nokogiri", '~> 1.8', '>= 1.8.2'
   spec.add_dependency "http-cookie", "1.0.3"
-  spec.add_dependency "activesupport",
+  spec.add_dependency "activesupport", '~> 5.1', '>= 5.1.5'
+  spec.add_dependency "powerbar", '~> 2.0', '>= 2.0.1'
+  spec.add_dependency "paint", '~> 2.0', '>= 2.0.1'
   spec.add_runtime_dependency "thor", "0.20.0"
   spec.add_development_dependency "bundler", "~> 1.15"
   spec.add_development_dependency "rake", "~> 10.0"