upton 0.2.9 → 0.2.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/upton.rb +6 -5
- data/lib/upton/downloader.rb +2 -1
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46903360868525008977cd6ed14f99aa48508f2e
|
4
|
+
data.tar.gz: 5003dd6031339e8aa19fb429fdd10e78cd8c2608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1bcd61e00617300959f076b1b78ee9136096a405158b8504f77903120a1d6fb7164870f577590f699c727c5d3be62969646c7e1f046ede9d3e27bd1f5a594f5
|
7
|
+
data.tar.gz: 56cea905090eaededa2295eef89a9b7142870771a097745e404cb667fc95bffaebac782ce7f497087ace6256755c2c8dbab77e8513b79c0f8b9bda4eb900cdfe
|
data/lib/upton.rb
CHANGED
@@ -35,7 +35,7 @@ module Upton
|
|
35
35
|
EMPTY_STRING = ''
|
36
36
|
|
37
37
|
attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
|
38
|
-
:paginated, :pagination_param, :pagination_max_pages
|
38
|
+
:paginated, :pagination_param, :pagination_max_pages, :readable_filenames
|
39
39
|
|
40
40
|
##
|
41
41
|
# This is the main user-facing method for a basic scraper.
|
@@ -212,11 +212,12 @@ module Upton
|
|
212
212
|
:cache => stash,
|
213
213
|
:verbose => @verbose
|
214
214
|
}
|
215
|
+
if @readable_filenames
|
216
|
+
global_options[:readable_filenames] = true
|
217
|
+
end
|
215
218
|
if @stash_folder
|
216
|
-
global_options
|
217
|
-
|
218
|
-
:readable_filenames => true
|
219
|
-
})
|
219
|
+
global_options[:readable_filenames] = true
|
220
|
+
global_options[:cache_location] = @stash_folder
|
220
221
|
end
|
221
222
|
resp_and_cache = Downloader.new(url, global_options.merge(options)).get
|
222
223
|
if resp_and_cache[:from_resource]
|
data/lib/upton/downloader.rb
CHANGED
@@ -20,6 +20,7 @@ module Upton
|
|
20
20
|
attr_reader :uri, :cache_location, :verbose
|
21
21
|
def initialize(uri, options = {})
|
22
22
|
@uri = uri
|
23
|
+
@options = options
|
23
24
|
@cache = options.fetch(:cache) { true }
|
24
25
|
@cache_location = File.absolute_path(options[:cache_location] || "#{Dir.tmpdir}/upton")
|
25
26
|
@verbose = options[:verbose] || false
|
@@ -78,7 +79,7 @@ module Upton
|
|
78
79
|
unless cached_file_exists?
|
79
80
|
if @verbose
|
80
81
|
if @readable_stash_filenames
|
81
|
-
puts "Writing #{uri} data to the cache at #{cached_file}"
|
82
|
+
puts "Writing #{uri} data to the cache at #{cached_file}"
|
82
83
|
else
|
83
84
|
puts "Writing #{uri} data to the cache"
|
84
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: upton
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy B. Merrill
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -137,8 +137,8 @@ dependencies:
|
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
description: Don't re-write web scrapers every time. Upton gives you a scraper template
|
140
|
-
that's easy to use for debugging and doesn't hammer servers by default
|
141
|
-
email:
|
140
|
+
that's easy to use for debugging and doesn't hammer servers by default.
|
141
|
+
email: jeremybmerrill@jeremybmerrill.com
|
142
142
|
executables: []
|
143
143
|
extensions: []
|
144
144
|
extra_rdoc_files: []
|
@@ -146,15 +146,15 @@ files:
|
|
146
146
|
- lib/upton.rb
|
147
147
|
- lib/upton/utils.rb
|
148
148
|
- lib/upton/downloader.rb
|
149
|
-
- spec/data/
|
150
|
-
- spec/data/
|
149
|
+
- spec/data/discussion.html
|
150
|
+
- spec/data/easttimor.html
|
151
151
|
- spec/data/propublica-relative.html
|
152
152
|
- spec/data/propublica.html
|
153
|
+
- spec/data/propublica_search.html
|
154
|
+
- spec/data/propublica_search_page_2.html
|
153
155
|
- spec/data/prosecutor.html
|
154
156
|
- spec/data/sixfacts.html
|
155
|
-
- spec/data/
|
156
|
-
- spec/data/easttimor.html
|
157
|
-
- spec/data/propublica_search.html
|
157
|
+
- spec/data/webinar.html
|
158
158
|
- spec/upton_spec.rb
|
159
159
|
- spec/spec_helper.rb
|
160
160
|
- spec/upton_downloader_spec.rb
|
@@ -178,20 +178,20 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
180
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.0.
|
181
|
+
rubygems_version: 2.0.3
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: A simple web-scraping framework
|
185
185
|
test_files:
|
186
|
-
- spec/data/
|
187
|
-
- spec/data/
|
186
|
+
- spec/data/discussion.html
|
187
|
+
- spec/data/easttimor.html
|
188
188
|
- spec/data/propublica-relative.html
|
189
189
|
- spec/data/propublica.html
|
190
|
+
- spec/data/propublica_search.html
|
191
|
+
- spec/data/propublica_search_page_2.html
|
190
192
|
- spec/data/prosecutor.html
|
191
193
|
- spec/data/sixfacts.html
|
192
|
-
- spec/data/
|
193
|
-
- spec/data/easttimor.html
|
194
|
-
- spec/data/propublica_search.html
|
194
|
+
- spec/data/webinar.html
|
195
195
|
- spec/upton_spec.rb
|
196
196
|
- spec/spec_helper.rb
|
197
197
|
- spec/upton_downloader_spec.rb
|