upton 0.2.9 → 0.2.10
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/upton.rb +6 -5
- data/lib/upton/downloader.rb +2 -1
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46903360868525008977cd6ed14f99aa48508f2e
|
4
|
+
data.tar.gz: 5003dd6031339e8aa19fb429fdd10e78cd8c2608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1bcd61e00617300959f076b1b78ee9136096a405158b8504f77903120a1d6fb7164870f577590f699c727c5d3be62969646c7e1f046ede9d3e27bd1f5a594f5
|
7
|
+
data.tar.gz: 56cea905090eaededa2295eef89a9b7142870771a097745e404cb667fc95bffaebac782ce7f497087ace6256755c2c8dbab77e8513b79c0f8b9bda4eb900cdfe
|
data/lib/upton.rb
CHANGED
@@ -35,7 +35,7 @@ module Upton
|
|
35
35
|
EMPTY_STRING = ''
|
36
36
|
|
37
37
|
attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
|
38
|
-
:paginated, :pagination_param, :pagination_max_pages
|
38
|
+
:paginated, :pagination_param, :pagination_max_pages, :readable_filenames
|
39
39
|
|
40
40
|
##
|
41
41
|
# This is the main user-facing method for a basic scraper.
|
@@ -212,11 +212,12 @@ module Upton
|
|
212
212
|
:cache => stash,
|
213
213
|
:verbose => @verbose
|
214
214
|
}
|
215
|
+
if @readable_filenames
|
216
|
+
global_options[:readable_filenames] = true
|
217
|
+
end
|
215
218
|
if @stash_folder
|
216
|
-
global_options
|
217
|
-
|
218
|
-
:readable_filenames => true
|
219
|
-
})
|
219
|
+
global_options[:readable_filenames] = true
|
220
|
+
global_options[:cache_location] = @stash_folder
|
220
221
|
end
|
221
222
|
resp_and_cache = Downloader.new(url, global_options.merge(options)).get
|
222
223
|
if resp_and_cache[:from_resource]
|
data/lib/upton/downloader.rb
CHANGED
@@ -20,6 +20,7 @@ module Upton
|
|
20
20
|
attr_reader :uri, :cache_location, :verbose
|
21
21
|
def initialize(uri, options = {})
|
22
22
|
@uri = uri
|
23
|
+
@options = options
|
23
24
|
@cache = options.fetch(:cache) { true }
|
24
25
|
@cache_location = File.absolute_path(options[:cache_location] || "#{Dir.tmpdir}/upton")
|
25
26
|
@verbose = options[:verbose] || false
|
@@ -78,7 +79,7 @@ module Upton
|
|
78
79
|
unless cached_file_exists?
|
79
80
|
if @verbose
|
80
81
|
if @readable_stash_filenames
|
81
|
-
puts "Writing #{uri} data to the cache at #{cached_file}"
|
82
|
+
puts "Writing #{uri} data to the cache at #{cached_file}"
|
82
83
|
else
|
83
84
|
puts "Writing #{uri} data to the cache"
|
84
85
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: upton
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.9
|
4
|
+
version: 0.2.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy B. Merrill
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -137,8 +137,8 @@ dependencies:
|
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
description: Don't re-write web scrapers every time. Upton gives you a scraper template
|
140
|
-
that's easy to use for debugging and doesn't hammer servers by default
|
141
|
-
email:
|
140
|
+
that's easy to use for debugging and doesn't hammer servers by default.
|
141
|
+
email: jeremybmerrill@jeremybmerrill.com
|
142
142
|
executables: []
|
143
143
|
extensions: []
|
144
144
|
extra_rdoc_files: []
|
@@ -146,15 +146,15 @@ files:
|
|
146
146
|
- lib/upton.rb
|
147
147
|
- lib/upton/utils.rb
|
148
148
|
- lib/upton/downloader.rb
|
149
|
-
- spec/data/
|
150
|
-
- spec/data/
|
149
|
+
- spec/data/discussion.html
|
150
|
+
- spec/data/easttimor.html
|
151
151
|
- spec/data/propublica-relative.html
|
152
152
|
- spec/data/propublica.html
|
153
|
+
- spec/data/propublica_search.html
|
154
|
+
- spec/data/propublica_search_page_2.html
|
153
155
|
- spec/data/prosecutor.html
|
154
156
|
- spec/data/sixfacts.html
|
155
|
-
- spec/data/
|
156
|
-
- spec/data/easttimor.html
|
157
|
-
- spec/data/propublica_search.html
|
157
|
+
- spec/data/webinar.html
|
158
158
|
- spec/upton_spec.rb
|
159
159
|
- spec/spec_helper.rb
|
160
160
|
- spec/upton_downloader_spec.rb
|
@@ -178,20 +178,20 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
180
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.0.
|
181
|
+
rubygems_version: 2.0.3
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: A simple web-scraping framework
|
185
185
|
test_files:
|
186
|
-
- spec/data/
|
187
|
-
- spec/data/
|
186
|
+
- spec/data/discussion.html
|
187
|
+
- spec/data/easttimor.html
|
188
188
|
- spec/data/propublica-relative.html
|
189
189
|
- spec/data/propublica.html
|
190
|
+
- spec/data/propublica_search.html
|
191
|
+
- spec/data/propublica_search_page_2.html
|
190
192
|
- spec/data/prosecutor.html
|
191
193
|
- spec/data/sixfacts.html
|
192
|
-
- spec/data/
|
193
|
-
- spec/data/easttimor.html
|
194
|
-
- spec/data/propublica_search.html
|
194
|
+
- spec/data/webinar.html
|
195
195
|
- spec/upton_spec.rb
|
196
196
|
- spec/spec_helper.rb
|
197
197
|
- spec/upton_downloader_spec.rb
|