upton 0.3.0 → 0.3.1

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 892592f6c890ecd94fb1bdf3b8cc500e813ebfa3
+   data.tar.gz: 95d10ea4c37aaec611c76dc98c45dd449a1ac35d
+ SHA512:
+   metadata.gz: f112a48ed90264ac5e111e48b45e6b67468793059f613385faa87bd6ab5122a7f11c532358daf7382c689e42b66db022bfce24ffd0b32ffe15619de0a026df77
+   data.tar.gz: 8fcbd1276ea6e284481d0395de5a1f73c07a2acb159edab6775c875d3dbf76e2b3c16c6b46b2aa48aec345fbd4950a5af179a6e560a1af5923e5087a8c6a648b
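These digests let a client verify the gem's packaged files. A minimal verification sketch using Ruby's standard YAML and Digest libraries (the filenames are hypothetical; the recorded value is the SHA512 entry above):

    require 'yaml'
    require 'digest'

    # Hypothetical check: compare a file's SHA512 digest against the
    # value recorded in checksums.yaml.
    recorded = YAML.load_file("checksums.yaml")["SHA512"]["metadata.gz"]
    actual   = Digest::SHA512.file("metadata.gz").hexdigest
    puts(actual == recorded ? "checksum OK" : "checksum MISMATCH")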
data/lib/upton/downloader.rb CHANGED
@@ -2,6 +2,7 @@ require "fileutils"
  require "open-uri"
  require "tmpdir"
  require "restclient"
+ require_relative "./version"

  module Upton

@@ -88,11 +89,30 @@ module Upton
        puts "Writing #{uri} data to the cache"
      end
    end
-   open(cached_file, 'w'){|f| f << resp}
+   commented_resp = add_comment(resp)
+   open(cached_file, 'w'){|f| f << commented_resp}
  end
  {:resp => resp, :from_resource => from_resource }
  end

+   def add_comment(resp)
+     # n = Nokogiri::HTML("<html></html>")
+     # c = Nokogiri::XML::Comment.new(n, "asdfasdf")
+     # n.root.add_child(c)
+     # <!----Retrieved by Upton from http://www.somesite.com on January 15 at 4:28 p.m.-->
+     msg = "Stashed file retrieved by Upton #{Upton::VERSION} from #{@uri} at #{Time.now}"
+     resp_html = Nokogiri::HTML(resp)
+     comment = Nokogiri::XML::Comment.new(resp_html, msg)
+     if resp_html.root.nil?
+       return resp
+     elsif resp_html.root.children.empty?
+       resp_html.root.add_child(comment)
+     else
+       resp_html.root.children.before(comment)
+     end
+     resp_html.to_html
+   end
+
  def cache_enabled?
    !!@cache
  end
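The net effect of the new add_comment hook is that every stashed page now carries a provenance comment naming the Upton version, source URL, and retrieval time. A minimal sketch of the same Nokogiri calls in isolation (the HTML string and URL are hypothetical):

    require 'nokogiri'

    html = "<html><body><p>Hello</p></body></html>"
    msg  = "Stashed file retrieved by Upton 0.3.1 from http://example.com at #{Time.now}"

    doc     = Nokogiri::HTML(html)
    comment = Nokogiri::XML::Comment.new(doc, msg)
    # Prepend the comment inside <html>, before its first child,
    # as add_comment does for non-empty documents.
    doc.root.children.before(comment)
    puts doc.to_html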
data/lib/upton/scraper.rb ADDED
@@ -0,0 +1,330 @@
+ require 'uri'
+ require 'nokogiri'
+ require_relative './downloader'
+
+ module Upton
+   # Upton::Scraper can be used as-is for basic use-cases by:
+   # 1. specifying the pages to be scraped in `new` as an index page
+   #    or as an Array of URLs.
+   # 2. supplying a block to `scrape` or `scrape_to_csv`, or using a pre-built
+   #    block from Upton::Utils.
+   # For more complicated cases, subclass Upton::Scraper,
+   # e.g. +MyScraper < Upton::Scraper+, and override various methods.
+   ##
+   class Scraper
+     EMPTY_STRING = ''
+
+     attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
+                   :paginated, :pagination_param, :pagination_max_pages, :pagination_start_index, :readable_filenames,
+                   :pagination_interval
+
+     ##
+     # This is the main user-facing method for a basic scraper.
+     # Call +scrape+ with a block; this block will be called on
+     # the text of each instance page (and, optionally, its URL and its index
+     # in the list of instance URLs returned by +get_index+).
+     ##
+     def scrape(&blk)
+       self.url_array = self.get_index unless self.url_array
+       self.scrape_from_list(self.url_array, blk)
+     end
+
+     ##
+     # +index_url_or_array+: A list of string URLs, OR
+     #   the URL of the page containing the list of instances.
+     # +selector+: The XPath expression or CSS selector that specifies the
+     #   anchor elements within the page, if a url is specified for
+     #   the previous argument.
+     #
+     # These options are a shortcut. If you plan to override +get_index+, you
+     # do not need to set them.
+     # If you don't specify a selector, the first argument will be treated as a
+     # list of URLs.
+     ##
+     def initialize(index_url_or_array, selector="")
+
+       # if first arg is a valid URL, do already-written stuff;
+       # if it's not (or if it's a list?) don't bother with get_index, etc.
+       # e.g. Scraper.new(["http://jeremybmerrill.com"])
+
+       # TODO: rewrite this, because it's a little silly. (i.e. there should be a more sensical division of how these arguments work)
+       if index_url_or_array.respond_to? :each_with_index
+         @url_array = index_url_or_array
+       else
+         @index_url = index_url_or_array
+         @index_selector = selector
+       end
+
+       # If true, then Upton prints information about when it gets
+       # files from the internet and when it gets them from its stash.
+       @verbose = false
+
+       # If true, then Upton fetches each instance page only once;
+       # future requests for that file are responded to with the locally stashed
+       # version.
+       # You may want to set @debug to false for production (but maybe not).
+       # You can also control stashing behavior on a per-call basis with the
+       # optional second argument to get_page if, for instance, you want to
+       # stash certain instance pages, e.g. based on their modification date.
+       @debug = true
+       # Index debug does the same, but for index pages.
+       @index_debug = false
+
+       # In order to not hammer servers, Upton waits, by default, 30
+       # seconds between requests to the remote server.
+       @sleep_time_between_requests = 30 # seconds
+
+       # If true, then Upton will attempt to scrape paginated index pages
+       @paginated = false
+       # Default query string parameter used to specify the current page
+       @pagination_param = 'page'
+       # Default number of paginated pages to scrape
+       @pagination_max_pages = 2
+       # Default starting number for pagination (second page is this plus 1).
+       @pagination_start_index = 1
+       # Default value to increment page number by
+       @pagination_interval = 1
+
+       # Folder name for stashes, if you want them to be stored somewhere else,
+       # e.g. under /tmp.
+       if @stash_folder
+         FileUtils.mkdir_p(@stash_folder) unless Dir.exists?(@stash_folder)
+       end
+     end
+
+     ##
+     # If instance pages are paginated, <b>you must override</b>
+     # this method to return the next URL, given the current URL and its index.
+     #
+     # If instance pages aren't paginated, there's no need to override this.
+     #
+     # Recursion stops if fetching the URL returns an empty string or an error.
+     #
+     # e.g. next_instance_page_url("http://whatever.com/article/upton-sinclairs-the-jungle?page=1", 2)
+     # ought to return "http://whatever.com/article/upton-sinclairs-the-jungle?page=2"
+     ##
+     def next_instance_page_url(url, pagination_index)
+       EMPTY_STRING
+     end
+
+     ##
+     # Return the next URL to scrape, given the current URL and its index.
+     #
+     # Recursion stops if fetching the URL returns an empty string or an error.
+     #
+     # If @paginated is not set (the default), this method returns an empty string.
+     #
+     # If @paginated is set, this method will return the next pagination URL
+     # to scrape using @pagination_param and the pagination_index.
+     #
+     # If the pagination_index is greater than @pagination_max_pages, then the
+     # method will return an empty string.
+     #
+     # Override this method to handle pagination in an alternative way,
+     # e.g. next_index_page_url("http://whatever.com/articles?page=1", 2)
+     # ought to return "http://whatever.com/articles?page=2"
+     #
+     ##
+     def next_index_page_url(url, pagination_index)
+       return EMPTY_STRING unless @paginated
+
+       if pagination_index > @pagination_max_pages
+         puts "Exceeded pagination limit of #{@pagination_max_pages}" if @verbose
+         EMPTY_STRING
+       else
+         uri = URI.parse(url)
+         query = uri.query ? Hash[URI.decode_www_form(uri.query)] : {}
+         # update the pagination query string parameter
+         query[@pagination_param] = pagination_index
+         uri.query = URI.encode_www_form(query)
+         puts "Next index pagination url is #{uri}" if @verbose
+         uri.to_s
+       end
+     end
+
+     ##
+     # Writes the scraped result to a CSV at the given filename.
+     ##
+     def scrape_to_csv filename, &blk
+       require 'csv'
+       self.url_array = self.get_index unless self.url_array
+       CSV.open filename, 'wb' do |csv|
+         # this is a conscious choice: each document is a list of things, either single elements or rows (as lists).
+         self.scrape_from_list(self.url_array, blk).compact.each do |document|
+           if document[0].respond_to? :map
+             document.each{|row| csv << row }
+           else
+             csv << document
+           end
+         end
+         # self.scrape_from_list(self.url_array, blk).compact.each{|document| csv << document }
+       end
+     end
+
+     def scrape_to_tsv filename, &blk
+       require 'csv'
+       self.url_array = self.get_index unless self.url_array
+       CSV.open filename, 'wb', :col_sep => "\t" do |csv|
+         # this is a conscious choice: each document is a list of things, either single elements or rows (as lists).
+         self.scrape_from_list(self.url_array, blk).compact.each do |document|
+           if document[0].respond_to? :map
+             document.each{|row| csv << row }
+           else
+             csv << document
+           end
+         end
+         # self.scrape_from_list(self.url_array, blk).compact.each{|document| csv << document }
+       end
+     end
+
+     protected
+
+     ##
+     # Handles getting pages with Downloader, which handles stashing.
+     ##
+     def get_page(url, stash=false, options={})
+       return EMPTY_STRING if url.nil? || url.empty? # url is nil if the <a> lacks an `href` attribute.
+       global_options = {
+         :cache => stash,
+         :verbose => @verbose
+       }
+       if @readable_filenames
+         global_options[:readable_filenames] = true
+       end
+       if @stash_folder
+         global_options[:readable_filenames] = true
+         global_options[:cache_location] = @stash_folder
+       end
+       resp_and_cache = Downloader.new(url, global_options.merge(options)).get
+       if resp_and_cache[:from_resource]
+         puts "sleeping #{@sleep_time_between_requests} secs" if @verbose
+         sleep @sleep_time_between_requests
+       end
+       resp_and_cache[:resp]
+     end
+
+
+     ##
+     # Sometimes URLs are relative, e.g. "index.html" as opposed to "http://site.com/index.html";
+     # resolve_url resolves them to absolute urls.
+     # absolute_url_str must be an absolute URL, as either a string or a URI.
+     ##
+     def resolve_url(href_str, absolute_url_str)
+       if absolute_url_str.class <= URI::Generic
+         absolute_url = absolute_url_str.dup
+       else
+         begin
+           absolute_url = URI(absolute_url_str).dup
+         rescue URI::InvalidURIError
+           raise ArgumentError, "#{absolute_url_str} must represent a valid relative or absolute URI"
+         end
+       end
+       raise ArgumentError, "#{absolute_url} must be absolute" unless absolute_url.absolute?
+       if href_str.class <= URI::Generic
+         href = href_str.dup
+       else
+         begin
+           href = URI(href_str).dup
+         rescue URI::InvalidURIError
+           raise ArgumentError, "#{href_str} must represent a valid relative or absolute URI"
+         end
+       end
+
+       # return :href if :href is already absolute
+       return href.to_s if href.absolute?
+
+       # TODO: edge cases, see [issue #8](https://github.com/propublica/upton/issues/8)
+       URI.join(absolute_url.to_s, href.to_s).to_s
+     end
+
+     ##
+     # Return a list of URLs for the instances you want to scrape.
+     # This can optionally be overridden if, for example, the list of instances
+     # comes from an API.
+     ##
+     def get_index
+       index_pages = get_index_pages(@index_url, @pagination_start_index, @pagination_interval).map{|page| parse_index(page, @index_selector) }.flatten
+     end
+
+     # TODO: Not sure the best way to handle this.
+     # Currently, #parse_index is called upon #get_index_pages,
+     # which itself is dependent on @index_url.
+     # Does @index_url stay unaltered for the lifetime of the Upton instance?
+     # It seems to at this point, but that may be something that gets
+     # deprecated later.
+     #
+     # So for now, @index_url is used in conjunction with resolve_url
+     # to make sure that this method returns absolute urls,
+     # i.e. this method expects @index_url to always have an absolute address
+     # for the lifetime of an Upton instance.
+     def parse_index(text, selector)
+       Nokogiri::HTML(text).search(selector).to_a.map do |a_element|
+         href = a_element["href"]
+         resolved_url = resolve_url(href, @index_url) unless href.nil?
+         puts "resolved #{href} to #{resolved_url}" if @verbose && resolved_url != href
+         resolved_url
+       end
+     end
+
+
+     ##
+     # Returns the concatenated output of each member of a paginated index,
+     # e.g. a site listing links with 2+ pages.
+     ##
+     def get_index_pages(url, pagination_index, pagination_interval, options={})
+       resps = [self.get_page(url, @index_debug, options)]
+       prev_url = url
+       while !resps.last.empty?
+         pagination_index += pagination_interval
+         next_url = self.next_index_page_url(url, pagination_index)
+         next_url = resolve_url(next_url, url)
+         break if next_url == prev_url || next_url.empty?
+
+         next_resp = self.get_page(next_url, @index_debug, options).to_s
+         prev_url = next_url
+         resps << next_resp
+       end
+       resps
+     end
+
+     ##
+     # Returns the instance at `url`.
+     #
+     # If the page is stashed, returns that; otherwise, fetches it from the web.
+     #
+     # If an instance is paginated, returns the concatenated output of each
+     # page, e.g. if a news article has two pages.
+     ##
+     def get_instance(url, pagination_index=0, options={})
+       resps = [self.get_page(url, @debug, options)]
+       pagination_index = pagination_index.to_i
+       prev_url = url
+       while !resps.last.empty?
+         next_url = self.next_instance_page_url(url, pagination_index + 1)
+         break if next_url == prev_url || next_url.empty?
+
+         next_resp = self.get_page(next_url, @debug, options)
+         prev_url = next_url
+         resps << next_resp
+       end
+       resps
+     end
+
+     # Just a helper for +scrape+.
+     def scrape_from_list(list, blk)
+       puts "Scraping #{list.size} instances" if @verbose
+       list.each_with_index.map do |instance_url, instance_index|
+         instance_resps = get_instance instance_url, nil, :instance_index => instance_index
+         instance_resps.each_with_index.map do |instance_resp, pagination_index|
+           blk.call(instance_resp, instance_url, instance_index, pagination_index)
+         end
+       end.flatten(1)
+     end
+
+     # It's often useful to have this slug method for uniquely (almost certainly) identifying pages.
+     def slug(url)
+       url.split("/")[-1].gsub(/\?.*/, "").gsub(/.html.*/, "")
+     end
+
+   end
+ end
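As the class comment at the top of this new file describes, the scraper can be driven either by an index page plus a link selector or by an explicit Array of URLs. A minimal usage sketch under that description (the URLs and selectors are hypothetical):

    require 'upton'
    require 'nokogiri'

    # Mode 1: an index page plus a CSS selector for the instance links.
    scraper = Upton::Scraper.new("http://example.com/news", "a.headline")
    # Mode 2 (alternative): an explicit list of instance URLs.
    # scraper = Upton::Scraper.new(["http://example.com/a.html", "http://example.com/b.html"])

    scraper.verbose = true
    scraper.sleep_time_between_requests = 5  # default is 30 seconds

    # The block receives each page's HTML, plus its URL and indices.
    titles = scraper.scrape do |html, url, instance_index, pagination_index|
      Nokogiri::HTML(html).at("h1").text
    end

    # Or send each block result straight to a CSV row:
    # scraper.scrape_to_csv("titles.csv") { |html| Nokogiri::HTML(html).at("h1").text }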
data/lib/upton/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Upton # :nodoc:
- VERSION = '0.3.0'
+ VERSION = '0.3.1'
  end
data/lib/upton.rb CHANGED
@@ -35,7 +35,8 @@ module Upton
  EMPTY_STRING = ''

  attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
- :paginated, :pagination_param, :pagination_max_pages, :pagination_start_index, :readable_filenames
+ :paginated, :pagination_param, :pagination_max_pages, :pagination_start_index, :readable_filenames,
+ :pagination_interval

  ##
  # This is the main user-facing method for a basic scraper.
@@ -101,6 +102,8 @@ module Upton
  @pagination_max_pages = 2
  # Default starting number for pagination (second page is this plus 1).
  @pagination_start_index = 1
+ # Default value to increment page number by
+ @pagination_interval = 1

  # Folder name for stashes, if you want them to be stored somewhere else,
  # e.g. under /tmp.
@@ -260,7 +263,7 @@ module Upton
  # comes from an API.
  ##
  def get_index
- index_pages = get_index_pages(@index_url, @pagination_start_index).map{|page| parse_index(page, @index_selector) }.flatten
+ index_pages = get_index_pages(@index_url, @pagination_start_index, @pagination_interval).map{|page| parse_index(page, @index_selector) }.flatten
  end

  # TODO: Not sure the best way to handle this
@@ -288,11 +291,11 @@ module Upton
  # Returns the concatenated output of each member of a paginated index,
  # e.g. a site listing links with 2+ pages.
  ##
- def get_index_pages(url, pagination_index, options={})
+ def get_index_pages(url, pagination_index, pagination_interval, options={})
  resps = [self.get_page(url, @index_debug, options)]
  prev_url = url
  while !resps.last.empty?
- pagination_index += 1
+ pagination_index += pagination_interval
  next_url = self.next_index_page_url(url, pagination_index)
  next_url = resolve_url(next_url, url)
  break if next_url == prev_url || next_url.empty?
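The pagination changes above mean an index whose page parameter steps by something other than 1 can now be scraped by setting the new pagination_interval accessor. A sketch of the intended use (the URL, selector, and interval of 10 are hypothetical):

    require 'upton'

    scraper = Upton::Scraper.new("http://example.com/articles", "a.article-link")
    scraper.paginated            = true
    scraper.pagination_param     = 'page'  # the default
    scraper.pagination_max_pages = 50
    scraper.pagination_interval  = 10      # new in 0.3.1; previously the step was always 1

    scraper.scrape do |html, url, index, page|
      # process each article page here
    end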
metadata CHANGED
@@ -1,116 +1,102 @@
  --- !ruby/object:Gem::Specification
  name: upton
  version: !ruby/object:Gem::Version
- version: 0.3.0
- prerelease:
+ version: 0.3.1
  platform: ruby
  authors:
  - Jeremy B. Merrill
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-12-22 00:00:00.000000000 Z
+ date: 2014-02-16 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rack
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: webmock
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: thin
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: nokogiri
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: 1.5.1
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: 1.5.1
  - !ruby/object:Gem::Dependency
  name: yard
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: rest-client
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
  - - ~>
  - !ruby/object:Gem::Version
@@ -118,7 +104,6 @@ dependencies:
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
  - - ~>
  - !ruby/object:Gem::Version
@@ -126,33 +111,29 @@ dependencies:
  - !ruby/object:Gem::Dependency
  name: nokogiri
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: mechanize
  requirement: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  description: Don't re-write web scrapers every time. Upton gives you a scraper template
@@ -163,6 +144,7 @@ extensions: []
  extra_rdoc_files: []
  files:
  - lib/upton.rb
+ - lib/upton/scraper.rb
  - lib/upton/utils.rb
  - lib/upton/downloader.rb
  - lib/upton/version.rb
@@ -181,27 +163,26 @@ files:
  homepage: http://github.org/propublica/upton
  licenses:
  - MIT
+ metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: 1.9.2
  required_rubygems_version: !ruby/object:Gem::Requirement
- none: false
  requirements:
- - - ! '>='
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 1.8.23
+ rubygems_version: 2.0.14
  signing_key:
- specification_version: 3
+ specification_version: 4
  summary: A simple web-scraping framework
  test_files:
  - spec/data/prosecutor.html
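The requirement entries above use two constraint styles: '>=' accepts any newer version, while rest-client's '~>' ("pessimistic") constraint pins the release series (its exact version string is cut off by the hunk boundary above). A sketch of how RubyGems evaluates these, using a hypothetical '~> 1.6' value:

    require 'rubygems'

    # Gem::Requirement and Gem::Version are part of RubyGems itself.
    req = Gem::Requirement.new("~> 1.6")
    puts req.satisfied_by?(Gem::Version.new("1.6.7"))  # => true
    puts req.satisfied_by?(Gem::Version.new("2.0.0"))  # => false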