upton 0.2.11 → 0.3.0

data/lib/upton.rb CHANGED
@@ -35,7 +35,7 @@ module Upton
  EMPTY_STRING = ''

  attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
-   :paginated, :pagination_param, :pagination_max_pages, :readable_filenames
+   :paginated, :pagination_param, :pagination_max_pages, :pagination_start_index, :readable_filenames

  ##
  # This is the main user-facing method for a basic scraper.
@@ -54,21 +54,13 @@ module Upton
  # +selector+: The XPath expression or CSS selector that specifies the
  #             anchor elements within the page, if a url is specified for
  #             the previous argument.
- # +selector_method+: Deprecated and ignored. Next breaking release will
- #                    remove this option.x
  #
  # These options are a shortcut. If you plan to override +get_index+, you
  # do not need to set them.
  # If you don't specify a selector, the first argument will be treated as a
  # list of URLs.
  ##
-
- # DEPRECATION NOTE, re: selector_method
- # the selector_method parameter is unneeded, as Nokogiri provides the
- # #search method, which picks a selector depending on whether
- # the String passed is of CSS/XPath notation
-
- def initialize(index_url_or_array, selector="", selector_method=:deprecated)
+ def initialize(index_url_or_array, selector="")

    #if first arg is a valid URL, do already-written stuff;
    #if it's not (or if it's a list?) don't bother with get_index, etc.
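With the deprecated selector_method argument gone, the constructor takes just an index URL (or an array of URLs) and an optional selector. A minimal usage sketch; the URLs and selector here are hypothetical:

  require 'upton'

  # An index page plus a CSS selector for the links to follow.
  scraper = Upton::Scraper.new("http://www.example.com/articles.html", "section h1 a")

  # Or skip the selector and pass an explicit list of instance URLs.
  list_scraper = Upton::Scraper.new(["http://www.example.com/one.html",
                                     "http://www.example.com/two.html"])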
@@ -107,8 +99,9 @@ module Upton
    @pagination_param = 'page'
    # Default number of paginated pages to scrape
    @pagination_max_pages = 2
-
-
+   # Default starting number for pagination (second page is this plus 1).
+   @pagination_start_index = 1
+
    # Folder name for stashes, if you want them to be stored somewhere else,
    # e.g. under /tmp.
    if @stash_folder
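The new pagination_start_index sits alongside the existing pagination options and sets the number the page counter starts from; per the comment above, the second index page is requested with this value plus one. A configuration sketch with hypothetical URL, selector, and parameter values:

  require 'upton'

  scraper = Upton::Scraper.new("http://www.example.com/search.html", ".result a")
  scraper.paginated = true             # follow paginated index pages
  scraper.pagination_param = 'p'       # query-string parameter to increment
  scraper.pagination_max_pages = 3     # stop after this many index pages
  scraper.pagination_start_index = 0   # second index page becomes ?p=1 instead of ?p=2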
@@ -231,18 +224,34 @@ module Upton
  ##
  # sometimes URLs are relative, e.g. "index.html" as opposed to "http://site.com/index.html"
  # resolve_url resolves them to absolute urls.
- # absolute_url_str must be a URL, as a string, that is absolute.
+ # absolute_url_str must be a URL, as a string that represents an absolute URL or a URI
  ##
  def resolve_url(href_str, absolute_url_str)
-   absolute_url = URI(absolute_url_str).dup
+   if absolute_url_str.class <= URI::Generic
+     absolute_url = absolute_url_str.dup
+   else
+     begin
+       absolute_url = URI(absolute_url_str).dup
+     rescue URI::InvalidURIError
+       raise ArgumentError, "#{absolute_url_str} must be represent a valid relative or absolute URI"
+     end
+   end
    raise ArgumentError, "#{absolute_url} must be absolute" unless absolute_url.absolute?
-   href = URI(href_str).dup
+   if href_str.class <= URI::Generic
+     href = href_str.dup
+   else
+     begin
+       href = URI(href_str).dup
+     rescue URI::InvalidURIError
+       raise ArgumentError, "#{href_str} must be represent a valid relative or absolute URI"
+     end
+   end

    # return :href if :href is already absolute
    return href.to_s if href.absolute?

    #TODO: edge cases, see [issue #8](https://github.com/propublica/upton/issues/8)
-   URI.join(absolute_url, href).to_s
+   URI.join(absolute_url.to_s, href.to_s).to_s
  end

  ##
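resolve_url now accepts either a String or a URI for both arguments, and raises ArgumentError for unparseable input or a relative base URL. An illustrative sketch with hypothetical values; send is used because the method may not be part of the public API:

  require 'upton'
  require 'uri'

  scraper = Upton::Scraper.new(["http://www.example.com/blog/"])

  scraper.send(:resolve_url, "index.html", "http://www.example.com/blog/")
  # => "http://www.example.com/blog/index.html"

  scraper.send(:resolve_url, URI("index.html"), URI("http://www.example.com/blog/"))
  # => "http://www.example.com/blog/index.html"

  scraper.send(:resolve_url, "index.html", "blog/")
  # => raises ArgumentError, because the base URL is not absolute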
@@ -251,16 +260,7 @@ module Upton
  # comes from an API.
  ##
  def get_index
-   # TODO: Deprecate @index_Selector_method in next minor release
-   parse_index(get_index_pages(@index_url, 1), @index_selector)
- end
-
- ##
- # Using the XPath expression or CSS selector and selector_method that
- # uniquely identifies the links in the index, return those links as strings. ##
- def old_parse_index(text, selector, selector_method=:deprecated) # TODO: Deprecate selector_method in next minor release.
-   # for now, override selector_method with :search, which will work with either CSS or XPath
-   Nokogiri::HTML(text).search(selector).to_a.map{|l| l["href"] }
+   index_pages = get_index_pages(@index_url, @pagination_start_index).map{|page| parse_index(page, @index_selector) }.flatten
  end

  # TODO: Not sure the best way to handle this
@@ -274,8 +274,7 @@ module Upton
  # to make sure that this method returns absolute urls
  # i.e. this method expects @index_url to always have an absolute address
  # for the lifetime of an Upton instance
- def parse_index(text, selector, selector_method=:deprecated) # TODO: Deprecate selector_method in next minor release.
-   # for now, override selector_method with :search, which will work with either CSS or XPath
+ def parse_index(text, selector)
    Nokogiri::HTML(text).search(selector).to_a.map do |a_element|
      href = a_element["href"]
      resolved_url = resolve_url( href, @index_url) unless href.nil?
@@ -290,18 +289,19 @@ module Upton
  # e.g. a site listing links with 2+ pages.
  ##
  def get_index_pages(url, pagination_index, options={})
-   resp = self.get_page(url, @index_debug, options)
-   unless resp.empty?
-     next_url = self.next_index_page_url(url, pagination_index + 1)
-     # resolve to absolute url
-     #
+   resps = [self.get_page(url, @index_debug, options)]
+   prev_url = url
+   while !resps.last.empty?
+     pagination_index += 1
+     next_url = self.next_index_page_url(url, pagination_index)
      next_url = resolve_url(next_url, url)
-     unless next_url == url
-       next_resp = self.get_index_pages(next_url, pagination_index + 1).to_s
-       resp += next_resp
-     end
+     break if next_url == prev_url || next_url.empty?
+
+     next_resp = self.get_page(next_url, @index_debug, options).to_s
+     prev_url = next_url
+     resps << next_resp
    end
-   resp
+   resps
  end

  ##
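Rather than recursively concatenating page bodies into one string, get_index_pages now loops until a response is empty or the next URL stops changing, returning an array with one entry per index page; get_index then parses each page and flattens the link lists. A rough sketch of the resulting shapes, calling the internal methods via send with hypothetical arguments:

  require 'upton'

  scraper = Upton::Scraper.new("http://www.example.com/search.html", ".result a")

  pages = scraper.send(:get_index_pages, "http://www.example.com/search.html", 1)
  # pages is an Array of HTML strings, one per fetched index page

  links = pages.map { |page| scraper.send(:parse_index, page, ".result a") }.flatten
  # links is a flat Array of absolute instance URLs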
@@ -313,26 +313,29 @@ module Upton
  # page, e.g. if a news article has two pages.
  ##
  def get_instance(url, pagination_index=0, options={})
-   resp = self.get_page(url, @debug, options)
-   if !resp.empty?
-     next_url = self.next_instance_page_url(url, pagination_index.to_i + 1)
-
-     #next_url = resolve_url(next_url, url)
-     unless next_url == url
-       next_resp = self.get_instance(next_url, pagination_index.to_i + 1).to_s
-       resp += next_resp
-     end
+   resps = [self.get_page(url, @debug, options)]
+   pagination_index = pagination_index.to_i
+   prev_url = url
+   while !resps.last.empty?
+     next_url = self.next_instance_page_url(url, pagination_index + 1)
+     break if next_url == prev_url || next_url.empty?
+
+     next_resp = self.get_page(next_url, @debug, options)
+     prev_url = next_url
+     resps << next_resp
    end
-   resp
+   resps
  end

  # Just a helper for +scrape+.
  def scrape_from_list(list, blk)
    puts "Scraping #{list.size} instances" if @verbose
    list.each_with_index.map do |instance_url, instance_index|
-     instance_resp = get_instance instance_url, nil, :instance_index => instance_index
-     blk.call(instance_resp, instance_url, instance_index)
-   end
+     instance_resps = get_instance instance_url, nil, :instance_index => instance_index
+     instance_resps.each_with_index.map do |instance_resp, pagination_index|
+       blk.call(instance_resp, instance_url, instance_index, pagination_index)
+     end
+   end.flatten(1)
  end

  # it's often useful to have this slug method for uniquely (almost certainly) identifying pages.
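Because get_instance now returns one response per paginated page, scrape_from_list yields each page separately and passes its position through to the block as a fourth argument. Ruby blocks ignore extra arguments, so existing one- or three-argument scrape blocks keep working. A hedged usage sketch with a hypothetical URL and selector:

  require 'upton'
  require 'nokogiri'

  scraper = Upton::Scraper.new("http://www.example.com/articles.html", "a.article")
  headlines = scraper.scrape do |html, url, instance_index, pagination_index|
    # pagination_index is 0 for the first page of each instance, 1 for the next, and so on.
    Nokogiri::HTML(html).css("h1").text
  end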
data/lib/upton/downloader.rb CHANGED
@@ -42,10 +42,14 @@ module Upton

  private

+ def make_request_for_resource!
+   RestClient.get(uri)
+ end
+
  def download_from_resource!
    begin
      puts "Downloading from #{uri}" if @verbose
-     resp = RestClient.get(uri)
+     resp = make_request_for_resource!
      puts "Downloaded #{uri}" if @verbose
    rescue RestClient::ResourceNotFound
      puts "404 error, skipping: #{uri}" if @verbose
@@ -73,7 +77,7 @@ module Upton
  puts "Cache of #{uri} unavailable. Will download from the internet"
  end
  end
- from_resource = false
+ from_resource = true
  download_from_resource!
  end
  unless cached_file_exists?
@@ -84,7 +88,7 @@ module Upton
  puts "Writing #{uri} data to the cache"
  end
  end
- File.write(cached_file, resp)
+ open(cached_file, 'w'){|f| f << resp}
  end
  {:resp => resp, :from_resource => from_resource }
  end
data/lib/upton/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Upton # :nodoc:
+   VERSION = '0.3.0'
+ end
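The version string now lives in its own file, so the gemspec and the library can both read it from one place. A quick sketch:

  require 'upton/version'
  Upton::VERSION  # => "0.3.0"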
data/spec/upton_spec.rb CHANGED
@@ -54,8 +54,9 @@ describe Upton do

  propubscraper = Upton::Scraper.new("http://www.example.com/propublica.html", "section#river section h1 a")
  propubscraper.debug = true
- propubscraper.verbose = true
+ propubscraper.verbose = false
  propubscraper.sleep_time_between_requests = 0
+ propubscraper.stash_folder = "test_stashes"

  heds = propubscraper.scrape do |article_str|
    doc = Nokogiri::HTML(article_str)
@@ -88,8 +89,9 @@ describe Upton do

  propubscraper = Upton::Scraper.new("http://www.example.com/propublica-relative.html", "section#river h1 a")
  propubscraper.debug = true
- propubscraper.verbose = true
+ propubscraper.verbose = false
  propubscraper.sleep_time_between_requests = 0
+ propubscraper.stash_folder = "test_stashes"

  heds = propubscraper.scrape do |article_str|
    doc = Nokogiri::HTML(article_str)
@@ -105,8 +107,9 @@ describe Upton do

  propubscraper = Upton::Scraper.new(["http://www.example.com/propublica.html"])
  propubscraper.debug = true
- propubscraper.verbose = true
+ propubscraper.verbose = false
  propubscraper.sleep_time_between_requests = 0
+ propubscraper.stash_folder = "test_stashes"

  list = propubscraper.scrape(&Upton::Utils.list("#jamb.wNarrow #most-commented li a"))
  FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
@@ -119,10 +122,12 @@ describe Upton do

  propubscraper = Upton::Scraper.new(["http://www.example.com/easttimor.html"])
  propubscraper.debug = true
- propubscraper.verbose = true
+ propubscraper.verbose = false
  propubscraper.sleep_time_between_requests = 0
+ propubscraper.stash_folder = "test_stashes"

  table = propubscraper.scrape(&Upton::Utils.table('//table[contains(concat(" ", normalize-space(@class), " "), " wikitable ")][2]'))
+ table.map{|outer| outer.map{|row| row.map{|cell| cell.gsub!("\n", '') } }} # cope with diff nokogiri versions differing behavior.
  FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
  table.should eql @east_timor_prime_ministers
  end
@@ -148,11 +153,12 @@ describe Upton do

  propubscraper = Upton::Scraper.new("http://www.example.com/propublica_search.html", '.compact-list a.title-link')
  propubscraper.debug = true
- propubscraper.verbose = true
+ propubscraper.verbose = false
  propubscraper.paginated = true
  propubscraper.pagination_param = 'p'
  propubscraper.pagination_max_pages = 3
  propubscraper.sleep_time_between_requests = 0
+ propubscraper.stash_folder = "test_stashes"

  results = propubscraper.scrape do |article_str|
    doc = Nokogiri::HTML(article_str)
@@ -167,15 +173,57 @@ describe Upton do
    Upton::Scraper.stub(:sleep)
  end

- it "should sleep after uncached requests" do
+ it "should sleep after requests with caching disabled" do
    stub_request(:get, "www.example.com")
    u = Upton::Scraper.new("http://www.example.com", '.whatever')
+   u.index_debug = false
    u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
    u.should_receive(:sleep)
-   stub = stub_request(:get, "http://www.example.com")
    u.scrape
  end

+ it "should sleep after uncached requests when caching is enabled" do
+   FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
+   stub_request(:get, "www.example.com")
+   u = Upton::Scraper.new("http://www.example.com", '.whatever')
+   u.index_debug = true
+   u.stash_folder = "test_stashes"
+   u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
+   u.should_receive(:sleep)
+   u.scrape
+ end
+
+ it "should sleep after paginated requests when caching is disabled" do
+   FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
+   stub_request(:get, "www.example.com/propublica_search.html").
+     to_return(:body => File.new('./spec/data/propublica_search.html'), :status => 200)
+   stub_request(:get, "www.example.com/propublica_search.html?p=2").
+     to_return(:body => File.new('./spec/data/propublica_search_page_2.html'), :status => 200)
+   stub_request(:get, "www.example.com/propublica_search.html?p=3").
+     to_return(:body => '', :status => 200)
+   stub_request(:get, "www.example.com/webinar.html").
+     to_return(:body => File.new('./spec/data/webinar.html'), :status => 200)
+   stub_request(:get, "www.example.com/prosecutor.html").
+     to_return(:body => File.new('./spec/data/prosecutor.html'), :status => 200)
+   stub_request(:get, "www.example.com/sixfacts.html").
+     to_return(:body => File.new('./spec/data/sixfacts.html'), :status => 200)
+
+
+   u = Upton::Scraper.new("http://www.example.com/propublica_search.html", '.nonexistent')
+   u.index_debug = false
+   u.debug = false
+   u.paginated = true
+   u.pagination_param = 'p'
+   u.pagination_max_pages = 3
+   u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
+   u.stash_folder = "test_stashes"
+
+   u.should_receive(:sleep).exactly(3).times #once for each search page, so 3.
+   u.scrape
+   FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
+ end
+
+
  it "should save to the designated stash folder" do
    custom_cache_folder = "#{Dir.tmpdir}/upton/test"
    FileUtils.rm_rf(custom_cache_folder)
@@ -183,17 +231,28 @@ describe Upton do
      to_return(:body => '', :status => 200)

    u = Upton::Scraper.new("http://www.example.com", '.whatever')
+   u.sleep_time_between_requests = 0.0
    u.stash_folder = custom_cache_folder
    u.debug = true
    u.scrape do
      1+1
    end
-   puts [custom_cache_folder, custom_cache_folder + "/*", Dir.glob(custom_cache_folder)].inspect
    files = Dir.glob(custom_cache_folder)
    expect(files).not_to be_empty
  end

- it "should be silent if verbose if false" do
-   pending
+
+ before do
+   Upton::Scraper.stub(:puts)
  end
+
+ it "should be silent if verbose is false" do
+   stub_request(:get, "www.example.com")
+   u = Upton::Scraper.new("http://www.example.com", '.whatever')
+   u.sleep_time_between_requests = 0.0
+   u.verbose = false
+   u.should_not_receive(:puts)
+   u.scrape
+ end
+
  end
metadata CHANGED
@@ -1,102 +1,116 @@
  --- !ruby/object:Gem::Specification
  name: upton
  version: !ruby/object:Gem::Version
-   version: 0.2.11
+   version: 0.3.0
+   prerelease:
  platform: ruby
  authors:
  - Jeremy B. Merrill
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-11-14 00:00:00.000000000 Z
+ date: 2013-12-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rack
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: rspec
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: webmock
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: thin
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: nokogiri
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 1.5.1
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
-         version: '0'
+         version: 1.5.1
  - !ruby/object:Gem::Dependency
    name: yard
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: rest-client
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
@@ -104,6 +118,7 @@ dependencies:
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
@@ -111,29 +126,33 @@ dependencies:
  - !ruby/object:Gem::Dependency
    name: nokogiri
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: mechanize
    requirement: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
+     none: false
      requirements:
-     - - '>='
+     - - ! '>='
        - !ruby/object:Gem::Version
          version: '0'
  description: Don't re-write web scrapers every time. Upton gives you a scraper template
@@ -146,52 +165,54 @@ files:
  - lib/upton.rb
  - lib/upton/utils.rb
  - lib/upton/downloader.rb
- - spec/data/discussion.html
+ - lib/upton/version.rb
+ - spec/data/prosecutor.html
  - spec/data/easttimor.html
- - spec/data/propublica-relative.html
- - spec/data/propublica.html
+ - spec/data/discussion.html
  - spec/data/propublica_search.html
  - spec/data/propublica_search_page_2.html
- - spec/data/prosecutor.html
- - spec/data/sixfacts.html
+ - spec/data/propublica-relative.html
  - spec/data/webinar.html
+ - spec/data/propublica.html
+ - spec/data/sixfacts.html
  - spec/upton_spec.rb
  - spec/spec_helper.rb
  - spec/upton_downloader_spec.rb
  homepage: http://github.org/propublica/upton
  licenses:
  - MIT
- metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
    requirements:
-   - - '>='
+   - - ! '>='
      - !ruby/object:Gem::Version
-       version: 1.8.7
+       version: 1.9.2
  required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
    requirements:
-   - - '>='
+   - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.3
+ rubygems_version: 1.8.23
  signing_key:
- specification_version: 4
+ specification_version: 3
  summary: A simple web-scraping framework
  test_files:
- - spec/data/discussion.html
+ - spec/data/prosecutor.html
  - spec/data/easttimor.html
- - spec/data/propublica-relative.html
- - spec/data/propublica.html
+ - spec/data/discussion.html
  - spec/data/propublica_search.html
  - spec/data/propublica_search_page_2.html
- - spec/data/prosecutor.html
- - spec/data/sixfacts.html
+ - spec/data/propublica-relative.html
  - spec/data/webinar.html
+ - spec/data/propublica.html
+ - spec/data/sixfacts.html
  - spec/upton_spec.rb
  - spec/spec_helper.rb
  - spec/upton_downloader_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
- ---
- SHA1:
-   metadata.gz: 2ef1916db6e2fb734cb8ea7ed33eb5edb67b37e3
-   data.tar.gz: 2a9da49f8a47dfc9e1feab2138045f7aa49268d6
- SHA512:
-   metadata.gz: e94a228a8fb01c90c0e7535b106b2af4dd8983ea3e92b2813cd5d038c3985a5f55c5fbcac19ee5f16f3271ad9e390d426f0ad8ad7b0c08afdf3b9d745cff2738
-   data.tar.gz: f8b0475e022980cd6ca0eec6dc8512394723084ba59d0b47cd36c24c736fbfc4a58b52ce186a3f5b91c69fd1241dfaa9d57c5f71bf6867255426e0fd3f26ed0f