upton 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/upton.rb CHANGED
@@ -35,7 +35,7 @@ module Upton
35
35
  EMPTY_STRING = ''
36
36
 
37
37
  attr_accessor :verbose, :debug, :index_debug, :sleep_time_between_requests, :stash_folder, :url_array,
38
- :paginated, :pagination_param, :pagination_max_pages, :readable_filenames
38
+ :paginated, :pagination_param, :pagination_max_pages, :pagination_start_index, :readable_filenames
39
39
 
40
40
  ##
41
41
  # This is the main user-facing method for a basic scraper.
@@ -54,21 +54,13 @@ module Upton
54
54
  # +selector+: The XPath expression or CSS selector that specifies the
55
55
  # anchor elements within the page, if a url is specified for
56
56
  # the previous argument.
57
- # +selector_method+: Deprecated and ignored. Next breaking release will
58
- # remove this option.x
59
57
  #
60
58
  # These options are a shortcut. If you plan to override +get_index+, you
61
59
  # do not need to set them.
62
60
  # If you don't specify a selector, the first argument will be treated as a
63
61
  # list of URLs.
64
62
  ##
65
-
66
- # DEPRECATION NOTE, re: selector_method
67
- # the selector_method parameter is unneeded, as Nokogiri provides the
68
- # #search method, which picks a selector depending on whether
69
- # the String passed is of CSS/XPath notation
70
-
71
- def initialize(index_url_or_array, selector="", selector_method=:deprecated)
63
+ def initialize(index_url_or_array, selector="")
72
64
 
73
65
  #if first arg is a valid URL, do already-written stuff;
74
66
  #if it's not (or if it's a list?) don't bother with get_index, etc.
@@ -107,8 +99,9 @@ module Upton
107
99
  @pagination_param = 'page'
108
100
  # Default number of paginated pages to scrape
109
101
  @pagination_max_pages = 2
110
-
111
-
102
+ # Default starting number for pagination (second page is this plus 1).
103
+ @pagination_start_index = 1
104
+
112
105
  # Folder name for stashes, if you want them to be stored somewhere else,
113
106
  # e.g. under /tmp.
114
107
  if @stash_folder
@@ -231,18 +224,34 @@ module Upton
231
224
  ##
232
225
  # sometimes URLs are relative, e.g. "index.html" as opposed to "http://site.com/index.html"
233
226
  # resolve_url resolves them to absolute urls.
234
- # absolute_url_str must be a URL, as a string, that is absolute.
227
+ # absolute_url_str must be an absolute URL, given either as a string or as a URI object.
235
228
  ##
236
229
  def resolve_url(href_str, absolute_url_str)
237
- absolute_url = URI(absolute_url_str).dup
230
+ if absolute_url_str.class <= URI::Generic
231
+ absolute_url = absolute_url_str.dup
232
+ else
233
+ begin
234
+ absolute_url = URI(absolute_url_str).dup
235
+ rescue URI::InvalidURIError
236
+ raise ArgumentError, "#{absolute_url_str} must be represent a valid relative or absolute URI"
237
+ end
238
+ end
238
239
  raise ArgumentError, "#{absolute_url} must be absolute" unless absolute_url.absolute?
239
- href = URI(href_str).dup
240
+ if href_str.class <= URI::Generic
241
+ href = href_str.dup
242
+ else
243
+ begin
244
+ href = URI(href_str).dup
245
+ rescue URI::InvalidURIError
246
+ raise ArgumentError, "#{href_str} must be represent a valid relative or absolute URI"
247
+ end
248
+ end
240
249
 
241
250
  # return :href if :href is already absolute
242
251
  return href.to_s if href.absolute?
243
252
 
244
253
  #TODO: edge cases, see [issue #8](https://github.com/propublica/upton/issues/8)
245
- URI.join(absolute_url, href).to_s
254
+ URI.join(absolute_url.to_s, href.to_s).to_s
246
255
  end
247
256
 
248
257
  ##
@@ -251,16 +260,7 @@ module Upton
251
260
  # comes from an API.
252
261
  ##
253
262
  def get_index
254
- # TODO: Deprecate @index_Selector_method in next minor release
255
- parse_index(get_index_pages(@index_url, 1), @index_selector)
256
- end
257
-
258
- ##
259
- # Using the XPath expression or CSS selector and selector_method that
260
- # uniquely identifies the links in the index, return those links as strings. ##
261
- def old_parse_index(text, selector, selector_method=:deprecated) # TODO: Deprecate selector_method in next minor release.
262
- # for now, override selector_method with :search, which will work with either CSS or XPath
263
- Nokogiri::HTML(text).search(selector).to_a.map{|l| l["href"] }
263
+ index_pages = get_index_pages(@index_url, @pagination_start_index).map{|page| parse_index(page, @index_selector) }.flatten
264
264
  end
265
265
 
266
266
  # TODO: Not sure the best way to handle this
@@ -274,8 +274,7 @@ module Upton
274
274
  # to make sure that this method returns absolute urls
275
275
  # i.e. this method expects @index_url to always have an absolute address
276
276
  # for the lifetime of an Upton instance
277
- def parse_index(text, selector, selector_method=:deprecated) # TODO: Deprecate selector_method in next minor release.
278
- # for now, override selector_method with :search, which will work with either CSS or XPath
277
+ def parse_index(text, selector)
279
278
  Nokogiri::HTML(text).search(selector).to_a.map do |a_element|
280
279
  href = a_element["href"]
281
280
  resolved_url = resolve_url( href, @index_url) unless href.nil?
@@ -290,18 +289,19 @@ module Upton
290
289
  # e.g. a site listing links with 2+ pages.
291
290
  ##
292
291
  def get_index_pages(url, pagination_index, options={})
293
- resp = self.get_page(url, @index_debug, options)
294
- unless resp.empty?
295
- next_url = self.next_index_page_url(url, pagination_index + 1)
296
- # resolve to absolute url
297
- #
292
+ resps = [self.get_page(url, @index_debug, options)]
293
+ prev_url = url
294
+ while !resps.last.empty?
295
+ pagination_index += 1
296
+ next_url = self.next_index_page_url(url, pagination_index)
298
297
  next_url = resolve_url(next_url, url)
299
- unless next_url == url
300
- next_resp = self.get_index_pages(next_url, pagination_index + 1).to_s
301
- resp += next_resp
302
- end
298
+ break if next_url == prev_url || next_url.empty?
299
+
300
+ next_resp = self.get_page(next_url, @index_debug, options).to_s
301
+ prev_url = next_url
302
+ resps << next_resp
303
303
  end
304
- resp
304
+ resps
305
305
  end
306
306
 
307
307
  ##
@@ -313,26 +313,29 @@ module Upton
313
313
  # page, e.g. if a news article has two pages.
314
314
  ##
315
315
  def get_instance(url, pagination_index=0, options={})
316
- resp = self.get_page(url, @debug, options)
317
- if !resp.empty?
318
- next_url = self.next_instance_page_url(url, pagination_index.to_i + 1)
319
-
320
- #next_url = resolve_url(next_url, url)
321
- unless next_url == url
322
- next_resp = self.get_instance(next_url, pagination_index.to_i + 1).to_s
323
- resp += next_resp
324
- end
316
+ resps = [self.get_page(url, @debug, options)]
317
+ pagination_index = pagination_index.to_i
318
+ prev_url = url
319
+ while !resps.last.empty?
320
+ next_url = self.next_instance_page_url(url, pagination_index + 1)
321
+ break if next_url == prev_url || next_url.empty?
322
+
323
+ next_resp = self.get_page(next_url, @debug, options)
324
+ prev_url = next_url
325
+ resps << next_resp
325
326
  end
326
- resp
327
+ resps
327
328
  end
328
329
 
329
330
  # Just a helper for +scrape+.
330
331
  def scrape_from_list(list, blk)
331
332
  puts "Scraping #{list.size} instances" if @verbose
332
333
  list.each_with_index.map do |instance_url, instance_index|
333
- instance_resp = get_instance instance_url, nil, :instance_index => instance_index
334
- blk.call(instance_resp, instance_url, instance_index)
335
- end
334
+ instance_resps = get_instance instance_url, nil, :instance_index => instance_index
335
+ instance_resps.each_with_index.map do |instance_resp, pagination_index|
336
+ blk.call(instance_resp, instance_url, instance_index, pagination_index)
337
+ end
338
+ end.flatten(1)
336
339
  end
337
340
 
338
341
  # it's often useful to have this slug method for uniquely (almost certainly) identifying pages.
@@ -42,10 +42,14 @@ module Upton
42
42
 
43
43
  private
44
44
 
45
+ def make_request_for_resource!
46
+ RestClient.get(uri)
47
+ end
48
+
45
49
  def download_from_resource!
46
50
  begin
47
51
  puts "Downloading from #{uri}" if @verbose
48
- resp = RestClient.get(uri)
52
+ resp = make_request_for_resource!
49
53
  puts "Downloaded #{uri}" if @verbose
50
54
  rescue RestClient::ResourceNotFound
51
55
  puts "404 error, skipping: #{uri}" if @verbose
@@ -73,7 +77,7 @@ module Upton
73
77
  puts "Cache of #{uri} unavailable. Will download from the internet"
74
78
  end
75
79
  end
76
- from_resource = false
80
+ from_resource = true
77
81
  download_from_resource!
78
82
  end
79
83
  unless cached_file_exists?
@@ -84,7 +88,7 @@ module Upton
84
88
  puts "Writing #{uri} data to the cache"
85
89
  end
86
90
  end
87
- File.write(cached_file, resp)
91
+ open(cached_file, 'w'){|f| f << resp}
88
92
  end
89
93
  {:resp => resp, :from_resource => from_resource }
90
94
  end
@@ -0,0 +1,3 @@
1
+ module Upton # :nodoc:
2
+ VERSION = '0.3.0'
3
+ end
data/spec/upton_spec.rb CHANGED
@@ -54,8 +54,9 @@ describe Upton do
54
54
 
55
55
  propubscraper = Upton::Scraper.new("http://www.example.com/propublica.html", "section#river section h1 a")
56
56
  propubscraper.debug = true
57
- propubscraper.verbose = true
57
+ propubscraper.verbose = false
58
58
  propubscraper.sleep_time_between_requests = 0
59
+ propubscraper.stash_folder = "test_stashes"
59
60
 
60
61
  heds = propubscraper.scrape do |article_str|
61
62
  doc = Nokogiri::HTML(article_str)
@@ -88,8 +89,9 @@ describe Upton do
88
89
 
89
90
  propubscraper = Upton::Scraper.new("http://www.example.com/propublica-relative.html", "section#river h1 a")
90
91
  propubscraper.debug = true
91
- propubscraper.verbose = true
92
+ propubscraper.verbose = false
92
93
  propubscraper.sleep_time_between_requests = 0
94
+ propubscraper.stash_folder = "test_stashes"
93
95
 
94
96
  heds = propubscraper.scrape do |article_str|
95
97
  doc = Nokogiri::HTML(article_str)
@@ -105,8 +107,9 @@ describe Upton do
105
107
 
106
108
  propubscraper = Upton::Scraper.new(["http://www.example.com/propublica.html"])
107
109
  propubscraper.debug = true
108
- propubscraper.verbose = true
110
+ propubscraper.verbose = false
109
111
  propubscraper.sleep_time_between_requests = 0
112
+ propubscraper.stash_folder = "test_stashes"
110
113
 
111
114
  list = propubscraper.scrape(&Upton::Utils.list("#jamb.wNarrow #most-commented li a"))
112
115
  FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
@@ -119,10 +122,12 @@ describe Upton do
119
122
 
120
123
  propubscraper = Upton::Scraper.new(["http://www.example.com/easttimor.html"])
121
124
  propubscraper.debug = true
122
- propubscraper.verbose = true
125
+ propubscraper.verbose = false
123
126
  propubscraper.sleep_time_between_requests = 0
127
+ propubscraper.stash_folder = "test_stashes"
124
128
 
125
129
  table = propubscraper.scrape(&Upton::Utils.table('//table[contains(concat(" ", normalize-space(@class), " "), " wikitable ")][2]'))
130
+ table.map{|outer| outer.map{|row| row.map{|cell| cell.gsub!("\n", '') } }} # cope with differing behavior across Nokogiri versions.
126
131
  FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
127
132
  table.should eql @east_timor_prime_ministers
128
133
  end
@@ -148,11 +153,12 @@ describe Upton do
148
153
 
149
154
  propubscraper = Upton::Scraper.new("http://www.example.com/propublica_search.html", '.compact-list a.title-link')
150
155
  propubscraper.debug = true
151
- propubscraper.verbose = true
156
+ propubscraper.verbose = false
152
157
  propubscraper.paginated = true
153
158
  propubscraper.pagination_param = 'p'
154
159
  propubscraper.pagination_max_pages = 3
155
160
  propubscraper.sleep_time_between_requests = 0
161
+ propubscraper.stash_folder = "test_stashes"
156
162
 
157
163
  results = propubscraper.scrape do |article_str|
158
164
  doc = Nokogiri::HTML(article_str)
@@ -167,15 +173,57 @@ describe Upton do
167
173
  Upton::Scraper.stub(:sleep)
168
174
  end
169
175
 
170
- it "should sleep after uncached requests" do
176
+ it "should sleep after requests with caching disabled" do
171
177
  stub_request(:get, "www.example.com")
172
178
  u = Upton::Scraper.new("http://www.example.com", '.whatever')
179
+ u.index_debug = false
173
180
  u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
174
181
  u.should_receive(:sleep)
175
- stub = stub_request(:get, "http://www.example.com")
176
182
  u.scrape
177
183
  end
178
184
 
185
+ it "should sleep after uncached requests when caching is enabled" do
186
+ FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
187
+ stub_request(:get, "www.example.com")
188
+ u = Upton::Scraper.new("http://www.example.com", '.whatever')
189
+ u.index_debug = true
190
+ u.stash_folder = "test_stashes"
191
+ u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
192
+ u.should_receive(:sleep)
193
+ u.scrape
194
+ end
195
+
196
+ it "should sleep after paginated requests when caching is disabled" do
197
+ FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
198
+ stub_request(:get, "www.example.com/propublica_search.html").
199
+ to_return(:body => File.new('./spec/data/propublica_search.html'), :status => 200)
200
+ stub_request(:get, "www.example.com/propublica_search.html?p=2").
201
+ to_return(:body => File.new('./spec/data/propublica_search_page_2.html'), :status => 200)
202
+ stub_request(:get, "www.example.com/propublica_search.html?p=3").
203
+ to_return(:body => '', :status => 200)
204
+ stub_request(:get, "www.example.com/webinar.html").
205
+ to_return(:body => File.new('./spec/data/webinar.html'), :status => 200)
206
+ stub_request(:get, "www.example.com/prosecutor.html").
207
+ to_return(:body => File.new('./spec/data/prosecutor.html'), :status => 200)
208
+ stub_request(:get, "www.example.com/sixfacts.html").
209
+ to_return(:body => File.new('./spec/data/sixfacts.html'), :status => 200)
210
+
211
+
212
+ u = Upton::Scraper.new("http://www.example.com/propublica_search.html", '.nonexistent')
213
+ u.index_debug = false
214
+ u.debug = false
215
+ u.paginated = true
216
+ u.pagination_param = 'p'
217
+ u.pagination_max_pages = 3
218
+ u.sleep_time_between_requests = 1 #don't sleep too long, that's annoying.
219
+ u.stash_folder = "test_stashes"
220
+
221
+ u.should_receive(:sleep).exactly(3).times #once for each search page, so 3.
222
+ u.scrape
223
+ FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
224
+ end
225
+
226
+
179
227
  it "should save to the designated stash folder" do
180
228
  custom_cache_folder = "#{Dir.tmpdir}/upton/test"
181
229
  FileUtils.rm_rf(custom_cache_folder)
@@ -183,17 +231,28 @@ describe Upton do
183
231
  to_return(:body => '', :status => 200)
184
232
 
185
233
  u = Upton::Scraper.new("http://www.example.com", '.whatever')
234
+ u.sleep_time_between_requests = 0.0
186
235
  u.stash_folder = custom_cache_folder
187
236
  u.debug = true
188
237
  u.scrape do
189
238
  1+1
190
239
  end
191
- puts [custom_cache_folder, custom_cache_folder + "/*", Dir.glob(custom_cache_folder)].inspect
192
240
  files = Dir.glob(custom_cache_folder)
193
241
  expect(files).not_to be_empty
194
242
  end
195
243
 
196
- it "should be silent if verbose if false" do
197
- pending
244
+
245
+ before do
246
+ Upton::Scraper.stub(:puts)
198
247
  end
248
+
249
+ it "should be silent if verbose is false" do
250
+ stub_request(:get, "www.example.com")
251
+ u = Upton::Scraper.new("http://www.example.com", '.whatever')
252
+ u.sleep_time_between_requests = 0.0
253
+ u.verbose = false
254
+ u.should_not_receive(:puts)
255
+ u.scrape
256
+ end
257
+
199
258
  end
metadata CHANGED
@@ -1,102 +1,116 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: upton
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.3.0
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Jeremy B. Merrill
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-11-14 00:00:00.000000000 Z
12
+ date: 2013-12-22 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rack
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
- - - '>='
19
+ - - ! '>='
18
20
  - !ruby/object:Gem::Version
19
21
  version: '0'
20
22
  type: :development
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
- - - '>='
27
+ - - ! '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0'
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: rspec
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
- - - '>='
35
+ - - ! '>='
32
36
  - !ruby/object:Gem::Version
33
37
  version: '0'
34
38
  type: :development
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
- - - '>='
43
+ - - ! '>='
39
44
  - !ruby/object:Gem::Version
40
45
  version: '0'
41
46
  - !ruby/object:Gem::Dependency
42
47
  name: webmock
43
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
44
50
  requirements:
45
- - - '>='
51
+ - - ! '>='
46
52
  - !ruby/object:Gem::Version
47
53
  version: '0'
48
54
  type: :development
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
51
58
  requirements:
52
- - - '>='
59
+ - - ! '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
62
  - !ruby/object:Gem::Dependency
56
63
  name: thin
57
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
58
66
  requirements:
59
- - - '>='
67
+ - - ! '>='
60
68
  - !ruby/object:Gem::Version
61
69
  version: '0'
62
70
  type: :development
63
71
  prerelease: false
64
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
65
74
  requirements:
66
- - - '>='
75
+ - - ! '>='
67
76
  - !ruby/object:Gem::Version
68
77
  version: '0'
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: nokogiri
71
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
72
82
  requirements:
73
- - - '>='
83
+ - - ! '>='
74
84
  - !ruby/object:Gem::Version
75
- version: '0'
85
+ version: 1.5.1
76
86
  type: :development
77
87
  prerelease: false
78
88
  version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
79
90
  requirements:
80
- - - '>='
91
+ - - ! '>='
81
92
  - !ruby/object:Gem::Version
82
- version: '0'
93
+ version: 1.5.1
83
94
  - !ruby/object:Gem::Dependency
84
95
  name: yard
85
96
  requirement: !ruby/object:Gem::Requirement
97
+ none: false
86
98
  requirements:
87
- - - '>='
99
+ - - ! '>='
88
100
  - !ruby/object:Gem::Version
89
101
  version: '0'
90
102
  type: :development
91
103
  prerelease: false
92
104
  version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
93
106
  requirements:
94
- - - '>='
107
+ - - ! '>='
95
108
  - !ruby/object:Gem::Version
96
109
  version: '0'
97
110
  - !ruby/object:Gem::Dependency
98
111
  name: rest-client
99
112
  requirement: !ruby/object:Gem::Requirement
113
+ none: false
100
114
  requirements:
101
115
  - - ~>
102
116
  - !ruby/object:Gem::Version
@@ -104,6 +118,7 @@ dependencies:
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
107
122
  requirements:
108
123
  - - ~>
109
124
  - !ruby/object:Gem::Version
@@ -111,29 +126,33 @@ dependencies:
111
126
  - !ruby/object:Gem::Dependency
112
127
  name: nokogiri
113
128
  requirement: !ruby/object:Gem::Requirement
129
+ none: false
114
130
  requirements:
115
- - - '>='
131
+ - - ! '>='
116
132
  - !ruby/object:Gem::Version
117
133
  version: '0'
118
134
  type: :runtime
119
135
  prerelease: false
120
136
  version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
121
138
  requirements:
122
- - - '>='
139
+ - - ! '>='
123
140
  - !ruby/object:Gem::Version
124
141
  version: '0'
125
142
  - !ruby/object:Gem::Dependency
126
143
  name: mechanize
127
144
  requirement: !ruby/object:Gem::Requirement
145
+ none: false
128
146
  requirements:
129
- - - '>='
147
+ - - ! '>='
130
148
  - !ruby/object:Gem::Version
131
149
  version: '0'
132
150
  type: :runtime
133
151
  prerelease: false
134
152
  version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
135
154
  requirements:
136
- - - '>='
155
+ - - ! '>='
137
156
  - !ruby/object:Gem::Version
138
157
  version: '0'
139
158
  description: Don't re-write web scrapers every time. Upton gives you a scraper template
@@ -146,52 +165,54 @@ files:
146
165
  - lib/upton.rb
147
166
  - lib/upton/utils.rb
148
167
  - lib/upton/downloader.rb
149
- - spec/data/discussion.html
168
+ - lib/upton/version.rb
169
+ - spec/data/prosecutor.html
150
170
  - spec/data/easttimor.html
151
- - spec/data/propublica-relative.html
152
- - spec/data/propublica.html
171
+ - spec/data/discussion.html
153
172
  - spec/data/propublica_search.html
154
173
  - spec/data/propublica_search_page_2.html
155
- - spec/data/prosecutor.html
156
- - spec/data/sixfacts.html
174
+ - spec/data/propublica-relative.html
157
175
  - spec/data/webinar.html
176
+ - spec/data/propublica.html
177
+ - spec/data/sixfacts.html
158
178
  - spec/upton_spec.rb
159
179
  - spec/spec_helper.rb
160
180
  - spec/upton_downloader_spec.rb
161
181
  homepage: http://github.org/propublica/upton
162
182
  licenses:
163
183
  - MIT
164
- metadata: {}
165
184
  post_install_message:
166
185
  rdoc_options: []
167
186
  require_paths:
168
187
  - lib
169
188
  required_ruby_version: !ruby/object:Gem::Requirement
189
+ none: false
170
190
  requirements:
171
- - - '>='
191
+ - - ! '>='
172
192
  - !ruby/object:Gem::Version
173
- version: 1.8.7
193
+ version: 1.9.2
174
194
  required_rubygems_version: !ruby/object:Gem::Requirement
195
+ none: false
175
196
  requirements:
176
- - - '>='
197
+ - - ! '>='
177
198
  - !ruby/object:Gem::Version
178
199
  version: '0'
179
200
  requirements: []
180
201
  rubyforge_project:
181
- rubygems_version: 2.0.3
202
+ rubygems_version: 1.8.23
182
203
  signing_key:
183
- specification_version: 4
204
+ specification_version: 3
184
205
  summary: A simple web-scraping framework
185
206
  test_files:
186
- - spec/data/discussion.html
207
+ - spec/data/prosecutor.html
187
208
  - spec/data/easttimor.html
188
- - spec/data/propublica-relative.html
189
- - spec/data/propublica.html
209
+ - spec/data/discussion.html
190
210
  - spec/data/propublica_search.html
191
211
  - spec/data/propublica_search_page_2.html
192
- - spec/data/prosecutor.html
193
- - spec/data/sixfacts.html
212
+ - spec/data/propublica-relative.html
194
213
  - spec/data/webinar.html
214
+ - spec/data/propublica.html
215
+ - spec/data/sixfacts.html
195
216
  - spec/upton_spec.rb
196
217
  - spec/spec_helper.rb
197
218
  - spec/upton_downloader_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 2ef1916db6e2fb734cb8ea7ed33eb5edb67b37e3
4
- data.tar.gz: 2a9da49f8a47dfc9e1feab2138045f7aa49268d6
5
- SHA512:
6
- metadata.gz: e94a228a8fb01c90c0e7535b106b2af4dd8983ea3e92b2813cd5d038c3985a5f55c5fbcac19ee5f16f3271ad9e390d426f0ad8ad7b0c08afdf3b9d745cff2738
7
- data.tar.gz: f8b0475e022980cd6ca0eec6dc8512394723084ba59d0b47cd36c24c736fbfc4a58b52ce186a3f5b91c69fd1241dfaa9d57c5f71bf6867255426e0fd3f26ed0f