scraperwiki-api 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +42 -2
- data/lib/scraperwiki-api/matchers.rb +459 -0
- data/lib/scraperwiki-api/version.rb +1 -1
- data/lib/scraperwiki-api.rb +50 -7
- data/scraperwiki-api.gemspec +1 -1
- data/spec/scraperwiki-api_spec.rb +9 -9
- metadata +8 -9
- data/spec/spec.opts +0 -5
data/README.md
CHANGED
@@ -6,7 +6,7 @@ A Ruby wrapper for the ScraperWiki API.
 
     gem install scraperwiki-api
 
-## Examples
+## API Examples
 
     >> require 'scraperwiki-api'
 
@@ -30,7 +30,47 @@ A Ruby wrapper for the ScraperWiki API.
     >> api.scraper_usersearch searchquery: 'search terms'
     => [{"username"=>"johndoe", "profilename"=>"John Doe", "date_joined"=>...}]
 
-
+More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API).
+
+## Scraper validations
+
+If your project uses a lot of scrapers – for example, [OpenCorporates](http://opencorporates.com/), which [scrapes company registries around the world](http://blog.opencorporates.com/2011/03/25/building-a-global-database-the-open-distributed-way/), or [Represent](http://represent.opennorth.ca/), which scrapes information on elected officials from government websites in Canada – you'll want to check that your scrapers behave the way you expect them to. This gem defines [RSpec](https://www.relishapp.com/rspec) matchers to do just that. For example:
+
+    require 'scraperwiki-api'
+    api = ScraperWiki::API.new
+
+    info = api.scraper_getinfo('example-scraper').first
+
+    describe 'example-scraper' do
+      include ScraperWiki::API::Matchers
+      subject {info}
+
+      it {should be_protected}
+      it {should be_editable_by('frabcus')}
+      it {should run(:daily)}
+      it {should_not be_broken}
+      it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
+      it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
+      it {should have_a_row_count_of(42).on('swdata')}
+    end
+
+    data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
+
+    describe 'example-scraper' do
+      include ScraperWiki::API::Matchers
+      subject {data}
+
+      it {should_not have_blank_values.in('name')}
+      it {should have_unique_values.in('email')}
+      it {should have_values_of(['M', 'F']).in('gender')}
+      it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
+      it {should have_values_starting_with('http://').in('url')}
+      it {should have_values_ending_with('Inc.').in('company_name')}
+      it {should have_integer_values.in('year')}
+      it {should set_any_of(['name', 'first_name', 'last_name'])}
+    end
+
+More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API/Matchers).
 
 ## Bugs? Questions?
 
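The `## Scraper validations` section added above is ordinary RSpec code, so it can live in a plain spec file and run with the `rspec` command. A minimal sketch of that workflow, assuming a hypothetical `spec/validations/example_scraper_spec.rb` and a placeholder shortname `example-scraper`:

    # spec/validations/example_scraper_spec.rb (hypothetical path)
    require 'scraperwiki-api'

    api  = ScraperWiki::API.new                          # anonymous access is enough for public scrapers
    info = api.scraper_getinfo('example-scraper').first  # one API call, shared by all examples below

    describe 'example-scraper' do
      include ScraperWiki::API::Matchers
      subject {info}

      it {should_not be_broken}   # on failure, reports the scraper's exception_message
      it {should run(:daily)}     # compares run_interval against RUN_INTERVALS[:daily]
    end

Running `rspec spec/validations/example_scraper_spec.rb` then flags any scraper that has stopped behaving as expected.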
data/lib/scraperwiki-api/matchers.rb
ADDED
@@ -0,0 +1,459 @@
+require 'rspec'
+
+module ScraperWiki
+  class API
+    # @example
+    #   require 'scraperwiki-api'
+    #   api = ScraperWiki::API.new
+    #
+    #   info = api.scraper_getinfo('example-scraper').first
+    #
+    #   describe 'example-scraper' do
+    #     include ScraperWiki::API::Matchers
+    #     subject {info}
+    #
+    #     it {should be_protected}
+    #     it {should be_editable_by('frabcus')}
+    #     it {should run(:daily)}
+    #     it {should_not be_broken}
+    #     it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
+    #     it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
+    #     it {should have_a_row_count_of(42).on('swdata')}
+    #   end
+    #
+    #   data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
+    #
+    #   describe 'example-scraper' do
+    #     include ScraperWiki::API::Matchers
+    #     subject {data}
+    #
+    #     it {should_not have_blank_values.in('name')}
+    #     it {should have_unique_values.in('email')}
+    #     it {should have_values_of(['M', 'F']).in('gender')}
+    #     it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
+    #     it {should have_values_starting_with('http://').in('url')}
+    #     it {should have_values_ending_with('Inc.').in('company_name')}
+    #     it {should have_integer_values.in('year')}
+    #     it {should set_any_of(['name', 'first_name', 'last_name'])}
+    #   end
+    #
+    # RSpec matchers for ScraperWiki scrapers.
+    # @see http://rubydoc.info/gems/rspec-expectations/RSpec/Matchers
+    module Matchers
+      class CustomMatcher
+        def initialize(expected)
+          @expected = expected
+        end
+
+        def matches?(actual)
+          @actual = actual
+        end
+
+        def does_not_match?(actual)
+          @actual = actual
+        end
+
+        def failure_message
+          NotImplementerError
+        end
+
+        def negative_failure_message
+          failure_message
+        end
+      end
+
+      # Scraper matchers -------------------------------------------------------
+
+      class ScraperInfoMatcher < CustomMatcher
+      end
+
+      class PrivacyStatusMatcher < ScraperInfoMatcher
+        def matches?(actual)
+          super
+          actual['privacy_status'] == @expected
+        end
+
+        def failure_message
+          "expected #{@actual['short_name']} to be #{@expected}"
+        end
+      end
+      # @example
+      #   it {should be_public}
+      def be_public
+        PrivacyStatusMatcher.new 'public'
+      end
+      # @example
+      #   it {should be_protected}
+      def be_protected
+        PrivacyStatusMatcher.new 'visible'
+      end
+      # @example
+      #   it {should be_private}
+      def be_private
+        PrivacyStatusMatcher.new 'private'
+      end
+
+      class UserRolesMatcher < ScraperInfoMatcher
+        def matches?(actual)
+          super
+          %w(owner editor).any? do |userrole|
+            actual['userroles'][userrole].include? @expected
+          end
+        end
+
+        def failure_message
+          "expected #{@actual['short_name']} to be editable by #{@expected}"
+        end
+      end
+      # @example
+      #   it {should be_editable_by 'frabcus'}
+      def be_editable_by(expected)
+        UserRolesMatcher.new expected
+      end
+
+      class RunIntervalMatcher < ScraperInfoMatcher
+        def matches?(actual)
+          super
+          actual['run_interval'] == ScraperWiki::API::RUN_INTERVALS[@expected]
+        end
+
+        def failure_message
+          if @expected == -1
+            "expected #{@actual['short_name']} to never run"
+          else
+            "expected #{@actual['short_name']} to run #{@expected}"
+          end
+        end
+      end
+      # @example
+      #   it {should run(:daily)}
+      def run(expected)
+        RunIntervalMatcher.new expected
+      end
+      # @example
+      #   it {should never_run}
+      def never_run
+        RunIntervalMatcher.new :never
+      end
+
+      class TablesMatcher < ScraperInfoMatcher
+        def on(table)
+          @table = table
+          self
+        end
+      end
+
+      class KeysMatcher < TablesMatcher
+        def matches?(actual)
+          super
+          difference.empty?
+        end
+
+        def failure_predicate
+          raise NotImplementerError
+        end
+
+        def failure_message
+          "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
+        end
+      end
+
+      class MissingKeysMatcher < KeysMatcher
+        def difference
+          @expected - @actual['datasummary']['tables'][@table]['keys']
+        end
+
+        def failure_predicate
+          'is missing keys'
+        end
+      end
+      # @example
+      #   it {should have_at_least_the_keys(['fieldA', 'fieldB']).on('swdata')}
+      def have_at_least_the_keys(expected)
+        MissingKeysMatcher.new expected
+      end
+
+      class ExtraKeysMatcher < KeysMatcher
+        def difference
+          @actual['datasummary']['tables'][@table]['keys'] - @expected
+        end
+
+        def failure_predicate
+          'has extra keys'
+        end
+      end
+      # @example
+      #   it {should have_at_most_the_keys(['fieldA', 'fieldB', 'fieldC', 'fieldD']).on('swdata')}
+      def have_at_most_the_keys(expected)
+        ExtraKeysMatcher.new expected
+      end
+
+      class CountMatcher < TablesMatcher
+        def matches?(actual)
+          super
+          actual['datasummary']['tables'][@table]['count'] == @expected
+        end
+
+        def failure_message
+          "expected #{@actual['short_name']} to have #{@expected} rows, not #{@actual['datasummary']['tables'][@table]['count']}"
+        end
+      end
+      # @example
+      #   it {should have_a_row_count_of(42).on('swdata')}
+      def have_a_row_count_of(expected)
+        CountMatcher.new expected
+      end
+
+      class RunEventsMatcher < ScraperInfoMatcher
+        def last_run
+          @actual['runevents'][0]
+        end
+      end
+
+      class ExceptionMessageMatcher < RunEventsMatcher
+        def matches?(actual)
+          super
+          exception_message
+        end
+
+        def exception_message
+          last_run['exception_message']
+        end
+
+        def failure_message
+          "#{@actual['short_name']} is broken: #{exception_message}"
+        end
+      end
+      # @example
+      #   it {should_not be_broken}
+      def be_broken
+        ExceptionMessageMatcher.new nil
+      end
+
+      # Datastore matchers -----------------------------------------------------
+
+      class DatastoreMatcher < CustomMatcher
+        def items
+          @items ||= if Array === @actual
+            @actual
+          elsif Hash === @actual
+            @actual['data'].map do |array|
+              hash = {}
+              @actual['keys'].each_with_index do |key,index|
+                hash[key] = array[index]
+              end
+              hash
+            end
+          else
+            raise NotImplementerError
+          end
+        end
+
+        def matches?(actual)
+          super
+          @mismatches = mismatches
+          @mismatches.empty?
+        end
+
+        def does_not_match?(actual)
+          super
+          @matches = matches
+          @matches.empty?
+        end
+
+        def matches
+          raise NotImplementerError
+        end
+
+        def mismatches
+          raise NotImplementerError
+        end
+
+        def failures
+          if @mismatches
+            @mismatches
+          else
+            @matches
+          end
+        end
+
+        def failure_size
+          if @mismatches
+            @mismatches.size
+          else
+            @matches.size
+          end
+        end
+
+        def failure_description
+          raise NotImplementerError
+        end
+
+        def failure_message
+          "#{failure_size} of #{items.size} #{failure_description}\n#{failures.map(&:inspect).join "\n"}"
+        end
+
+        def negative_failure_message
+          failure_message
+        end
+      end
+
+      class SetAnyOf < DatastoreMatcher
+        def mismatches
+          items.select do |item|
+            @expected.all? do |field|
+              item[field].respond_to?(:empty?) ? item[field].empty? : !item[field]
+            end
+          end
+        end
+
+        def failure_description
+          "records didn't set any of #{@expected.join ','}"
+        end
+      end
+      # @example
+      #   it {should set_any_of(['name', 'first_name', 'last_name'])}
+      def set_any_of(expected)
+        SetAnyOf.new expected
+      end
+
+      class FieldMatcher < DatastoreMatcher
+        def in(field)
+          @field = field
+          self
+        end
+
+        def matches
+          items.select do |item|
+            match? item[@field]
+          end
+        end
+
+        def mismatches
+          items.reject do |item|
+            match? item[@field]
+          end
+        end
+
+        def blank?(v)
+          v.respond_to?(:empty?) ? v.empty? : !v
+        end
+
+        def failure_description
+          "'#{@field}' values #{failure_predicate}"
+        end
+      end
+
+      class HaveBlankValues < FieldMatcher
+        def match?(v)
+          blank? v
+        end
+
+        def failure_predicate
+          'are blank'
+        end
+      end
+      # @example
+      #   it {should_not have_blank_values.in('name')}
+      def have_blank_values
+        HaveBlankValues.new nil
+      end
+
+      class HaveValuesOf < FieldMatcher
+        def match?(v)
+          blank?(v) || @expected.include?(v)
+        end
+
+        def failure_predicate
+          "aren't one of #{@expected.join ', '}"
+        end
+      end
+      # @example
+      #   it {should have_values_of(['M', 'F']).in('gender')}
+      def have_values_of(expected)
+        HaveValuesOf.new expected
+      end
+
+      class HaveValuesMatching < FieldMatcher
+        def match?(v)
+          blank?(v) || v[@expected]
+        end
+
+        def failure_predicate
+          "don't match #{@expected.inspect}"
+        end
+      end
+      # @example
+      #   it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
+      def have_values_matching(expected)
+        HaveValuesMatching.new expected
+      end
+
+      class HaveUniqueValues < FieldMatcher
+        def mismatches
+          counts = Hash.new 0
+          items.each_with_index do |item,index|
+            unless blank? item[@field]
+              counts[item[@field]] += 1
+            end
+          end
+          counts.select{|_,count| count > 1}.keys
+        end
+
+        def failure_predicate
+          'are not unique'
+        end
+      end
+      # @example
+      #   it {should have_unique_values.in('email')}
+      def have_unique_values
+        HaveUniqueValues.new nil
+      end
+
+      class HaveValuesStartingWith < FieldMatcher
+        def match?(v)
+          blank?(v) || v.start_with?(@expected)
+        end
+
+        def failure_predicate
+          "don't start with #{@expected}"
+        end
+      end
+      # @example
+      #   it {should have_values_starting_with('http://').in('url')}
+      def have_values_starting_with(expected)
+        HaveValuesStartingWith.new expected
+      end
+
+      class HaveValuesEndingWith < FieldMatcher
+        def match?(v)
+          blank?(v) || v.end_with?(@expected)
+        end
+
+        def failure_predicate
+          "don't end with #{@expected}"
+        end
+      end
+      # @example
+      #   it {should have_values_ending_with('Inc.').in('company_name')}
+      def have_values_ending_with(expected)
+        HaveValuesEndingWith.new expected
+      end
+
+      class HaveIntegerValues < FieldMatcher
+        def match?(v)
+          blank?(v) || (Integer(v) rescue false)
+        end
+
+        def failure_predicate
+          "aren't integers"
+        end
+      end
+      # @example
+      #   it {should have_integer_values.in('year')}
+      def have_integer_values
+        HaveIntegerValues.new nil
+      end
+    end
+  end
+end
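Both matcher families above operate on plain Ruby data: the scraper matchers expect one hash from `scraper_getinfo`, and the datastore matchers accept either an array of row hashes or the `keys`/`data` hash that `datastore_sqlite` returns. That makes their behaviour easy to see without touching the API. A minimal sketch against hand-written fixtures (the hashes below are illustrative, not real API output; the extra `require` is harmless if the main file already loads the matchers):

    require 'scraperwiki-api'
    require 'scraperwiki-api/matchers'

    # Shaped like one item of scraper_getinfo output: be_protected passes
    # when privacy_status is "visible" (see PrivacyStatusMatcher above).
    info = {'short_name' => 'example-scraper', 'privacy_status' => 'visible'}

    # Shaped like datastore data in list-of-hashes form: have_unique_values
    # rejects duplicated non-blank values in the named field.
    data = [{'email' => 'a@example.com'}, {'email' => 'b@example.com'}]

    describe 'scraper info matchers' do
      include ScraperWiki::API::Matchers
      subject {info}

      it {should be_protected}
    end

    describe 'datastore matchers' do
      include ScraperWiki::API::Matchers
      subject {data}

      it {should have_unique_values.in('email')}
    end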
data/lib/scraperwiki-api.rb
CHANGED
@@ -1,5 +1,4 @@
 require 'httparty'
-require 'scraperwiki-api/version'
 
 module ScraperWiki
   # A Ruby wrapper for the ScraperWiki API.
@@ -11,6 +10,32 @@ module ScraperWiki
     class Error < StandardError; end
     class ScraperNotFound < Error; end
 
+    RUN_INTERVALS = {
+      never: -1,
+      monthly: 2678400,
+      weekly: 604800,
+      daily: 86400,
+      hourly: 3600,
+    }
+
+    class << self
+      # Returns the URL to the scraper's overview.
+      #
+      # @param [String] shortname the scraper's shortname
+      # @return [String] the URL to the scraper's overview
+      def scraper_url(shortname)
+        "https://scraperwiki.com/scrapers/#{shortname}/"
+      end
+
+      # Returns the URL to edit the scraper.
+      #
+      # @param [String] shortname the scraper's shortname
+      # @return [String] the URL to edit the scraper
+      def edit_scraper_url(shortname)
+        "https://scraperwiki.com/scrapers/#{shortname}/edit/"
+      end
+    end
+
     # Initializes a ScraperWiki API object.
     def initialize(apikey = nil)
       @apikey = apikey
@@ -56,10 +81,11 @@ module ScraperWiki
     #   "htmltable" or "rss2"
     # @option opts [String] :attach ";"-delimited list of shortnames of other
     #   scrapers whose data you need to access
+    # @return [Array,Hash,String]
     # @see https://scraperwiki.com/docs/ruby/ruby_help_documentation/
     #
     # @note The query string parameter is +name+, not +shortname+
-    #   {https://scraperwiki.com/docs/api#sqlite as
+    #   {https://scraperwiki.com/docs/api#sqlite as in the ScraperWiki docs}
     def datastore_sqlite(shortname, query, opts = {})
       if Array === opts[:attach]
         opts[:attach] = opts[:attach].join ';'
@@ -69,10 +95,22 @@ module ScraperWiki
 
     # Extracts data about a scraper's code, owner, history, etc.
     #
-    #
-    # * The +runid+ is a Unix timestamp with microseconds and a UUID.
+    # * +runid+ is a Unix timestamp with microseconds and a UUID.
     # * The value of +records+ is the same as that of +total_rows+ under +datasummary+.
-    # * +run_interval+ is the number of seconds between runs.
+    # * +run_interval+ is the number of seconds between runs. It is one of:
+    #   * -1 (never)
+    #   * 2678400 (monthly)
+    #   * 604800 (weekly)
+    #   * 86400 (daily)
+    #   * 3600 (hourly)
+    # * +privacy_status+ is one of:
+    #   * "public" (everyone can see and edit the scraper and its data)
+    #   * "visible" (everyone can see the scraper, but only contributors can edit it)
+    #   * "private" (only contributors can see and edit the scraper and its data)
+    # * An individual +runevents+ hash will have an +exception_message+ key if
+    #   there was an error during that run.
+    #
+    # Example output:
     #
     #   [
     #     {
@@ -153,11 +191,12 @@ module ScraperWiki
     #   restricted to this date or after, enter as YYYY-MM-DD
     # @option opts [String] :quietfields "|"-delimited list of fields to exclude
     #   from the output. Must be a subset of 'code|runevents|datasummary|userroles|history'
+    # @return [Array]
     #
     # @note Returns an array although the array seems to always have only one item
     # @note The +tags+ field seems to always be an empty array
     # @note The query string parameter is +name+, not +shortname+
-    #   {https://scraperwiki.com/docs/api#getinfo as
+    #   {https://scraperwiki.com/docs/api#getinfo as in the ScraperWiki docs}
     def scraper_getinfo(shortname, opts = {})
       if Array === opts[:quietfields]
         opts[:quietfields] = opts[:quietfields].join '|'
@@ -192,10 +231,11 @@ module ScraperWiki
     # @param [String] shortname the scraper's shortname (as it appears in the URL)
     # @param [Hash] opts optional arguments
     # @option opts [String] runid a run ID
+    # @return [Array]
     #
     # @note Returns an array although the array seems to always have only one item
     # @note The query string parameter is +name+, not +shortname+
-    #   {https://scraperwiki.com/docs/api#getinfo as
+    #   {https://scraperwiki.com/docs/api#getinfo as in the ScraperWiki docs}
     def scraper_getruninfo(shortname, opts = {})
       request_with_apikey '/scraper/getruninfo', {name: shortname}.merge(opts)
     end
@@ -227,6 +267,7 @@ module ScraperWiki
     #   ]
     #
     # @param [String] username a username
+    # @return [Array]
     #
     # @note Returns an array although the array seems to always have only one item
     # @note The date joined field is +date_joined+ (with underscore) on
@@ -256,6 +297,7 @@ module ScraperWiki
     # @option opts [Integer] :maxrows number of results to return [default 5]
     # @option opts [String] :requestinguser the name of the user making the
     #   search, which changes the order of the matches
+    # @return [Array]
     def scraper_search(opts = {})
       request_with_apikey '/scraper/search', opts
     end
@@ -280,6 +322,7 @@ module ScraperWiki
     #   from the output
     # @option opts [String] :requestinguser the name of the user making the
     #   search, which changes the order of the matches
+    # @return [Array]
     #
     # @note The date joined field is +datejoined+ (without underscore) on
     #   {#scraper_getuserinfo}
data/spec/scraperwiki-api_spec.rb
CHANGED
@@ -22,7 +22,7 @@ class ScraperWiki::API
     it 'should return a non-empty array containing a single hash' do
       response = @api.scraper_getinfo EXAMPLE_SHORTNAME
       response.should be_an(Array)
-      response.
+      response.should have(1).item
       response.first.should be_a(Hash)
     end
 
@@ -36,10 +36,10 @@ class ScraperWiki::API
 
     it 'should respect the :history_start_date argument' do
       bare = @api.scraper_getinfo(EXAMPLE_SHORTNAME).first
-      bare['history'].
+      bare['history'].should have_at_least(2).items
       history_start_date = bare['history'][0]['date'][0..9]
       result = @api.scraper_getinfo(EXAMPLE_SHORTNAME, history_start_date: history_start_date).first
-      result['history'].
+      result['history'].should have(1).item
     end
 
     it 'should respect the :quietfields argument (as an array)' do
@@ -61,7 +61,7 @@ class ScraperWiki::API
     it 'should return a non-empty array containing a single hash' do
       response = @api.scraper_getruninfo EXAMPLE_SHORTNAME
       response.should be_an(Array)
-      response.
+      response.should have(1).item
       response.first.should be_a(Hash)
     end
 
@@ -78,7 +78,7 @@ class ScraperWiki::API
     it 'should return a non-empty array containing a single hash' do
       response = @api.scraper_getuserinfo EXAMPLE_USERNAME
       response.should be_an(Array)
-      response.
+      response.should have(1).item
       response.first.should be_a(Hash)
     end
   end
@@ -87,7 +87,7 @@ class ScraperWiki::API
     it 'should return a non-empty array of hashes' do
       response = @api.scraper_search
       response.should be_an(Array)
-      response.
+      response.should have_at_least(1).item
       response.first.should be_a(Hash)
     end
 
@@ -98,7 +98,7 @@ class ScraperWiki::API
     end
 
     it 'should respect the :maxrows argument' do
-      @api.scraper_search(maxrows: 1).
+      @api.scraper_search(maxrows: 1).should have(1).item
     end
   end
 
@@ -106,7 +106,7 @@ class ScraperWiki::API
     it 'should return a non-empty array of hashes' do
       response = @api.scraper_usersearch
       response.should be_an(Array)
-      response.
+      response.should have_at_least(1).item
       response.first.should be_a(Hash)
     end
 
@@ -117,7 +117,7 @@ class ScraperWiki::API
     end
 
     it 'should respect the :maxrows argument' do
-      @api.scraper_usersearch(maxrows: 1).
+      @api.scraper_usersearch(maxrows: 1).should have(1).item
     end
 
     it 'should respect the :nolist argument (as an array)' do
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scraperwiki-api
 version: !ruby/object:Gem::Version
-  version: 0.0.
+  version: 0.0.3
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-
+date: 2012-05-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: httparty
-  requirement: &
+  requirement: &70314147045360 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,18 +21,18 @@ dependencies:
         version: 0.7.8
   type: :runtime
   prerelease: false
-  version_requirements: *
+  version_requirements: *70314147045360
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &
+  requirement: &70314147044520 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 2.
+        version: 2.10.0
   type: :development
   prerelease: false
-  version_requirements: *
+  version_requirements: *70314147044520
 description: A Ruby wrapper for the ScraperWiki API
 email:
 - info@opennorth.ca
@@ -47,10 +47,10 @@ files:
 - Rakefile
 - USAGE
 - lib/scraperwiki-api.rb
+- lib/scraperwiki-api/matchers.rb
 - lib/scraperwiki-api/version.rb
 - scraperwiki-api.gemspec
 - spec/scraperwiki-api_spec.rb
-- spec/spec.opts
 - spec/spec_helper.rb
 homepage: http://github.com/opennorth/scraperwiki-api-ruby
 licenses: []
@@ -78,5 +78,4 @@ specification_version: 3
 summary: The ScraperWiki API Ruby Gem
 test_files:
 - spec/scraperwiki-api_spec.rb
-- spec/spec.opts
 - spec/spec_helper.rb