scraperwiki-api 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +42 -2
- data/lib/scraperwiki-api/matchers.rb +459 -0
- data/lib/scraperwiki-api/version.rb +1 -1
- data/lib/scraperwiki-api.rb +50 -7
- data/scraperwiki-api.gemspec +1 -1
- data/spec/scraperwiki-api_spec.rb +9 -9
- metadata +8 -9
- data/spec/spec.opts +0 -5
data/README.md
CHANGED
@@ -6,7 +6,7 @@ A Ruby wrapper for the ScraperWiki API.
|
|
6
6
|
|
7
7
|
gem install scraperwiki-api
|
8
8
|
|
9
|
-
## Examples
|
9
|
+
## API Examples
|
10
10
|
|
11
11
|
>> require 'scraperwiki-api'
|
12
12
|
|
@@ -30,7 +30,47 @@ A Ruby wrapper for the ScraperWiki API.
|
|
30
30
|
>> api.scraper_usersearch searchquery: 'search terms'
|
31
31
|
=> [{"username"=>"johndoe", "profilename"=>"John Doe", "date_joined"=>...}]
|
32
32
|
|
33
|
-
|
33
|
+
More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API).
|
34
|
+
|
35
|
+
## Scraper validations
|
36
|
+
|
37
|
+
If your project uses a lot of scrapers – for example, [OpenCorporates](http://opencorporates.com/), which [scrapes company registries around the world](http://blog.opencorporates.com/2011/03/25/building-a-global-database-the-open-distributed-way/), or [Represent](http://represent.opennorth.ca/), which scrapes information on elected officials from government websites in Canada – you'll want to check that your scrapers behave the way you expect them to. This gem defines [RSpec](https://www.relishapp.com/rspec) matchers to do just that. For example:
|
38
|
+
|
39
|
+
require 'scraperwiki-api'
|
40
|
+
api = ScraperWiki::API.new
|
41
|
+
|
42
|
+
info = api.scraper_getinfo('example-scraper').first
|
43
|
+
|
44
|
+
describe 'example-scraper' do
|
45
|
+
include ScraperWiki::API::Matchers
|
46
|
+
subject {info}
|
47
|
+
|
48
|
+
it {should be_protected}
|
49
|
+
it {should be_editable_by('frabcus')}
|
50
|
+
it {should run(:daily)}
|
51
|
+
it {should_not be_broken}
|
52
|
+
it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
|
53
|
+
it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
|
54
|
+
it {should have_a_row_count_of(42).on('swdata')}
|
55
|
+
end
|
56
|
+
|
57
|
+
data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
|
58
|
+
|
59
|
+
describe 'example-scraper' do
|
60
|
+
include ScraperWiki::API::Matchers
|
61
|
+
subject {data}
|
62
|
+
|
63
|
+
it {should_not have_blank_values.in('name')}
|
64
|
+
it {should have_unique_values.in('email')}
|
65
|
+
it {should have_values_of(['M', 'F']).in('gender')}
|
66
|
+
it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
|
67
|
+
it {should have_values_starting_with('http://').in('url')}
|
68
|
+
it {should have_values_ending_with('Inc.').in('company_name')}
|
69
|
+
it {should have_integer_values.in('year')}
|
70
|
+
it {should set_any_of(['name', 'first_name', 'last_name'])}
|
71
|
+
end
|
72
|
+
|
73
|
+
More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API/Matchers).
|
34
74
|
|
35
75
|
## Bugs? Questions?
|
36
76
|
|
@@ -0,0 +1,459 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
|
3
|
+
module ScraperWiki
|
4
|
+
class API
|
5
|
+
# @example
|
6
|
+
# require 'scraperwiki-api'
|
7
|
+
# api = ScraperWiki::API.new
|
8
|
+
#
|
9
|
+
# info = api.scraper_getinfo('example-scraper').first
|
10
|
+
#
|
11
|
+
# describe 'example-scraper' do
|
12
|
+
# include ScraperWiki::API::Matchers
|
13
|
+
# subject {info}
|
14
|
+
#
|
15
|
+
# it {should be_protected}
|
16
|
+
# it {should be_editable_by('frabcus')}
|
17
|
+
# it {should run(:daily)}
|
18
|
+
# it {should_not be_broken}
|
19
|
+
# it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
|
20
|
+
# it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
|
21
|
+
# it {should have_a_row_count_of(42).on('swdata')}
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
|
25
|
+
#
|
26
|
+
# describe 'example-scraper' do
|
27
|
+
# include ScraperWiki::API::Matchers
|
28
|
+
# subject {data}
|
29
|
+
#
|
30
|
+
# it {should_not have_blank_values.in('name')}
|
31
|
+
# it {should have_unique_values.in('email')}
|
32
|
+
# it {should have_values_of(['M', 'F']).in('gender')}
|
33
|
+
# it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
|
34
|
+
# it {should have_values_starting_with('http://').in('url')}
|
35
|
+
# it {should have_values_ending_with('Inc.').in('company_name')}
|
36
|
+
# it {should have_integer_values.in('year')}
|
37
|
+
# it {should set_any_of(['name', 'first_name', 'last_name'])}
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# RSpec matchers for ScraperWiki scrapers.
|
41
|
+
# @see http://rubydoc.info/gems/rspec-expectations/RSpec/Matchers
|
42
|
+
module Matchers
|
43
|
+
class CustomMatcher
|
44
|
+
def initialize(expected)
|
45
|
+
@expected = expected
|
46
|
+
end
|
47
|
+
|
48
|
+
def matches?(actual)
|
49
|
+
@actual = actual
|
50
|
+
end
|
51
|
+
|
52
|
+
def does_not_match?(actual)
|
53
|
+
@actual = actual
|
54
|
+
end
|
55
|
+
|
56
|
+
def failure_message
|
57
|
+
raise NotImplementedError
|
58
|
+
end
|
59
|
+
|
60
|
+
def negative_failure_message
|
61
|
+
failure_message
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Scraper matchers -------------------------------------------------------
|
66
|
+
|
67
|
+
class ScraperInfoMatcher < CustomMatcher
|
68
|
+
end
|
69
|
+
|
70
|
+
class PrivacyStatusMatcher < ScraperInfoMatcher
|
71
|
+
def matches?(actual)
|
72
|
+
super
|
73
|
+
actual['privacy_status'] == @expected
|
74
|
+
end
|
75
|
+
|
76
|
+
def failure_message
|
77
|
+
"expected #{@actual['short_name']} to be #{@expected}"
|
78
|
+
end
|
79
|
+
end
|
80
|
+
# @example
|
81
|
+
# it {should be_public}
|
82
|
+
def be_public
|
83
|
+
PrivacyStatusMatcher.new 'public'
|
84
|
+
end
|
85
|
+
# @example
|
86
|
+
# it {should be_protected}
|
87
|
+
def be_protected
|
88
|
+
PrivacyStatusMatcher.new 'visible'
|
89
|
+
end
|
90
|
+
# @example
|
91
|
+
# it {should be_private}
|
92
|
+
def be_private
|
93
|
+
PrivacyStatusMatcher.new 'private'
|
94
|
+
end
|
95
|
+
|
96
|
+
class UserRolesMatcher < ScraperInfoMatcher
|
97
|
+
def matches?(actual)
|
98
|
+
super
|
99
|
+
%w(owner editor).any? do |userrole|
|
100
|
+
actual['userroles'][userrole].include? @expected
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def failure_message
|
105
|
+
"expected #{@actual['short_name']} to be editable by #{@expected}"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
# @example
|
109
|
+
# it {should be_editable_by 'frabcus'}
|
110
|
+
def be_editable_by(expected)
|
111
|
+
UserRolesMatcher.new expected
|
112
|
+
end
|
113
|
+
|
114
|
+
class RunIntervalMatcher < ScraperInfoMatcher
|
115
|
+
def matches?(actual)
|
116
|
+
super
|
117
|
+
actual['run_interval'] == ScraperWiki::API::RUN_INTERVALS[@expected]
|
118
|
+
end
|
119
|
+
|
120
|
+
def failure_message
|
121
|
+
if @expected == -1
|
122
|
+
"expected #{@actual['short_name']} to never run"
|
123
|
+
else
|
124
|
+
"expected #{@actual['short_name']} to run #{@expected}"
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
# @example
|
129
|
+
# it {should run(:daily)}
|
130
|
+
def run(expected)
|
131
|
+
RunIntervalMatcher.new expected
|
132
|
+
end
|
133
|
+
# @example
|
134
|
+
# it {should never_run}
|
135
|
+
def never_run
|
136
|
+
RunIntervalMatcher.new :never
|
137
|
+
end
|
138
|
+
|
139
|
+
class TablesMatcher < ScraperInfoMatcher
|
140
|
+
def on(table)
|
141
|
+
@table = table
|
142
|
+
self
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
class KeysMatcher < TablesMatcher
|
147
|
+
def matches?(actual)
|
148
|
+
super
|
149
|
+
difference.empty?
|
150
|
+
end
|
151
|
+
|
152
|
+
def failure_predicate
|
153
|
+
raise NotImplementedError
|
154
|
+
end
|
155
|
+
|
156
|
+
def failure_message
|
157
|
+
"#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
class MissingKeysMatcher < KeysMatcher
|
162
|
+
def difference
|
163
|
+
@expected - @actual['datasummary']['tables'][@table]['keys']
|
164
|
+
end
|
165
|
+
|
166
|
+
def failure_predicate
|
167
|
+
'is missing keys'
|
168
|
+
end
|
169
|
+
end
|
170
|
+
# @example
|
171
|
+
# it {should have_at_least_the_keys(['fieldA', 'fieldB']).on('swdata')}
|
172
|
+
def have_at_least_the_keys(expected)
|
173
|
+
MissingKeysMatcher.new expected
|
174
|
+
end
|
175
|
+
|
176
|
+
class ExtraKeysMatcher < KeysMatcher
|
177
|
+
def difference
|
178
|
+
@actual['datasummary']['tables'][@table]['keys'] - @expected
|
179
|
+
end
|
180
|
+
|
181
|
+
def failure_predicate
|
182
|
+
'has extra keys'
|
183
|
+
end
|
184
|
+
end
|
185
|
+
# @example
|
186
|
+
# it {should have_at_most_the_keys(['fieldA', 'fieldB', 'fieldC', 'fieldD']).on('swdata')}
|
187
|
+
def have_at_most_the_keys(expected)
|
188
|
+
ExtraKeysMatcher.new expected
|
189
|
+
end
|
190
|
+
|
191
|
+
class CountMatcher < TablesMatcher
|
192
|
+
def matches?(actual)
|
193
|
+
super
|
194
|
+
actual['datasummary']['tables'][@table]['count'] == @expected
|
195
|
+
end
|
196
|
+
|
197
|
+
def failure_message
|
198
|
+
"expected #{@actual['short_name']} to have #{@expected} rows, not #{@actual['datasummary']['tables'][@table]['count']}"
|
199
|
+
end
|
200
|
+
end
|
201
|
+
# @example
|
202
|
+
# it {should have_a_row_count_of(42).on('swdata')}
|
203
|
+
def have_a_row_count_of(expected)
|
204
|
+
CountMatcher.new expected
|
205
|
+
end
|
206
|
+
|
207
|
+
class RunEventsMatcher < ScraperInfoMatcher
|
208
|
+
def last_run
|
209
|
+
@actual['runevents'][0]
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
class ExceptionMessageMatcher < RunEventsMatcher
|
214
|
+
def matches?(actual)
|
215
|
+
super
|
216
|
+
exception_message
|
217
|
+
end
|
218
|
+
|
219
|
+
def exception_message
|
220
|
+
last_run['exception_message']
|
221
|
+
end
|
222
|
+
|
223
|
+
def failure_message
|
224
|
+
"#{@actual['short_name']} is broken: #{exception_message}"
|
225
|
+
end
|
226
|
+
end
|
227
|
+
# @example
|
228
|
+
# it {should_not be_broken}
|
229
|
+
def be_broken
|
230
|
+
ExceptionMessageMatcher.new nil
|
231
|
+
end
|
232
|
+
|
233
|
+
# Datastore matchers -----------------------------------------------------
|
234
|
+
|
235
|
+
class DatastoreMatcher < CustomMatcher
|
236
|
+
def items
|
237
|
+
@items ||= if Array === @actual
|
238
|
+
@actual
|
239
|
+
elsif Hash === @actual
|
240
|
+
@actual['data'].map do |array|
|
241
|
+
hash = {}
|
242
|
+
@actual['keys'].each_with_index do |key,index|
|
243
|
+
hash[key] = array[index]
|
244
|
+
end
|
245
|
+
hash
|
246
|
+
end
|
247
|
+
else
|
248
|
+
raise NotImplementedError
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
def matches?(actual)
|
253
|
+
super
|
254
|
+
@mismatches = mismatches
|
255
|
+
@mismatches.empty?
|
256
|
+
end
|
257
|
+
|
258
|
+
def does_not_match?(actual)
|
259
|
+
super
|
260
|
+
@matches = matches
|
261
|
+
@matches.empty?
|
262
|
+
end
|
263
|
+
|
264
|
+
def matches
|
265
|
+
raise NotImplementedError
|
266
|
+
end
|
267
|
+
|
268
|
+
def mismatches
|
269
|
+
raise NotImplementedError
|
270
|
+
end
|
271
|
+
|
272
|
+
def failures
|
273
|
+
if @mismatches
|
274
|
+
@mismatches
|
275
|
+
else
|
276
|
+
@matches
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
def failure_size
|
281
|
+
if @mismatches
|
282
|
+
@mismatches.size
|
283
|
+
else
|
284
|
+
@matches.size
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def failure_description
|
289
|
+
raise NotImplementedError
|
290
|
+
end
|
291
|
+
|
292
|
+
def failure_message
|
293
|
+
"#{failure_size} of #{items.size} #{failure_description}\n#{failures.map(&:inspect).join "\n"}"
|
294
|
+
end
|
295
|
+
|
296
|
+
def negative_failure_message
|
297
|
+
failure_message
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
class SetAnyOf < DatastoreMatcher
|
302
|
+
def mismatches
|
303
|
+
items.select do |item|
|
304
|
+
@expected.all? do |field|
|
305
|
+
item[field].respond_to?(:empty?) ? item[field].empty? : !item[field]
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
def failure_description
|
311
|
+
"records didn't set any of #{@expected.join ','}"
|
312
|
+
end
|
313
|
+
end
|
314
|
+
# @example
|
315
|
+
# it {should set_any_of(['name', 'first_name', 'last_name'])}
|
316
|
+
def set_any_of(expected)
|
317
|
+
SetAnyOf.new expected
|
318
|
+
end
|
319
|
+
|
320
|
+
class FieldMatcher < DatastoreMatcher
|
321
|
+
def in(field)
|
322
|
+
@field = field
|
323
|
+
self
|
324
|
+
end
|
325
|
+
|
326
|
+
def matches
|
327
|
+
items.select do |item|
|
328
|
+
match? item[@field]
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
def mismatches
|
333
|
+
items.reject do |item|
|
334
|
+
match? item[@field]
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
338
|
+
def blank?(v)
|
339
|
+
v.respond_to?(:empty?) ? v.empty? : !v
|
340
|
+
end
|
341
|
+
|
342
|
+
def failure_description
|
343
|
+
"'#{@field}' values #{failure_predicate}"
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
class HaveBlankValues < FieldMatcher
|
348
|
+
def match?(v)
|
349
|
+
blank? v
|
350
|
+
end
|
351
|
+
|
352
|
+
def failure_predicate
|
353
|
+
'are blank'
|
354
|
+
end
|
355
|
+
end
|
356
|
+
# @example
|
357
|
+
# it {should_not have_blank_values.in('name')}
|
358
|
+
def have_blank_values
|
359
|
+
HaveBlankValues.new nil
|
360
|
+
end
|
361
|
+
|
362
|
+
class HaveValuesOf < FieldMatcher
|
363
|
+
def match?(v)
|
364
|
+
blank?(v) || @expected.include?(v)
|
365
|
+
end
|
366
|
+
|
367
|
+
def failure_predicate
|
368
|
+
"aren't one of #{@expected.join ', '}"
|
369
|
+
end
|
370
|
+
end
|
371
|
+
# @example
|
372
|
+
# it {should have_values_of(['M', 'F']).in('gender')}
|
373
|
+
def have_values_of(expected)
|
374
|
+
HaveValuesOf.new expected
|
375
|
+
end
|
376
|
+
|
377
|
+
class HaveValuesMatching < FieldMatcher
|
378
|
+
def match?(v)
|
379
|
+
blank?(v) || v[@expected]
|
380
|
+
end
|
381
|
+
|
382
|
+
def failure_predicate
|
383
|
+
"don't match #{@expected.inspect}"
|
384
|
+
end
|
385
|
+
end
|
386
|
+
# @example
|
387
|
+
# it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
|
388
|
+
def have_values_matching(expected)
|
389
|
+
HaveValuesMatching.new expected
|
390
|
+
end
|
391
|
+
|
392
|
+
class HaveUniqueValues < FieldMatcher
|
393
|
+
def mismatches
|
394
|
+
counts = Hash.new 0
|
395
|
+
items.each_with_index do |item,index|
|
396
|
+
unless blank? item[@field]
|
397
|
+
counts[item[@field]] += 1
|
398
|
+
end
|
399
|
+
end
|
400
|
+
counts.select{|_,count| count > 1}.keys
|
401
|
+
end
|
402
|
+
|
403
|
+
def failure_predicate
|
404
|
+
'are not unique'
|
405
|
+
end
|
406
|
+
end
|
407
|
+
# @example
|
408
|
+
# it {should have_unique_values.in('email')}
|
409
|
+
def have_unique_values
|
410
|
+
HaveUniqueValues.new nil
|
411
|
+
end
|
412
|
+
|
413
|
+
class HaveValuesStartingWith < FieldMatcher
|
414
|
+
def match?(v)
|
415
|
+
blank?(v) || v.start_with?(@expected)
|
416
|
+
end
|
417
|
+
|
418
|
+
def failure_predicate
|
419
|
+
"don't start with #{@expected}"
|
420
|
+
end
|
421
|
+
end
|
422
|
+
# @example
|
423
|
+
# it {should have_values_starting_with('http://').in('url')}
|
424
|
+
def have_values_starting_with(expected)
|
425
|
+
HaveValuesStartingWith.new expected
|
426
|
+
end
|
427
|
+
|
428
|
+
class HaveValuesEndingWith < FieldMatcher
|
429
|
+
def match?(v)
|
430
|
+
blank?(v) || v.end_with?(@expected)
|
431
|
+
end
|
432
|
+
|
433
|
+
def failure_predicate
|
434
|
+
"don't end with #{@expected}"
|
435
|
+
end
|
436
|
+
end
|
437
|
+
# @example
|
438
|
+
# it {should have_values_ending_with('Inc.').in('company_name')}
|
439
|
+
def have_values_ending_with(expected)
|
440
|
+
HaveValuesEndingWith.new expected
|
441
|
+
end
|
442
|
+
|
443
|
+
class HaveIntegerValues < FieldMatcher
|
444
|
+
def match?(v)
|
445
|
+
blank?(v) || (Integer(v) rescue false)
|
446
|
+
end
|
447
|
+
|
448
|
+
def failure_predicate
|
449
|
+
"aren't integers"
|
450
|
+
end
|
451
|
+
end
|
452
|
+
# @example
|
453
|
+
# it {should have_integer_values.in('year')}
|
454
|
+
def have_integer_values
|
455
|
+
HaveIntegerValues.new nil
|
456
|
+
end
|
457
|
+
end
|
458
|
+
end
|
459
|
+
end
|
data/lib/scraperwiki-api.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'httparty'
|
2
|
-
require 'scraperwiki-api/version'
|
3
2
|
|
4
3
|
module ScraperWiki
|
5
4
|
# A Ruby wrapper for the ScraperWiki API.
|
@@ -11,6 +10,32 @@ module ScraperWiki
|
|
11
10
|
class Error < StandardError; end
|
12
11
|
class ScraperNotFound < Error; end
|
13
12
|
|
13
|
+
RUN_INTERVALS = {
|
14
|
+
never: -1,
|
15
|
+
monthly: 2678400,
|
16
|
+
weekly: 604800,
|
17
|
+
daily: 86400,
|
18
|
+
hourly: 3600,
|
19
|
+
}
|
20
|
+
|
21
|
+
class << self
|
22
|
+
# Returns the URL to the scraper's overview.
|
23
|
+
#
|
24
|
+
# @param [String] shortname the scraper's shortname
|
25
|
+
# @return [String] the URL to the scraper's overview
|
26
|
+
def scraper_url(shortname)
|
27
|
+
"https://scraperwiki.com/scrapers/#{shortname}/"
|
28
|
+
end
|
29
|
+
|
30
|
+
# Returns the URL to edit the scraper.
|
31
|
+
#
|
32
|
+
# @param [String] shortname the scraper's shortname
|
33
|
+
# @return [String] the URL to edit the scraper
|
34
|
+
def edit_scraper_url(shortname)
|
35
|
+
"https://scraperwiki.com/scrapers/#{shortname}/edit/"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
14
39
|
# Initializes a ScraperWiki API object.
|
15
40
|
def initialize(apikey = nil)
|
16
41
|
@apikey = apikey
|
@@ -56,10 +81,11 @@ module ScraperWiki
|
|
56
81
|
# "htmltable" or "rss2"
|
57
82
|
# @option opts [String] :attach ";"-delimited list of shortnames of other
|
58
83
|
# scrapers whose data you need to access
|
84
|
+
# @return [Array,Hash,String]
|
59
85
|
# @see https://scraperwiki.com/docs/ruby/ruby_help_documentation/
|
60
86
|
#
|
61
87
|
# @note The query string parameter is +name+, not +shortname+
|
62
|
-
# {https://scraperwiki.com/docs/api#sqlite as
|
88
|
+
# {https://scraperwiki.com/docs/api#sqlite as in the ScraperWiki docs}
|
63
89
|
def datastore_sqlite(shortname, query, opts = {})
|
64
90
|
if Array === opts[:attach]
|
65
91
|
opts[:attach] = opts[:attach].join ';'
|
@@ -69,10 +95,22 @@ module ScraperWiki
|
|
69
95
|
|
70
96
|
# Extracts data about a scraper's code, owner, history, etc.
|
71
97
|
#
|
72
|
-
#
|
73
|
-
# * The +runid+ is a Unix timestamp with microseconds and a UUID.
|
98
|
+
# * +runid+ is a Unix timestamp with microseconds and a UUID.
|
74
99
|
# * The value of +records+ is the same as that of +total_rows+ under +datasummary+.
|
75
|
-
# * +run_interval+ is the number of seconds between runs.
|
100
|
+
# * +run_interval+ is the number of seconds between runs. It is one of:
|
101
|
+
# * -1 (never)
|
102
|
+
# * 2678400 (monthly)
|
103
|
+
# * 604800 (weekly)
|
104
|
+
# * 86400 (daily)
|
105
|
+
# * 3600 (hourly)
|
106
|
+
# * +privacy_status+ is one of:
|
107
|
+
# * "public" (everyone can see and edit the scraper and its data)
|
108
|
+
# * "visible" (everyone can see the scraper, but only contributors can edit it)
|
109
|
+
# * "private" (only contributors can see and edit the scraper and its data)
|
110
|
+
# * An individual +runevents+ hash will have an +exception_message+ key if
|
111
|
+
# there was an error during that run.
|
112
|
+
#
|
113
|
+
# Example output:
|
76
114
|
#
|
77
115
|
# [
|
78
116
|
# {
|
@@ -153,11 +191,12 @@ module ScraperWiki
|
|
153
191
|
# restricted to this date or after, enter as YYYY-MM-DD
|
154
192
|
# @option opts [String] :quietfields "|"-delimited list of fields to exclude
|
155
193
|
# from the output. Must be a subset of 'code|runevents|datasummary|userroles|history'
|
194
|
+
# @return [Array]
|
156
195
|
#
|
157
196
|
# @note Returns an array although the array seems to always have only one item
|
158
197
|
# @note The +tags+ field seems to always be an empty array
|
159
198
|
# @note The query string parameter is +name+, not +shortname+
|
160
|
-
# {https://scraperwiki.com/docs/api#getinfo as
|
199
|
+
# {https://scraperwiki.com/docs/api#getinfo as in the ScraperWiki docs}
|
161
200
|
def scraper_getinfo(shortname, opts = {})
|
162
201
|
if Array === opts[:quietfields]
|
163
202
|
opts[:quietfields] = opts[:quietfields].join '|'
|
@@ -192,10 +231,11 @@ module ScraperWiki
|
|
192
231
|
# @param [String] shortname the scraper's shortname (as it appears in the URL)
|
193
232
|
# @param [Hash] opts optional arguments
|
194
233
|
# @option opts [String] runid a run ID
|
234
|
+
# @return [Array]
|
195
235
|
#
|
196
236
|
# @note Returns an array although the array seems to always have only one item
|
197
237
|
# @note The query string parameter is +name+, not +shortname+
|
198
|
-
# {https://scraperwiki.com/docs/api#getinfo as
|
238
|
+
# {https://scraperwiki.com/docs/api#getinfo as in the ScraperWiki docs}
|
199
239
|
def scraper_getruninfo(shortname, opts = {})
|
200
240
|
request_with_apikey '/scraper/getruninfo', {name: shortname}.merge(opts)
|
201
241
|
end
|
@@ -227,6 +267,7 @@ module ScraperWiki
|
|
227
267
|
# ]
|
228
268
|
#
|
229
269
|
# @param [String] username a username
|
270
|
+
# @return [Array]
|
230
271
|
#
|
231
272
|
# @note Returns an array although the array seems to always have only one item
|
232
273
|
# @note The date joined field is +date_joined+ (with underscore) on
|
@@ -256,6 +297,7 @@ module ScraperWiki
|
|
256
297
|
# @option opts [Integer] :maxrows number of results to return [default 5]
|
257
298
|
# @option opts [String] :requestinguser the name of the user making the
|
258
299
|
# search, which changes the order of the matches
|
300
|
+
# @return [Array]
|
259
301
|
def scraper_search(opts = {})
|
260
302
|
request_with_apikey '/scraper/search', opts
|
261
303
|
end
|
@@ -280,6 +322,7 @@ module ScraperWiki
|
|
280
322
|
# from the output
|
281
323
|
# @option opts [String] :requestinguser the name of the user making the
|
282
324
|
# search, which changes the order of the matches
|
325
|
+
# @return [Array]
|
283
326
|
#
|
284
327
|
# @note The date joined field is +datejoined+ (without underscore) on
|
285
328
|
# {#scraper_getuserinfo}
|
data/scraperwiki-api.gemspec
CHANGED
@@ -22,7 +22,7 @@ class ScraperWiki::API
|
|
22
22
|
it 'should return a non-empty array containing a single hash' do
|
23
23
|
response = @api.scraper_getinfo EXAMPLE_SHORTNAME
|
24
24
|
response.should be_an(Array)
|
25
|
-
response.
|
25
|
+
response.should have(1).item
|
26
26
|
response.first.should be_a(Hash)
|
27
27
|
end
|
28
28
|
|
@@ -36,10 +36,10 @@ class ScraperWiki::API
|
|
36
36
|
|
37
37
|
it 'should respect the :history_start_date argument' do
|
38
38
|
bare = @api.scraper_getinfo(EXAMPLE_SHORTNAME).first
|
39
|
-
bare['history'].
|
39
|
+
bare['history'].should have_at_least(2).items
|
40
40
|
history_start_date = bare['history'][0]['date'][0..9]
|
41
41
|
result = @api.scraper_getinfo(EXAMPLE_SHORTNAME, history_start_date: history_start_date).first
|
42
|
-
result['history'].
|
42
|
+
result['history'].should have(1).item
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'should respect the :quietfields argument (as an array)' do
|
@@ -61,7 +61,7 @@ class ScraperWiki::API
|
|
61
61
|
it 'should return a non-empty array containing a single hash' do
|
62
62
|
response = @api.scraper_getruninfo EXAMPLE_SHORTNAME
|
63
63
|
response.should be_an(Array)
|
64
|
-
response.
|
64
|
+
response.should have(1).item
|
65
65
|
response.first.should be_a(Hash)
|
66
66
|
end
|
67
67
|
|
@@ -78,7 +78,7 @@ class ScraperWiki::API
|
|
78
78
|
it 'should return a non-empty array containing a single hash' do
|
79
79
|
response = @api.scraper_getuserinfo EXAMPLE_USERNAME
|
80
80
|
response.should be_an(Array)
|
81
|
-
response.
|
81
|
+
response.should have(1).item
|
82
82
|
response.first.should be_a(Hash)
|
83
83
|
end
|
84
84
|
end
|
@@ -87,7 +87,7 @@ class ScraperWiki::API
|
|
87
87
|
it 'should return a non-empty array of hashes' do
|
88
88
|
response = @api.scraper_search
|
89
89
|
response.should be_an(Array)
|
90
|
-
response.
|
90
|
+
response.should have_at_least(1).item
|
91
91
|
response.first.should be_a(Hash)
|
92
92
|
end
|
93
93
|
|
@@ -98,7 +98,7 @@ class ScraperWiki::API
|
|
98
98
|
end
|
99
99
|
|
100
100
|
it 'should respect the :maxrows argument' do
|
101
|
-
@api.scraper_search(maxrows: 1).
|
101
|
+
@api.scraper_search(maxrows: 1).should have(1).item
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
@@ -106,7 +106,7 @@ class ScraperWiki::API
|
|
106
106
|
it 'should return a non-empty array of hashes' do
|
107
107
|
response = @api.scraper_usersearch
|
108
108
|
response.should be_an(Array)
|
109
|
-
response.
|
109
|
+
response.should have_at_least(1).item
|
110
110
|
response.first.should be_a(Hash)
|
111
111
|
end
|
112
112
|
|
@@ -117,7 +117,7 @@ class ScraperWiki::API
|
|
117
117
|
end
|
118
118
|
|
119
119
|
it 'should respect the :maxrows argument' do
|
120
|
-
@api.scraper_usersearch(maxrows: 1).
|
120
|
+
@api.scraper_usersearch(maxrows: 1).should have(1).item
|
121
121
|
end
|
122
122
|
|
123
123
|
it 'should respect the :nolist argument (as an array)' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scraperwiki-api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: httparty
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314147045360 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,18 +21,18 @@ dependencies:
|
|
21
21
|
version: 0.7.8
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314147045360
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314147044520 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 2.
|
32
|
+
version: 2.10.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314147044520
|
36
36
|
description: A Ruby wrapper for the ScraperWiki API
|
37
37
|
email:
|
38
38
|
- info@opennorth.ca
|
@@ -47,10 +47,10 @@ files:
|
|
47
47
|
- Rakefile
|
48
48
|
- USAGE
|
49
49
|
- lib/scraperwiki-api.rb
|
50
|
+
- lib/scraperwiki-api/matchers.rb
|
50
51
|
- lib/scraperwiki-api/version.rb
|
51
52
|
- scraperwiki-api.gemspec
|
52
53
|
- spec/scraperwiki-api_spec.rb
|
53
|
-
- spec/spec.opts
|
54
54
|
- spec/spec_helper.rb
|
55
55
|
homepage: http://github.com/opennorth/scraperwiki-api-ruby
|
56
56
|
licenses: []
|
@@ -78,5 +78,4 @@ specification_version: 3
|
|
78
78
|
summary: The ScraperWiki API Ruby Gem
|
79
79
|
test_files:
|
80
80
|
- spec/scraperwiki-api_spec.rb
|
81
|
-
- spec/spec.opts
|
82
81
|
- spec/spec_helper.rb
|