scraperwiki-api 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -49,9 +49,13 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
49
49
  it {should be_editable_by('frabcus')}
50
50
  it {should run(:daily)}
51
51
  it {should_not be_broken}
52
+ it {should have_a_row_count_of(42).on('swdata')}
53
+
54
+ # Check for missing keys:
52
55
  it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
56
+
57
+ # Check for extra keys:
53
58
  it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
54
- it {should have_a_row_count_of(42).on('swdata')}
55
59
  end
56
60
 
57
61
  data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
@@ -60,6 +64,9 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
60
64
  include ScraperWiki::API::Matchers
61
65
  subject {data}
62
66
 
67
+ it {should set_any_of(['name', 'first_name', 'last_name'])}
68
+
69
+ # Validate the values of individual fields:
63
70
  it {should_not have_blank_values.in('name')}
64
71
  it {should have_unique_values.in('email')}
65
72
  it {should have_values_of(['M', 'F']).in('gender')}
@@ -67,7 +74,16 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
67
74
  it {should have_values_starting_with('http://').in('url')}
68
75
  it {should have_values_ending_with('Inc.').in('company_name')}
69
76
  it {should have_integer_values.in('year')}
70
- it {should set_any_of(['name', 'first_name', 'last_name'])}
77
+
78
+ # If you store a hash or an array of hashes in a field as a JSON string,
79
+ # you can validate the values of a subfield (a key within those hashes) by chaining on an +at+:
80
+ it {should have_values_of(['M', 'F']).in('extra').at('gender')}
81
+
82
+ # Check for missing keys within subfields:
83
+ it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
84
+
85
+ # Check for extra keys within subfields:
86
+ it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
71
87
  end
72
88
 
73
89
  More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API/Matchers).
@@ -16,9 +16,13 @@ module ScraperWiki
16
16
  # it {should be_editable_by('frabcus')}
17
17
  # it {should run(:daily)}
18
18
  # it {should_not be_broken}
19
+ # it {should have_a_row_count_of(42).on('swdata')}
20
+ #
21
+ # # Check for missing keys:
19
22
  # it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
23
+ #
24
+ # # Check for extra keys:
20
25
  # it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
21
- # it {should have_a_row_count_of(42).on('swdata')}
22
26
  # end
23
27
  #
24
28
  # data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
@@ -27,6 +31,9 @@ module ScraperWiki
27
31
  # include ScraperWiki::API::Matchers
28
32
  # subject {data}
29
33
  #
34
+ # it {should set_any_of(['name', 'first_name', 'last_name'])}
35
+ #
36
+ # # Validate the values of individual fields:
30
37
  # it {should_not have_blank_values.in('name')}
31
38
  # it {should have_unique_values.in('email')}
32
39
  # it {should have_values_of(['M', 'F']).in('gender')}
@@ -34,7 +41,16 @@ module ScraperWiki
34
41
  # it {should have_values_starting_with('http://').in('url')}
35
42
  # it {should have_values_ending_with('Inc.').in('company_name')}
36
43
  # it {should have_integer_values.in('year')}
37
- # it {should set_any_of(['name', 'first_name', 'last_name'])}
44
+ #
45
+ # # If you store a hash or an array of hashes in a field as a JSON string,
46
+ # # you can validate the values of a subfield (a key within those hashes) by chaining on an +at+:
47
+ # it {should have_values_of(['M', 'F']).in('extra').at('gender')}
48
+ #
49
+ # # Check for missing keys within subfields:
50
+ # it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
51
+ #
52
+ # # Check for extra keys within subfields:
53
+ # it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
38
54
  # end
39
55
  #
40
56
  # RSpec matchers for ScraperWiki scrapers.
@@ -54,7 +70,7 @@ module ScraperWiki
54
70
  end
55
71
 
56
72
  def failure_message
57
- NotImplementerError
73
+ raise NotImplementerError, 'Subclasses must implement this method'
58
74
  end
59
75
 
60
76
  def negative_failure_message
@@ -149,12 +165,16 @@ module ScraperWiki
149
165
  difference.empty?
150
166
  end
151
167
 
168
+ def failure_message
169
+ "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
170
+ end
171
+
152
172
  def failure_predicate
153
- raise NotImplementerError
173
+ raise NotImplementerError, 'Subclasses must implement this method'
154
174
  end
155
175
 
156
- def failure_message
157
- "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
176
+ def difference
177
+ raise NotImplementerError, 'Subclasses must implement this method'
158
178
  end
159
179
  end
160
180
 
@@ -245,7 +265,7 @@ module ScraperWiki
245
265
  hash
246
266
  end
247
267
  else
248
- raise NotImplementerError
268
+ raise NotImplementerError, "Can only handle jsondict or jsonlist formats"
249
269
  end
250
270
  end
251
271
 
@@ -262,11 +282,11 @@ module ScraperWiki
262
282
  end
263
283
 
264
284
  def matches
265
- raise NotImplementerError
285
+ raise NotImplementerError, 'Subclasses must implement this method'
266
286
  end
267
287
 
268
288
  def mismatches
269
- raise NotImplementerError
289
+ raise NotImplementerError, 'Subclasses must implement this method'
270
290
  end
271
291
 
272
292
  def failures
@@ -285,10 +305,6 @@ module ScraperWiki
285
305
  end
286
306
  end
287
307
 
288
- def failure_description
289
- raise NotImplementerError
290
- end
291
-
292
308
  def failure_message
293
309
  "#{failure_size} of #{items.size} #{failure_description}\n#{failures.map(&:inspect).join "\n"}"
294
310
  end
@@ -296,6 +312,10 @@ module ScraperWiki
296
312
  def negative_failure_message
297
313
  failure_message
298
314
  end
315
+
316
+ def failure_description
317
+ raise NotImplementerError, 'Subclasses must implement this method'
318
+ end
299
319
  end
300
320
 
301
321
  class SetAnyOf < DatastoreMatcher
@@ -323,16 +343,47 @@ module ScraperWiki
323
343
  self
324
344
  end
325
345
 
326
- def matches
327
- items.select do |item|
328
- match? item[@field]
346
+ def at(subfield)
347
+ @subfield = subfield
348
+ self
349
+ end
350
+
351
+ # @note The JSON-decoded value of +@field+ can be a hash or an array of hashes; +@subfield+ is the key looked up in each hash
352
+ def matcher(meth)
353
+ if @subfield
354
+ items.send(meth) do |item|
355
+ if blank? item[@field]
356
+ true
357
+ else
358
+ v = Yajl::Parser.parse item[@field]
359
+ if Hash === v
360
+ v.has_key?(@subfield) && match?(v[@subfield])
361
+ elsif Array === v
362
+ v.all? do |w|
363
+ if Hash === w
364
+ w.has_key?(@subfield) && match?(w[@subfield])
365
+ else
366
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
367
+ end
368
+ end
369
+ else
370
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
371
+ end
372
+ end
373
+ end
374
+ else
375
+ items.send(meth) do |item|
376
+ match? item[@field]
377
+ end
329
378
  end
330
379
  end
331
380
 
381
+ def matches
382
+ matcher :select
383
+ end
384
+
332
385
  def mismatches
333
- items.reject do |item|
334
- match? item[@field]
335
- end
386
+ matcher :reject
336
387
  end
337
388
 
338
389
  def blank?(v)
@@ -340,7 +391,15 @@ module ScraperWiki
340
391
  end
341
392
 
342
393
  def failure_description
343
- "'#{@field}' values #{failure_predicate}"
394
+ if @subfield
395
+ "#{@field}:#{@subfield} values #{failure_predicate}"
396
+ else
397
+ "#{@field} values #{failure_predicate}"
398
+ end
399
+ end
400
+
401
+ def failure_predicate
402
+ raise NotImplementerError, 'Subclasses must implement this method'
344
403
  end
345
404
  end
346
405
 
@@ -392,9 +451,34 @@ module ScraperWiki
392
451
  class HaveUniqueValues < FieldMatcher
393
452
  def mismatches
394
453
  counts = Hash.new 0
395
- items.each_with_index do |item,index|
396
- unless blank? item[@field]
397
- counts[item[@field]] += 1
454
+ if @subfield
455
+ items.each do |item|
456
+ unless blank? item[@field]
457
+ v = Yajl::Parser.parse item[@field]
458
+ if Hash === v
459
+ unless blank? v[@subfield]
460
+ counts[v[@subfield]] += 1
461
+ end
462
+ elsif Array === v
463
+ v.each do |w|
464
+ if Hash === w
465
+ unless blank? w[@subfield]
466
+ counts[w[@subfield]] += 1
467
+ end
468
+ else
469
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
470
+ end
471
+ end
472
+ else
473
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
474
+ end
475
+ end
476
+ end
477
+ else
478
+ items.each do |item|
479
+ unless blank? item[@field]
480
+ counts[item[@field]] += 1
481
+ end
398
482
  end
399
483
  end
400
484
  counts.select{|_,count| count > 1}.keys
@@ -454,6 +538,67 @@ module ScraperWiki
454
538
  def have_integer_values
455
539
  HaveIntegerValues.new nil
456
540
  end
541
+
542
+ class FieldKeyMatcher < FieldMatcher
543
+ def match?(v)
544
+ if blank? v
545
+ true
546
+ else
547
+ w = Yajl::Parser.parse v
548
+ if Hash === w
549
+ difference(w).empty?
550
+ elsif Array === w
551
+ w.all? do |x|
552
+ if Hash === x
553
+ difference(x).empty?
554
+ else
555
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
556
+ end
557
+ end
558
+ else
559
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
560
+ end
561
+ end
562
+ end
563
+
564
+ def difference(v)
565
+ raise NotImplementerError, 'Subclasses must implement this method'
566
+ end
567
+
568
+ def failure_predicate
569
+ "#{predicate}: #{difference.join ', '}"
570
+ end
571
+ end
572
+
573
+ class HaveValuesWithAtLeastTheKeys < FieldKeyMatcher
574
+ def difference(v)
575
+ @expected - v.keys
576
+ end
577
+
578
+ def failure_predicate
579
+ 'have missing keys'
580
+ end
581
+ end
582
+ # @example
583
+ # it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
584
+ def have_values_with_at_least_the_keys(expected)
585
+ HaveValuesWithAtLeastTheKeys.new expected
586
+ end
587
+
588
+ class HaveValuesWithAtMostTheKeys < FieldKeyMatcher
589
+ def difference(v)
590
+ v.keys - @expected
591
+ end
592
+
593
+ def failure_predicate
594
+ 'have extra keys'
595
+ end
596
+ end
597
+ # @example
598
+ # it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
599
+ def have_values_with_at_most_the_keys(expected)
600
+ HaveValuesWithAtMostTheKeys.new expected
601
+ end
457
602
  end
458
603
  end
459
604
  end
@@ -1,5 +1,5 @@
1
1
  module ScraperWiki
2
2
  class API
3
- VERSION = "0.0.3"
3
+ VERSION = "0.0.4"
4
4
  end
5
5
  end
@@ -17,6 +17,7 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency('httparty', '~> 0.7.8')
21
- s.add_development_dependency('rspec', '~> 2.10.0')
20
+ s.add_runtime_dependency('yajl-ruby', '~> 1.0')
21
+ s.add_runtime_dependency('httparty', '~> 0.8.0')
22
+ s.add_development_dependency('rspec', '~> 2.10')
22
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraperwiki-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,28 +11,39 @@ bindir: bin
11
11
  cert_chain: []
12
12
  date: 2012-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: yajl-ruby
16
+ requirement: &70366370555240 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70366370555240
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: httparty
16
- requirement: &70314147045360 !ruby/object:Gem::Requirement
27
+ requirement: &70366370554220 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
20
31
  - !ruby/object:Gem::Version
21
- version: 0.7.8
32
+ version: 0.8.0
22
33
  type: :runtime
23
34
  prerelease: false
24
- version_requirements: *70314147045360
35
+ version_requirements: *70366370554220
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rspec
27
- requirement: &70314147044520 !ruby/object:Gem::Requirement
38
+ requirement: &70366370553000 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
31
42
  - !ruby/object:Gem::Version
32
- version: 2.10.0
43
+ version: '2.10'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70314147044520
46
+ version_requirements: *70366370553000
36
47
  description: A Ruby wrapper for the ScraperWiki API
37
48
  email:
38
49
  - info@opennorth.ca