scraperwiki-api 0.0.3 → 0.0.4

This diff shows the changes between two publicly released versions of the package, as they appear in one of the supported public registries. It is provided for informational purposes only.
data/README.md CHANGED
@@ -49,9 +49,13 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
49
49
  it {should be_editable_by('frabcus')}
50
50
  it {should run(:daily)}
51
51
  it {should_not be_broken}
52
+ it {should have_a_row_count_of(42).on('swdata')}
53
+
54
+ # Check for missing keys:
52
55
  it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
56
+
57
+ # Check for extra keys:
53
58
  it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
54
- it {should have_a_row_count_of(42).on('swdata')}
55
59
  end
56
60
 
57
61
  data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
@@ -60,6 +64,9 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
60
64
  include ScraperWiki::API::Matchers
61
65
  subject {data}
62
66
 
67
+ it {should set_any_of(['name', 'first_name', 'last_name'])}
68
+
69
+ # Validate the values of individual fields:
63
70
  it {should_not have_blank_values.in('name')}
64
71
  it {should have_unique_values.in('email')}
65
72
  it {should have_values_of(['M', 'F']).in('gender')}
@@ -67,7 +74,16 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
67
74
  it {should have_values_starting_with('http://').in('url')}
68
75
  it {should have_values_ending_with('Inc.').in('company_name')}
69
76
  it {should have_integer_values.in('year')}
70
- it {should set_any_of(['name', 'first_name', 'last_name'])}
77
+
78
+ # If you store a hash or an array of hashes in a field as a JSON string,
79
+ # you can validate the values of these subfields by chaining on an +at+:
80
+ it {should have_values_of(['M', 'F']).in('extra').at('gender')}
81
+
82
+ # Check for missing keys within subfields:
83
+ it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
84
+
85
+ # Check for extra keys within subfields:
86
+ it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
71
87
  end
72
88
 
73
89
  More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/ScraperWiki/API/Matchers).
@@ -16,9 +16,13 @@ module ScraperWiki
16
16
  # it {should be_editable_by('frabcus')}
17
17
  # it {should run(:daily)}
18
18
  # it {should_not be_broken}
19
+ # it {should have_a_row_count_of(42).on('swdata')}
20
+ #
21
+ # # Check for missing keys:
19
22
  # it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
23
+ #
24
+ # # Check for extra keys:
20
25
  # it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
21
- # it {should have_a_row_count_of(42).on('swdata')}
22
26
  # end
23
27
  #
24
28
  # data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
@@ -27,6 +31,9 @@ module ScraperWiki
27
31
  # include ScraperWiki::API::Matchers
28
32
  # subject {data}
29
33
  #
34
+ # it {should set_any_of(['name', 'first_name', 'last_name'])}
35
+ #
36
+ # # Validate the values of individual fields:
30
37
  # it {should_not have_blank_values.in('name')}
31
38
  # it {should have_unique_values.in('email')}
32
39
  # it {should have_values_of(['M', 'F']).in('gender')}
@@ -34,7 +41,16 @@ module ScraperWiki
34
41
  # it {should have_values_starting_with('http://').in('url')}
35
42
  # it {should have_values_ending_with('Inc.').in('company_name')}
36
43
  # it {should have_integer_values.in('year')}
37
- # it {should set_any_of(['name', 'first_name', 'last_name'])}
44
+ #
45
+ # # If you store a hash or an array of hashes in a field as a JSON string,
46
+ # # you can validate the values of these subfields by chaining on an +at+:
47
+ # it {should have_values_of(['M', 'F']).in('extra').at('gender')}
48
+ #
49
+ # # Check for missing keys within subfields:
50
+ # it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
51
+ #
52
+ # # Check for extra keys within subfields:
53
+ # it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
38
54
  # end
39
55
  #
40
56
  # RSpec matchers for ScraperWiki scrapers.
@@ -54,7 +70,7 @@ module ScraperWiki
54
70
  end
55
71
 
56
72
  def failure_message
57
- NotImplementerError
73
+ raise NotImplementerError, 'Subclasses must implement this method'
58
74
  end
59
75
 
60
76
  def negative_failure_message
@@ -149,12 +165,16 @@ module ScraperWiki
149
165
  difference.empty?
150
166
  end
151
167
 
168
+ def failure_message
169
+ "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
170
+ end
171
+
152
172
  def failure_predicate
153
- raise NotImplementerError
173
+ raise NotImplementerError, 'Subclasses must implement this method'
154
174
  end
155
175
 
156
- def failure_message
157
- "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
176
+ def difference
177
+ raise NotImplementerError, 'Subclasses must implement this method'
158
178
  end
159
179
  end
160
180
 
@@ -245,7 +265,7 @@ module ScraperWiki
245
265
  hash
246
266
  end
247
267
  else
248
- raise NotImplementerError
268
+ raise NotImplementerError, "Can only handle jsondict or jsonlist formats"
249
269
  end
250
270
  end
251
271
 
@@ -262,11 +282,11 @@ module ScraperWiki
262
282
  end
263
283
 
264
284
  def matches
265
- raise NotImplementerError
285
+ raise NotImplementerError, 'Subclasses must implement this method'
266
286
  end
267
287
 
268
288
  def mismatches
269
- raise NotImplementerError
289
+ raise NotImplementerError, 'Subclasses must implement this method'
270
290
  end
271
291
 
272
292
  def failures
@@ -285,10 +305,6 @@ module ScraperWiki
285
305
  end
286
306
  end
287
307
 
288
- def failure_description
289
- raise NotImplementerError
290
- end
291
-
292
308
  def failure_message
293
309
  "#{failure_size} of #{items.size} #{failure_description}\n#{failures.map(&:inspect).join "\n"}"
294
310
  end
@@ -296,6 +312,10 @@ module ScraperWiki
296
312
  def negative_failure_message
297
313
  failure_message
298
314
  end
315
+
316
+ def failure_description
317
+ raise NotImplementerError, 'Subclasses must implement this method'
318
+ end
299
319
  end
300
320
 
301
321
  class SetAnyOf < DatastoreMatcher
@@ -323,16 +343,47 @@ module ScraperWiki
323
343
  self
324
344
  end
325
345
 
326
- def matches
327
- items.select do |item|
328
- match? item[@field]
346
+ def at(subfield)
347
+ @subfield = subfield
348
+ self
349
+ end
350
+
351
+ # @note The JSON-parsed value of +@field+ can be a hash or an array of hashes; +@subfield+ is the key looked up within each hash
352
+ def matcher(meth)
353
+ if @subfield
354
+ items.send(meth) do |item|
355
+ if blank? item[@field]
356
+ true
357
+ else
358
+ v = Yajl::Parser.parse item[@field]
359
+ if Hash === v
360
+ v.has_key?(@subfield) && match?(v[@subfield])
361
+ elsif Array === v
362
+ v.all? do |w|
363
+ if Hash === w
364
+ w.has_key?(@subfield) && match?(w[@subfield])
365
+ else
366
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
367
+ end
368
+ end
369
+ else
370
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
371
+ end
372
+ end
373
+ end
374
+ else
375
+ items.send(meth) do |item|
376
+ match? item[@field]
377
+ end
329
378
  end
330
379
  end
331
380
 
381
+ def matches
382
+ matcher :select
383
+ end
384
+
332
385
  def mismatches
333
- items.reject do |item|
334
- match? item[@field]
335
- end
386
+ matcher :reject
336
387
  end
337
388
 
338
389
  def blank?(v)
@@ -340,7 +391,15 @@ module ScraperWiki
340
391
  end
341
392
 
342
393
  def failure_description
343
- "'#{@field}' values #{failure_predicate}"
394
+ if @subfield
395
+ "#{@field}:#{@subfield} values #{failure_predicate}"
396
+ else
397
+ "#{@field} values #{failure_predicate}"
398
+ end
399
+ end
400
+
401
+ def failure_predicate
402
+ raise NotImplementerError, 'Subclasses must implement this method'
344
403
  end
345
404
  end
346
405
 
@@ -392,9 +451,34 @@ module ScraperWiki
392
451
  class HaveUniqueValues < FieldMatcher
393
452
  def mismatches
394
453
  counts = Hash.new 0
395
- items.each_with_index do |item,index|
396
- unless blank? item[@field]
397
- counts[item[@field]] += 1
454
+ if @subfield
455
+ items.each do |item|
456
+ unless blank? item[@field]
457
+ v = Yajl::Parser.parse item[@field]
458
+ if Hash === v
459
+ unless blank? v[@subfield]
460
+ counts[v[@subfield]] += 1
461
+ end
462
+ elsif Array === v
463
+ v.each do |w|
464
+ if Hash === w
465
+ unless blank? w[@subfield]
466
+ counts[w[@subfield]] += 1
467
+ end
468
+ else
469
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
470
+ end
471
+ end
472
+ else
473
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
474
+ end
475
+ end
476
+ end
477
+ else
478
+ items.each do |item|
479
+ unless blank? item[@field]
480
+ counts[item[@field]] += 1
481
+ end
398
482
  end
399
483
  end
400
484
  counts.select{|_,count| count > 1}.keys
@@ -454,6 +538,67 @@ module ScraperWiki
454
538
  def have_integer_values
455
539
  HaveIntegerValues.new nil
456
540
  end
541
+
542
+ class FieldKeyMatcher < FieldMatcher
543
+ def match?(v)
544
+ if blank? v
545
+ true
546
+ else
547
+ w = Yajl::Parser.parse v
548
+ if Hash === w
549
+ difference(w).empty?
550
+ elsif Array === w
551
+ w.all? do |x|
552
+ if Hash === x
553
+ difference(x).empty?
554
+ else
555
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
556
+ end
557
+ end
558
+ else
559
+ raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
560
+ end
561
+ end
562
+ end
563
+
564
+ def difference(v)
565
+ raise NotImplementerError, 'Subclasses must implement this method'
566
+ end
567
+
568
+ def failure_predicate
569
+ "#{predicate}: #{difference.join ', '}"
570
+ end
571
+ end
572
+
573
+ class HaveValuesWithAtLeastTheKeys < FieldKeyMatcher
574
+ def difference(v)
575
+ @expected - v.keys
576
+ end
577
+
578
+ def failure_predicate
579
+ 'have missing keys'
580
+ end
581
+ end
582
+ # @example
583
+ # it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
584
+ def have_values_with_at_least_the_keys(expected)
585
+ HaveValuesWithAtLeastTheKeys.new expected
586
+ end
587
+
588
+ class HaveValuesWithAtMostTheKeys < FieldKeyMatcher
589
+ def difference(v)
590
+ v.keys - @expected
591
+ end
592
+
593
+ def failure_predicate
594
+ 'have extra keys'
595
+ end
596
+ end
597
+ # @example
598
+ # it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
599
+ def have_values_with_at_most_the_keys(expected)
600
+ HaveValuesWithAtMostTheKeys.new expected
601
+ end
457
602
  end
458
603
  end
459
604
  end
@@ -1,5 +1,5 @@
1
1
  module ScraperWiki
2
2
  class API
3
- VERSION = "0.0.3"
3
+ VERSION = "0.0.4"
4
4
  end
5
5
  end
@@ -17,6 +17,7 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency('httparty', '~> 0.7.8')
21
- s.add_development_dependency('rspec', '~> 2.10.0')
20
+ s.add_runtime_dependency('yajl-ruby', '~> 1.0')
21
+ s.add_runtime_dependency('httparty', '~> 0.8.0')
22
+ s.add_development_dependency('rspec', '~> 2.10')
22
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraperwiki-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,28 +11,39 @@ bindir: bin
11
11
  cert_chain: []
12
12
  date: 2012-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: yajl-ruby
16
+ requirement: &70366370555240 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70366370555240
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: httparty
16
- requirement: &70314147045360 !ruby/object:Gem::Requirement
27
+ requirement: &70366370554220 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
20
31
  - !ruby/object:Gem::Version
21
- version: 0.7.8
32
+ version: 0.8.0
22
33
  type: :runtime
23
34
  prerelease: false
24
- version_requirements: *70314147045360
35
+ version_requirements: *70366370554220
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rspec
27
- requirement: &70314147044520 !ruby/object:Gem::Requirement
38
+ requirement: &70366370553000 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
31
42
  - !ruby/object:Gem::Version
32
- version: 2.10.0
43
+ version: '2.10'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70314147044520
46
+ version_requirements: *70366370553000
36
47
  description: A Ruby wrapper for the ScraperWiki API
37
48
  email:
38
49
  - info@opennorth.ca