scraperwiki-api 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -34,7 +34,9 @@ More documentation at [RubyDoc.info](http://rdoc.info/gems/scraperwiki-api/Scrap
 
 ## Scraper validations
 
-If your project uses a lot of scrapers – for example, [OpenCorporates](http://opencorporates.com/), which [scrapes company registries around the world](http://blog.opencorporates.com/2011/03/25/building-a-global-database-the-open-distributed-way/), or [Represent](http://represent.opennorth.ca/), which scrapes information on elected officials from government websites in Canada – you'll want to check that your scrapers behave the way you expect them to. This gem defines [RSpec](https://www.relishapp.com/rspec) matchers to do just that. For example:
+If your project uses a lot of scrapers – for example, [OpenCorporates](http://opencorporates.com/), which [scrapes company registries around the world](http://blog.opencorporates.com/2011/03/25/building-a-global-database-the-open-distributed-way/), or [Represent](http://represent.opennorth.ca/), which scrapes information on elected officials from government websites in Canada – you'll want to check that your scrapers behave the way you expect them to. This gem defines [RSpec](https://www.relishapp.com/rspec) matchers to do just that.
+
+You can validate a scraper's metadata (how often it runs, what fields it stores, etc.) like so:
 
     require 'scraperwiki-api'
     api = ScraperWiki::API.new
@@ -49,40 +51,48 @@ If your project uses a lot of scrapers – for example, [OpenCorporates](http:/
       it {should be_editable_by('frabcus')}
       it {should run(:daily)}
       it {should_not be_broken}
+
+      # Validate the properties of a SQLite table by chaining on a +on+.
       it {should have_a_row_count_of(42).on('swdata')}
 
-      # Check for missing keys:
+      # Ensure that the scraper sets required fields.
       it {should have_at_least_the_keys(['name', 'email']).on('swdata')}
 
-      # Check for extra keys:
+      # Ensure that the scraper doesn't set too many fields.
       it {should have_at_most_the_keys(['name', 'email', 'tel', 'fax']).on('swdata')}
     end
 
+And you can validate the scraped data like so:
+
+    require 'scraperwiki-api'
+    api = ScraperWiki::API.new
+
     data = api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')
 
     describe 'example-scraper' do
       include ScraperWiki::API::Matchers
       subject {data}
 
+      # If you need at least one of a set of fields to be set:
       it {should set_any_of(['name', 'first_name', 'last_name'])}
 
-      # Validate the values of individual fields:
+      # Validate the values of individual fields by chaining on an +in+.
       it {should_not have_blank_values.in('name')}
       it {should have_unique_values.in('email')}
       it {should have_values_of(['M', 'F']).in('gender')}
-      it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
+      it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
       it {should have_values_starting_with('http://').in('url')}
       it {should have_values_ending_with('Inc.').in('company_name')}
       it {should have_integer_values.in('year')}
 
       # If you store a hash or an array of hashes in a field as a JSON string,
-      # you can validate the values of these subfields by chaining on an +at+:
+      # you can validate the values of these subfields by chaining on an +at+.
       it {should have_values_of(['M', 'F']).in('extra').at('gender')}
 
-      # Check for missing keys within subfields:
+      # Check for missing keys within subfields.
       it {should have_values_with_at_least_the_keys(['subfield1', 'subfield2']).in('fieldA')}
 
-      # Check for extra keys within subfields:
+      # Check for extra keys within subfields.
       it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
     end
 
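Taken together, the README's two examples compose into a single spec file. A minimal sketch under stated assumptions: `example-scraper`, the table, and the field names are placeholders, and the metadata subject is assumed to come from `scraper_getinfo`, which returns an array of info hashes:

```ruby
# spec/example_scraper_spec.rb — hedged sketch; scraper and field names
# are placeholders, not taken from the README above.
require 'scraperwiki-api'

api = ScraperWiki::API.new

describe 'example-scraper' do
  include ScraperWiki::API::Matchers

  context 'metadata' do
    # scraper_getinfo returns an array; the first element is the info hash.
    subject {api.scraper_getinfo('example-scraper').first}

    it {should_not be_broken}
    it {should have_a_row_count_of(42).on('swdata')}
  end

  context 'data' do
    subject {api.datastore_sqlite('example-scraper', 'SELECT * from `swdata`')}

    it {should_not have_blank_values.in('name')}
    it {should have_unique_values.in('email')}
  end
end
```
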
data/lib/scraperwiki-api/matchers.rb CHANGED
@@ -37,7 +37,7 @@ module ScraperWiki
 #     it {should_not have_blank_values.in('name')}
 #     it {should have_unique_values.in('email')}
 #     it {should have_values_of(['M', 'F']).in('gender')}
-#     it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
+#     it {should have_values_matching(/\A[^@\s]+@[^@\s]+\z/).in('email')}
 #     it {should have_values_starting_with('http://').in('url')}
 #     it {should have_values_ending_with('Inc.').in('company_name')}
 #     it {should have_integer_values.in('year')}
@@ -74,7 +74,7 @@ module ScraperWiki
       end
 
       def negative_failure_message
-        failure_message
+        raise NotImplementerError, 'Subclasses must implement this method'
       end
     end
 
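
For context, the new method bodies throughout this file implement the RSpec 2 custom-matcher protocol: RSpec calls `failure_message` when a `should` expectation fails and `negative_failure_message` when a `should_not` expectation fails. A minimal standalone sketch of that protocol (a hypothetical matcher, not code from this gem; RSpec 3 later renamed the second hook to `failure_message_when_negated`):

```ruby
# Hypothetical RSpec 2-style matcher illustrating the two message hooks.
class BePositive
  def matches?(actual)
    @actual = actual
    @actual > 0
  end

  def failure_message            # used when `should be_positive` fails
    "expected #{@actual} to be positive"
  end

  def negative_failure_message   # used when `should_not be_positive` fails
    "expected #{@actual} to not be positive"
  end
end

def be_positive
  BePositive.new
end
```
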
@@ -92,6 +92,10 @@ module ScraperWiki
       def failure_message
         "expected #{@actual['short_name']} to be #{@expected}"
       end
+
+      def negative_failure_message
+        "expected #{@actual['short_name']} to not be #{@expected}"
+      end
     end
     # @example
     #   it {should be_public}
@@ -120,6 +124,10 @@ module ScraperWiki
       def failure_message
         "expected #{@actual['short_name']} to be editable by #{@expected}"
       end
+
+      def negative_failure_message
+        "expected #{@actual['short_name']} to not be editable by #{@expected}"
+      end
     end
     # @example
     #   it {should be_editable_by 'frabcus'}
@@ -140,6 +148,14 @@ module ScraperWiki
           "expected #{@actual['short_name']} to run #{@expected}"
         end
       end
+
+      def negative_failure_message
+        if @expected == -1
+          "expected #{@actual['short_name']} to run at some time"
+        else
+          "expected #{@actual['short_name']} to not run #{@expected}"
+        end
+      end
     end
     # @example
     #   it {should run(:daily)}
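
The `-1` branch reads naturally if, as the run matcher's interval handling suggests, `-1` denotes a scraper that is never scheduled; a hedged usage sketch (assuming the matcher accepts `:never`):

```ruby
# Assuming :never maps to a run interval of -1, a negated expectation
# takes the first branch of the message above:
it {should_not run(:never)}
# failing output: "expected example-scraper to run at some time"
```
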
@@ -169,10 +185,18 @@ module ScraperWiki
         "#{@actual['short_name']} #{failure_predicate}: #{difference.join ', '}"
       end
 
+      def negative_failure_message
+        "#{@actual['short_name']} #{negative_failure_predicate}: #{difference.join ', '}"
+      end
+
       def failure_predicate
         raise NotImplementerError, 'Subclasses must implement this method'
       end
 
+      def negative_failure_predicate
+        raise NotImplementerError, 'Subclasses must implement this method'
+      end
+
       def difference
         raise NotImplementerError, 'Subclasses must implement this method'
       end
@@ -186,6 +210,10 @@ module ScraperWiki
       def failure_predicate
         'is missing keys'
       end
+
+      def negative_failure_predicate
+        "isn't missing keys"
+      end
     end
     # @example
     #   it {should have_at_least_the_keys(['fieldA', 'fieldB']).on('swdata')}
@@ -201,6 +229,10 @@ module ScraperWiki
       def failure_predicate
         'has extra keys'
       end
+
+      def negative_failure_predicate
+        'has no extra keys'
+      end
     end
     # @example
     #   it {should have_at_most_the_keys(['fieldA', 'fieldB', 'fieldC', 'fieldD']).on('swdata')}
@@ -217,6 +249,10 @@ module ScraperWiki
       def failure_message
         "expected #{@actual['short_name']} to have #{@expected} rows, not #{@actual['datasummary']['tables'][@table]['count']}"
       end
+
+      def negative_failure_message
+        "expected #{@actual['short_name']} to not have #{@expected} rows"
+      end
     end
     # @example
     #   it {should have_a_row_count_of(42).on('swdata')}
@@ -243,6 +279,10 @@ module ScraperWiki
       def failure_message
         "#{@actual['short_name']} is broken: #{exception_message}"
       end
+
+      def negative_failure_message
+        "#{@actual['short_name']} isn't broken: #{exception_message}"
+      end
     end
     # @example
     #   it {should_not be_broken}
@@ -310,12 +350,16 @@ module ScraperWiki
       end
 
       def negative_failure_message
-        failure_message
+        "#{failure_size} of #{items.size} #{negative_failure_description}\n#{failures.map(&:inspect).join "\n"}"
       end
 
       def failure_description
         raise NotImplementerError, 'Subclasses must implement this method'
       end
+
+      def negative_failure_description
+        raise NotImplementerError, 'Subclasses must implement this method'
+      end
     end
 
     class SetAnyOf < DatastoreMatcher
@@ -330,6 +374,10 @@ module ScraperWiki
       def failure_description
         "records didn't set any of #{@expected.join ','}"
       end
+
+      def negative_failure_description
+        "records set any of #{@expected.join ','}"
+      end
     end
     # @example
     #   it {should set_any_of(['name', 'first_name', 'last_name'])}
@@ -353,15 +401,23 @@ module ScraperWiki
         if @subfield
           items.send(meth) do |item|
             if blank? item[@field]
-              true
+              meth == :reject
             else
               v = Yajl::Parser.parse item[@field]
               if Hash === v
-                v.has_key?(@subfield) && match?(v[@subfield])
+                if blank? v[@subfield]
+                  meth == :reject
+                else
+                  match? v[@subfield]
+                end
               elsif Array === v
                 v.all? do |w|
                   if Hash === w
-                    w.has_key?(@subfield) && match?(w[@subfield])
+                    if blank? w[@subfield]
+                      meth == :reject
+                    else
+                      match? w[@subfield]
+                    end
                   else
                     raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
                   end
@@ -373,7 +429,11 @@ module ScraperWiki
           end
         else
           items.send(meth) do |item|
-            match? item[@field]
+            if blank? item[@field]
+              meth == :reject
+            else
+              match? item[@field]
+            end
           end
         end
       end
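
The recurring `meth == :reject` return value above makes blank values neutral in both directions. The block passed to `items.send(meth)` decides whether an item counts as a failure; assuming `meth` is `:reject` when collecting failures for a positive `should` and `:select` for a negated `should_not`, a blank row is excluded from the failure list either way. A standalone sketch under that assumption:

```ruby
# Hedged sketch of the blank-handling idiom; not code from the gem.
items = [{'name' => 'Alice'}, {'name' => ''}]
match = ->(v) { v.start_with?('A') }

# Positive direction (meth == :reject): failures are the rejected items.
failures = items.reject { |i| i['name'].empty? ? true : match.call(i['name']) }
failures # => [] — the blank row returns true and is rejected, so it never fails

# Negative direction (meth == :select): failures are the selected items.
failures = items.select { |i| i['name'].empty? ? false : match.call(i['name']) }
failures # => [{'name' => 'Alice'}] — the blank row returns false and is skipped
```
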
@@ -398,9 +458,21 @@ module ScraperWiki
         end
       end
 
+      def negative_failure_description
+        if @subfield
+          "#{@field}:#{@subfield} values #{negative_failure_predicate}"
+        else
+          "#{@field} values #{negative_failure_predicate}"
+        end
+      end
+
       def failure_predicate
         raise NotImplementerError, 'Subclasses must implement this method'
       end
+
+      def negative_failure_predicate
+        raise NotImplementerError, 'Subclasses must implement this method'
+      end
     end
 
     class HaveBlankValues < FieldMatcher
@@ -411,6 +483,10 @@ module ScraperWiki
       def failure_predicate
         'are blank'
       end
+
+      def negative_failure_predicate
+        'are present'
+      end
     end
     # @example
     #   it {should_not have_blank_values.in('name')}
@@ -420,12 +496,16 @@ module ScraperWiki
 
     class HaveValuesOf < FieldMatcher
       def match?(v)
-        blank?(v) || @expected.include?(v)
+        @expected.include? v
       end
 
       def failure_predicate
         "aren't one of #{@expected.join ', '}"
       end
+
+      def negative_failure_predicate
+        "are one of #{@expected.join ', '}"
+      end
     end
     # @example
     #   it {should have_values_of(['M', 'F']).in('gender')}
@@ -435,12 +515,16 @@ module ScraperWiki
 
     class HaveValuesMatching < FieldMatcher
       def match?(v)
-        blank?(v) || v[@expected]
+        v[@expected]
       end
 
       def failure_predicate
         "don't match #{@expected.inspect}"
       end
+
+      def negative_failure_predicate
+        "match #{@expected.inspect}"
+      end
     end
     # @example
     #   it {should have_values_matching(/\A[^@\s]+@[^a\s]+\z/).in('email')}
@@ -485,7 +569,11 @@ module ScraperWiki
       end
 
       def failure_predicate
-        'are not unique'
+        "aren't unique"
+      end
+
+      def negative_failure_predicate
+        'are unique'
       end
     end
     # @example
@@ -496,12 +584,16 @@ module ScraperWiki
 
     class HaveValuesStartingWith < FieldMatcher
       def match?(v)
-        blank?(v) || v.start_with?(@expected)
+        v.start_with? @expected
       end
 
       def failure_predicate
         "don't start with #{@expected}"
       end
+
+      def negative_failure_predicate
+        "start with #{@expected}"
+      end
     end
     # @example
     #   it {should have_values_starting_with('http://').in('url')}
@@ -511,12 +603,16 @@ module ScraperWiki
 
     class HaveValuesEndingWith < FieldMatcher
       def match?(v)
-        blank?(v) || v.end_with?(@expected)
+        v.end_with? @expected
       end
 
       def failure_predicate
         "don't end with #{@expected}"
       end
+
+      def negative_failure_predicate
+        "end with #{@expected}"
+      end
     end
     # @example
     #   it {should have_values_ending_with('Inc.').in('company_name')}
@@ -526,12 +622,16 @@ module ScraperWiki
 
     class HaveIntegerValues < FieldMatcher
       def match?(v)
-        blank?(v) || (Integer(v) rescue false)
+        Integer(v) rescue false
       end
 
       def failure_predicate
         "aren't integers"
       end
+
+      def negative_failure_predicate
+        'are integers'
+      end
     end
     # @example
     #   it {should have_integer_values.in('year')}
@@ -541,23 +641,19 @@ module ScraperWiki
 
     class FieldKeyMatcher < FieldMatcher
       def match?(v)
-        if blank? v
-          true
-        else
-          w = Yajl::Parser.parse v
-          if Hash === w
-            difference(w).empty?
-          elsif Array === w
-            w.all? do |x|
-              if Hash === x
-                difference(x).empty?
-              else
-                raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
-              end
-            end
-          else
-            raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
-          end
-        end
+        w = Yajl::Parser.parse v
+        if Hash === w
+          difference(w).empty?
+        elsif Array === w
+          w.all? do |x|
+            if Hash === x
+              difference(x).empty?
+            else
+              raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
+            end
+          end
+        else
+          raise NotImplementerError, 'Can only handle subfields that are hashes or arrays of hashes'
+        end
       end
 
@@ -568,6 +664,10 @@ module ScraperWiki
       def failure_predicate
         "#{predicate}: #{difference.join ', '}"
       end
+
+      def negative_failure_predicate
+        "#{negative_predicate}: #{difference.join ', '}"
+      end
     end
 
     class HaveValuesWithAtLeastTheKeys < FieldKeyMatcher
@@ -575,8 +675,12 @@ module ScraperWiki
         @expected - v.keys
       end
 
-      def failure_predicate
-        'have missing keys'
+      def predicate
+        'are missing keys'
+      end
+
+      def negative_predicate
+        "aren't missing keys"
       end
     end
     # @example
@@ -590,9 +694,13 @@ module ScraperWiki
         v.keys - @expected
       end
 
-      def failure_predicate
+      def predicate
         'have extra keys'
       end
+
+      def negative_predicate
+        'have no extra keys'
+      end
     end
     # @example
     #   it {should have_values_with_at_most_the_keys(['subfield1', 'subfield2', 'subfield3', 'subfield4']).in('fieldA')}
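
The net effect of the negative predicates added throughout this file: a failing `should_not` expectation now reports the negated sense of the check instead of reusing the positive-direction message. A hedged example:

```ruby
# Before 0.0.5, this failing expectation would reuse the positive message
# ("... values aren't unique"); with negative_failure_predicate defined,
# it reports "... values are unique" instead.
it {should_not have_unique_values.in('email')}
```
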
data/lib/scraperwiki-api/version.rb CHANGED
@@ -1,5 +1,5 @@
 module ScraperWiki
   class API
-    VERSION = "0.0.4"
+    VERSION = "0.0.5"
   end
 end
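
The bumped constant is what the gemspec reads at packaging time; with the gem installed, it can be checked directly (assuming the conventional `scraperwiki-api/version` require path):

```ruby
require 'scraperwiki-api/version'
puts ScraperWiki::API::VERSION # => "0.0.5"
```
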
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scraperwiki-api
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.5
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-28 00:00:00.000000000 Z
+date: 2012-06-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: yajl-ruby
-  requirement: &70366370555240 !ruby/object:Gem::Requirement
+  requirement: &70265218891100 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,10 +21,10 @@ dependencies:
         version: '1.0'
   type: :runtime
   prerelease: false
-  version_requirements: *70366370555240
+  version_requirements: *70265218891100
 - !ruby/object:Gem::Dependency
   name: httparty
-  requirement: &70366370554220 !ruby/object:Gem::Requirement
+  requirement: &70265218882020 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -32,10 +32,10 @@ dependencies:
         version: 0.8.0
   type: :runtime
   prerelease: false
-  version_requirements: *70366370554220
+  version_requirements: *70265218882020
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &70366370553000 !ruby/object:Gem::Requirement
+  requirement: &70265218880500 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -43,7 +43,7 @@ dependencies:
         version: '2.10'
   type: :development
   prerelease: false
-  version_requirements: *70366370553000
+  version_requirements: *70265218880500
 description: A Ruby wrapper for the ScraperWiki API
 email:
 - info@opennorth.ca
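
A note on the `&70…`/`*70…` churn above: these are ordinary YAML anchors and aliases, and the numbers appear to be Ruby object ids from the machine that built the gem, which is why they change on every release even though the dependency constraints themselves are identical. A quick illustration of the anchor/alias mechanism (generic YAML, not the gemspec's actual values):

```ruby
require 'yaml'

doc = YAML.load("requirement: &1 {version: '1.0'}\nversion_requirements: *1")
doc['version_requirements'] # => {"version"=>"1.0"}, the same node as doc['requirement']
```
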