mida 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Text do
4
+
5
+ it '#extract should return an empty string if an empty string passed' do
6
+ value = ''
7
+ Mida::DataType::Text.extract(value).should == value
8
+ end
9
+
10
+ it '#extract should return the input value' do
11
+ value = 'A Test Value'
12
+ Mida::DataType::Text.extract(value).should == value
13
+ end
14
+ end
@@ -1,5 +1,5 @@
1
- require_relative 'spec_helper'
2
- require_relative '../lib/mida'
1
+ require 'spec_helper'
2
+ require 'mida'
3
3
 
4
4
  def test_parsing(md, vocabulary, expected_results)
5
5
  items = md.search(vocabulary)
@@ -40,292 +40,45 @@ shared_examples_for 'one root itemscope' do
40
40
  end
41
41
  end
42
42
 
43
- describe Mida::Document, 'when run with a document containing textContent and non textContent itemprops' do
44
- before do
45
- @html = '
46
- <html>
47
- <head itemscope>
48
- <link itemprop="link_field" rel="stylesheet" type="text/css" href="stylesheet.css" />
49
- </head>
50
- <body>
51
- There is some text here
52
- <div>
53
- and also some here
54
- <div itemscope>
55
- <span itemprop="span_field">Some span content</span>
56
- <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>.
57
- <meta itemprop="meta_field" content="Some meta content">
58
- <a itemprop="a_field1" href="http://example.com">non content</a>
59
- <a itemprop="a_field2" href="welcome/index.html">non content</a>
60
- <a itemprop="a_field3" href="/intro">non content</a>
61
- <a itemprop="a_field4" href="/intro/index.html">non content</a>
62
- <map name="somemap">
63
- <area shape="rect" coords="0,0,50,120" href="left.html" />
64
- <area itemprop="area_right" shape="rect" coords="51,0,120,120" href="right.html" />
65
- </map>
66
- <audio itemprop="audio_field" src="asound.ogg" controls="controls">
67
- Audio tag not supported by your browser.
68
- </audio>
69
-
70
- <embed itemprop="embed_field" src="helloworld.swf" />
71
- <iframe itemprop="iframe_field" src="http://www.example.com/iframe_test"></iframe>
72
- <img itemprop="img_field" src="animage.png" width="120" height="120" usemap="#planetmap" />
73
- <object itemprop="object_field" data="object.png" type="image/png" />
74
- <audio controls="controls">
75
- <source itemprop="source_field" src="song.ogg" type="audio/ogg" />
76
- <track itemprop="track_field" src="atrack.ogg" />
77
- Audio tag not supported by your browser.
78
- </audio>
79
- <video itemprop="video_field" src="movie.ogg" controls="controls">
80
- Video tag not supported by your browser.
81
- </video>
82
- </div>
83
- </div>
84
- </body>
85
- </html>
86
- '
87
- end
88
-
89
-
90
- context 'when not given a page_url' do
91
- before do
92
- @md = Mida::Document.new(@html)
93
- end
94
-
95
- it 'should return all the properties and types with the correct values' do
96
- expected_results = [
97
- { vocabulary: Mida::Vocabulary::Generic,
98
- type: nil, id: nil, properties: {'link_field' => ['']} },
99
- { vocabulary: Mida::Vocabulary::Generic,
100
- type: nil,
101
- id: nil,
102
- properties: {
103
- 'span_field' => ['Some span content'],
104
- 'dtreviewed' => ['2009-01-06'],
105
- 'meta_field' => ['Some meta content'],
106
- 'a_field1' => ['http://example.com'],
107
- 'a_field2' => [''],
108
- 'a_field3' => [''],
109
- 'a_field4' => [''],
110
- 'area_right' => [''],
111
- 'audio_field' => [''],
112
- 'embed_field' => [''],
113
- 'iframe_field' => ['http://www.example.com/iframe_test'],
114
- 'img_field' => [''],
115
- 'object_field' => [''],
116
- 'source_field' => [''],
117
- 'track_field' => [''],
118
- 'video_field' => ['']
119
- }
120
- }
121
- ]
122
-
123
- test_parsing(@md, %r{}, expected_results)
124
-
125
- end
126
- end
127
-
128
- context 'when given a page_url' do
129
- before do
130
- @md = Mida::Document.new(@html, 'http://example.com/start/')
131
- end
132
-
133
- it 'should return all the properties and types with the correct values' do
134
- expected_results = [
135
- { vocabulary: Mida::Vocabulary::Generic,
136
- type: nil, id: nil, properties: {
137
- 'link_field' => ['http://example.com/start/stylesheet.css']
138
- }
139
- },
140
- { vocabulary: Mida::Vocabulary::Generic,
141
- type: nil,
142
- id: nil,
143
- properties: {
144
- 'span_field' => ['Some span content'],
145
- 'dtreviewed' => ['2009-01-06'],
146
- 'meta_field' => ['Some meta content'],
147
- 'a_field1' => ['http://example.com'],
148
- 'a_field2' => ['http://example.com/start/welcome/index.html'],
149
- 'a_field3' => ['http://example.com/intro'],
150
- 'a_field4' => ['http://example.com/intro/index.html'],
151
- 'area_right' => ['http://example.com/start/right.html'],
152
- 'audio_field' => ['http://example.com/start/asound.ogg'],
153
- 'embed_field' => ['http://example.com/start/helloworld.swf'],
154
- 'iframe_field' => ['http://www.example.com/iframe_test'],
155
- 'img_field' => ['http://example.com/start/animage.png'],
156
- 'object_field' => ['http://example.com/start/object.png'],
157
- 'source_field' => ['http://example.com/start/song.ogg'],
158
- 'track_field' => ['http://example.com/start/atrack.ogg'],
159
- 'video_field' => ['http://example.com/start/movie.ogg']
160
- }
161
- }
162
- ]
163
-
164
- test_parsing(@md, %r{}, expected_results)
165
- end
166
- end
167
-
168
- end
169
-
170
- describe Mida::Document, 'when run against a full html document containing one itemscope with no itemtype' do
171
-
43
+ describe Mida::Document do
172
44
  before do
173
45
  html = '
174
46
  <html><body>
175
47
  There is some text here
176
48
  <div>
177
49
  and also some here
178
- <div itemscope>
50
+ <div itemscope itemtype="http://data-vocabulary.org/Review">
179
51
  <span itemprop="itemreviewed">Romeo Pizza</span>
180
- Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on
181
- <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>.
182
- <meta itemprop="fielda" content="a5482">
183
-
184
- <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span>
185
- <span itemprop="description">This is a very nice pizza place.</span>
186
52
  Rating: <span itemprop="rating">4.5</span>
187
53
  </div>
188
- </div>
189
- </body></html>
190
- '
191
- @md = Mida::Document.new(html)
192
-
193
- end
194
-
195
- it_should_behave_like 'one root itemscope'
196
-
197
- it 'should return all the properties and types with the correct values' do
198
- expected_results = [{
199
- vocabulary: Mida::Vocabulary::Generic,
200
- type: nil,
201
- id: nil,
202
- properties: {
203
- 'itemreviewed' => ['Romeo Pizza'],
204
- 'reviewer' => ['Ulysses Grant'],
205
- 'dtreviewed' => ['2009-01-06'],
206
- 'fielda' => ['a5482'],
207
- 'summary' => ['Delicious, tasty pizza in Eastlake!'],
208
- 'description' => ['This is a very nice pizza place.'],
209
- 'rating' => ['4.5']
210
- }
211
- }]
212
-
213
- test_parsing(@md, %r{}, expected_results)
214
- end
215
-
216
- end
217
-
218
- describe Mida::Document, 'when run against a full html document containing one itemscope nested within another' do
219
-
220
- before do
221
- html = '
222
- <html><body>
223
- There is some text here
224
- <div>
225
- and also some here
226
- <div itemscope>
227
- <span itemprop="itemreviewed">Romeo Pizza</span>
228
- <div itemprop="address" itemscope>
229
- <span itemprop="firstline">237 Italian Way</span>
230
- <span itemprop="country">United Kingdom</span>
231
- </div>
232
- Rating: <span itemprop="rating">4.5</span>
54
+ <div itemscope itemtype="http://data-vocabulary.org/Organization">
55
+ <span itemprop="name">An org name</span>
56
+ <span itemprop="url">http://example.com</span>
233
57
  </div>
234
58
  </div>
235
59
  </body></html>
236
60
  '
237
61
 
238
62
  @md = Mida::Document.new(html)
239
-
240
63
  end
241
64
 
242
- it_should_behave_like 'one root itemscope'
243
-
244
- it 'should return all the properties and types with the correct values' do
245
- expected_results = [{
246
- vocabulary: Mida::Vocabulary::Generic,
247
- type: nil,
248
- id: nil,
249
- properties: {
250
- 'itemreviewed' => ['Romeo Pizza'],
251
- 'address' => [{
252
- vocabulary: Mida::Vocabulary::Generic,
253
- type: nil, id: nil, properties: {
254
- 'firstline' => ['237 Italian Way'],
255
- 'country' => ['United Kingdom']
256
- }
257
- }],
258
- 'rating' => ['4.5']
259
- }
260
- }]
261
-
262
- test_parsing(@md, %r{}, expected_results)
263
- end
264
-
265
- end
266
-
267
- describe Mida::Document, 'when run against a full html document containing one itemscope nested within another within another' do
268
-
269
- before do
270
- html = '
271
- <html><body>
272
- There is some text here
273
- <div>
274
- and also some here
275
- <div itemscope>
276
- <span itemprop="itemreviewed">Romeo Pizza</span>
277
- <div itemprop="address" itemscope>
278
- <div itemprop="firstline" itemscope>
279
- <span itemprop="number">237</span>
280
- <span itemprop="road">Italian Way</span>
281
- </div>
282
- <span itemprop="country">United Kingdom</span>
283
- </div>
284
- Rating: <span itemprop="rating">4.5</span>
285
- </div>
286
- </div>
287
- </body></html>
288
- '
289
-
290
- @md = Mida::Document.new(html)
65
+ it '#each should pass each item to the block' do
66
+ item_num = 0
67
+ @md.each {|item| item.should == @md.items[item_num]; item_num += 1}
291
68
  end
292
69
 
293
- it_should_behave_like 'one root itemscope'
70
+ it 'should have access to the Enumerable mixin methods such as #find' do
71
+ review = @md.find {|item| item.type == 'http://data-vocabulary.org/Review'}
72
+ review.type.should == 'http://data-vocabulary.org/Review'
73
+ review.properties['itemreviewed'].should == ["Romeo Pizza"]
294
74
 
295
- it 'should return all the properties and types with the correct values' do
296
- expected_results = [{
297
- vocabulary: Mida::Vocabulary::Generic,
298
- type: nil,
299
- id: nil,
300
- properties: {
301
- 'itemreviewed' => ['Romeo Pizza'],
302
- 'address' => [{
303
- vocabulary: Mida::Vocabulary::Generic,
304
- type: nil,
305
- id: nil,
306
- properties: {
307
- 'firstline' => [{
308
- vocabulary: Mida::Vocabulary::Generic,
309
- type: nil,
310
- id: nil,
311
- properties: {
312
- 'number' => ['237'],
313
- 'road' => ['Italian Way']
314
- },
315
- }],
316
- 'country' => ['United Kingdom']
317
- },
318
- }],
319
- 'rating' => ['4.5']
320
- }
321
- }]
322
-
323
- test_parsing(@md, %r{^$}, expected_results)
75
+ organization = @md.find {|item| item.type == 'http://data-vocabulary.org/Organization'}
76
+ organization.type.should == 'http://data-vocabulary.org/Organization'
77
+ organization.properties['name'].should == ["An org name"]
324
78
  end
325
-
326
79
  end
327
80
 
328
- describe Mida::Document, 'when run against a full html document containing one itemscope with an itemtype' do
81
+ describe Mida::Document, 'when run against a full html document containing itemscopes with and without itemtypes' do
329
82
 
330
83
  before do
331
84
  html = '
@@ -335,12 +88,12 @@ describe Mida::Document, 'when run against a full html document containing one i
335
88
  and also some here
336
89
  <div itemscope itemtype="http://data-vocabulary.org/Review">
337
90
  <span itemprop="itemreviewed">Romeo Pizza</span>
338
- Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on
339
- <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>.
340
- <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span>
341
- <span itemprop="description">This is a very nice pizza place.</span>
342
91
  Rating: <span itemprop="rating">4.5</span>
343
92
  </div>
93
+ <div itemscope>
94
+ <span itemprop="name">An org name</span>
95
+ <span itemprop="url">http://example.com</span>
96
+ </div>
344
97
  </div>
345
98
  </body></html>
346
99
  '
@@ -349,33 +102,17 @@ describe Mida::Document, 'when run against a full html document containing one i
349
102
 
350
103
  end
351
104
 
352
- it_should_behave_like 'one root itemscope'
353
-
354
- it 'should find the correct number of itemscopes if outer specified' do
355
- @md.search(%r{http://data-vocabulary.org/Review}).size.should == 1
356
- end
357
-
358
- it 'should specify the correct type' do
359
- @md.search(%r{http://data-vocabulary.org/Review}).first.type.should == 'http://data-vocabulary.org/Review'
105
+ it '#search should be able to match against items without an itemtype' do
106
+ items = @md.search(%r{^$})
107
+ items.size.should == 1
108
+ items[0].properties['name'].should == ['An org name']
360
109
  end
361
110
 
362
- it 'should return all the properties and types with the correct values' do
363
- expected_results = [{
364
- vocabulary: Mida::Vocabulary::Generic,
365
- type: 'http://data-vocabulary.org/Review',
366
- id: nil,
367
- properties: {
368
- 'itemreviewed' => ['Romeo Pizza'],
369
- 'reviewer' => ['Ulysses Grant'],
370
- 'dtreviewed' => ['2009-01-06'],
371
- 'summary' => ['Delicious, tasty pizza in Eastlake!'],
372
- 'description' => ['This is a very nice pizza place.'],
373
- 'rating' => ['4.5']
374
- }
375
- }]
376
- test_parsing(@md, %r{http://data-vocabulary.org/Review}, expected_results)
111
+ it '#search should be able to match against items with an itemtype' do
112
+ items = @md.search(%r{^.+$})
113
+ items.size.should == 1
114
+ items[0].type.should == 'http://data-vocabulary.org/Review'
377
115
  end
378
-
379
116
  end
380
117
 
381
118
  describe Mida::Document, 'when run against a full html document containing two non-nested itemscopes with itemtypes' do
@@ -423,7 +160,6 @@ describe Mida::Document, 'when run against a full html document containing two n
423
160
 
424
161
  it 'should return all the properties and types with the correct values for 1st itemscope' do
425
162
  expected_results = [{
426
- vocabulary: Mida::Vocabulary::Generic,
427
163
  type: 'http://data-vocabulary.org/Review',
428
164
  id: nil,
429
165
  properties: {
@@ -436,7 +172,6 @@ describe Mida::Document, 'when run against a full html document containing two n
436
172
 
437
173
  it 'should return all the properties from the text for 2nd itemscope' do
438
174
  expected_results = [{
439
- vocabulary: Mida::Vocabulary::Generic,
440
175
  type: 'http://data-vocabulary.org/Organization',
441
176
  id: nil,
442
177
  properties: {
@@ -482,19 +217,18 @@ describe Mida::Document, 'when run against a full html document containing one
482
217
  %r{http://data-vocabulary.org/Review-aggregate}
483
218
  ]
484
219
  vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1}
220
+
485
221
  end
486
222
 
487
223
  context "when looking at the outer vocabulary" do
488
224
  it 'should return all the properties from the text with the correct values' do
489
225
  expected_results = [{
490
- vocabulary: Mida::Vocabulary::Generic,
491
226
  type: 'http://data-vocabulary.org/Product',
492
227
  id: nil,
493
228
  properties: {
494
229
  'name' => ['DC07'],
495
230
  'brand' => ['Dyson'],
496
231
  'review' => [{
497
- vocabulary: Mida::Vocabulary::Generic,
498
232
  type: 'http://data-vocabulary.org/Review-aggregate',
499
233
  id: nil,
500
234
  properties: {
@@ -572,193 +306,3 @@ describe Mida::Document, 'when run against a document containing an itemscope
572
306
  end
573
307
  end
574
308
  end
575
-
576
- describe Mida::Document, 'when run against a document using itemrefs' do
577
-
578
- before do
579
- html = '
580
- <html><body>
581
- <div itemscope id="amanda" itemref="a b">
582
- <span itemprop="age">30</span>
583
- </div>
584
- <p id="a">Name: <span itemprop="name">Amanda</span></p>
585
- <div id="b" itemprop="band" itemscope itemref="c"></div>
586
- <div id="c">
587
- <p>Band: <span itemprop="name">Jazz Band</span></p>
588
- <p>Size: <span itemprop="size">12</span> players</p>
589
- </div>
590
- </body></html>
591
- '
592
-
593
- @md = Mida::Document.new(html)
594
- end
595
-
596
- it 'should return all the properties from the text with the correct values' do
597
- expected_results = [{
598
- vocabulary: Mida::Vocabulary::Generic,
599
- type: nil,
600
- id: nil,
601
- properties: {
602
- 'name' => ['Amanda'],
603
- 'band' => [{
604
- vocabulary: Mida::Vocabulary::Generic,
605
- type: nil,
606
- id: nil,
607
- properties: {
608
- 'name' => ['Jazz Band'],
609
- 'size' => ['12']
610
- }
611
- }],
612
- 'age' => ['30']
613
- }
614
- }]
615
-
616
- test_parsing(@md, %r{}, expected_results)
617
- end
618
- end
619
-
620
- describe Mida::Document, 'when run against a document using multiple itemprops with the same name' do
621
-
622
- before do
623
- html = '
624
- <html><body>
625
- <div itemscope itemtype="icecreams">
626
- <p>Flavours in my favourite ice cream:</p>
627
- <ul>
628
- <li itemprop="flavour">Lemon sorbet</li>
629
- <li itemprop="flavour">Apricot sorbet</li>
630
- <li itemprop="flavour" itemscope itemtype="icecream-type">
631
- <span itemprop="fruit">Strawberry</span>
632
- <span itemprop="style">Homemade</span>
633
- </li>
634
- </ul>
635
- </div>
636
- </body></html>
637
- '
638
-
639
- @md = Mida::Document.new(html)
640
- end
641
-
642
- it_should_behave_like 'one root itemscope'
643
-
644
- it 'should return the correct number of itemscopes' do
645
- vocabularies = [
646
- %r{icecreams},
647
- %r{icecream-type}
648
- ]
649
- vocabularies.each {|vocabulary| @md.search(vocabulary).size.should == 1}
650
- end
651
-
652
- it 'should return all the properties from the text with the correct values' do
653
- expected_results = [{
654
- vocabulary: Mida::Vocabulary::Generic,
655
- type: 'icecreams',
656
- id: nil,
657
- properties: {
658
- 'flavour' => [
659
- 'Lemon sorbet',
660
- 'Apricot sorbet',
661
- { vocabulary: Mida::Vocabulary::Generic,
662
- type: 'icecream-type',
663
- id: nil,
664
- properties: {
665
- 'fruit' => ['Strawberry'],
666
- 'style' => ['Homemade']
667
- }
668
- }
669
- ]
670
- }
671
- }]
672
-
673
- test_parsing(@md, %r{icecreams}, expected_results)
674
- end
675
- end
676
-
677
- describe Mida::Document, 'when run against a document using an itemprop with multiple properties' do
678
-
679
- before do
680
- html = '
681
- <html><body>
682
- <div itemscope>
683
- <span itemprop="favourite-colour favourite-fruit">orange</span>
684
- </div>
685
- </body></html>
686
- '
687
-
688
- @md = Mida::Document.new(html)
689
- end
690
-
691
- it 'should return all the properties from the text with the correct values' do
692
- expected_results = [{
693
- vocabulary: Mida::Vocabulary::Generic,
694
- type: nil,
695
- id: nil,
696
- properties: {
697
- 'favourite-colour' => ['orange'],
698
- 'favourite-fruit' => ['orange']
699
- }
700
- }]
701
-
702
- test_parsing(@md, %r{}, expected_results)
703
- end
704
- end
705
-
706
- describe Mida::Document, 'when run against a full html document containing an itemtype that matches a registered vocabulary' do
707
-
708
- before do
709
- html = '
710
- <html><body>
711
- There is some text here
712
- <div>
713
- and also some here
714
- <div itemscope itemtype="http://data-vocabulary.org/Review">
715
- <span itemprop="itemreviewed">Romeo Pizza</span>
716
- Reviewed by <span itemprop="reviewer">Ulysses Grant</span> on
717
- <time itemprop="dtreviewed" datetime="2009-01-06">Jan 6</time>.
718
- <span itemprop="summary">Delicious, tasty pizza in Eastlake!</span>
719
- <span itemprop="description">This is a very nice pizza place.</span>
720
- Rating: <span itemprop="rating">4.5</span>
721
- </div>
722
- </div>
723
- </body></html>
724
- '
725
-
726
- class Review < Mida::VocabularyDesc
727
- itemtype %r{http://data-vocabulary.org/Review}
728
- has_one 'itemreviewed', 'reviewer', 'dtreviewed', 'summary'
729
- has_one 'rating', 'description'
730
- end
731
- Mida::Vocabulary.register(Review)
732
-
733
- @md = Mida::Document.new(html)
734
-
735
- end
736
-
737
- it_should_behave_like 'one root itemscope'
738
-
739
- it '#search should match against Review' do
740
- @md.search(Review).size.should == 1
741
- end
742
-
743
- it 'should specify the correct type' do
744
- @md.search(Review).first.type.should == 'http://data-vocabulary.org/Review'
745
- end
746
-
747
- it 'should return all the properties and types with the correct values' do
748
- expected_results = [{
749
- vocabulary: Review,
750
- type: 'http://data-vocabulary.org/Review',
751
- id: nil,
752
- properties: {
753
- 'itemreviewed' => ['Romeo Pizza'],
754
- 'reviewer' => ['Ulysses Grant'],
755
- 'dtreviewed' => ['2009-01-06'],
756
- 'summary' => ['Delicious, tasty pizza in Eastlake!'],
757
- 'description' => ['This is a very nice pizza place.'],
758
- 'rating' => ['4.5']
759
- }
760
- }]
761
- test_parsing(@md, Review, expected_results)
762
- end
763
-
764
- end