mida 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +4 -0
- data/README.rdoc +24 -3
- data/Rakefile +2 -1
- data/bin/mida +7 -3
- data/lib/mida/item.rb +2 -4
- data/lib/mida/vocabulary.rb +32 -10
- data/spec/item_spec.rb +57 -0
- data/spec/vocabulary_spec.rb +72 -0
- metadata +13 -2
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
== 0.3.2 (9th July 2011)
|
2
|
+
* Ensure <tt>bin/mida</tt> only searches if type regexp passed
|
3
|
+
* Allow vocabularies to be included into other vocabularies
|
4
|
+
|
1
5
|
== 0.3.1 (5th July 2011)
|
2
6
|
* Add bin/mida executable
|
3
7
|
* Changed <tt>Item#to_h</tt> to only return keys with values
|
data/README.rdoc
CHANGED
@@ -77,9 +77,7 @@ Mida allows you to define vocabularies, so that input data can be constrained to
|
|
77
77
|
expected patterns. By default a generic vocabulary (<tt>Mida::GenericVocabulary</tt>)
|
78
78
|
is registered which will match against any +itemtype+ with any number of properties.
|
79
79
|
|
80
|
-
If you want to specify a vocabulary you create a class derived from <tt>Mida::Vocabulary</tt>
|
81
|
-
and use +itemtype+, +has_one+, +has_many+ and +extract+ to describe the vocabulary.
|
82
|
-
|
80
|
+
If you want to specify a vocabulary you create a class derived from <tt>Mida::Vocabulary</tt>.
|
83
81
|
As an example the following describes a subset of Google's Review vocabulary:
|
84
82
|
|
85
83
|
class Rating < Mida::Vocabulary
|
@@ -105,6 +103,29 @@ will only allow the specified properties and will reject any that don't have the
|
|
105
103
|
will also set <tt>Item#vocabulary</tt> accordingly, e.g.
|
106
104
|
doc.items.first.vocabulary # => Review
|
107
105
|
|
106
|
+
If you want to include the properties of another vocabulary you can use
|
107
|
+
+include_vocabulary+:
|
108
|
+
class Thing < Mida::Vocabulary
|
109
|
+
itemtype %r{http://example.com/vocab/thing}i
|
110
|
+
has_one 'name', 'description'
|
111
|
+
end
|
112
|
+
|
113
|
+
class Book < Mida::Vocabulary
|
114
|
+
itemtype %r{http://example.com/vocab/book}i
|
115
|
+
include_vocabulary Thing
|
116
|
+
has_one 'title', 'author'
|
117
|
+
end
|
118
|
+
|
119
|
+
class Collection < Mida::Vocabulary
|
120
|
+
itemtype %r{http://example.com/vocab/collection}i
|
121
|
+
has_many 'item' do
|
122
|
+
extract Thing
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
In the above if you gave a +Book+ as an item of +Collection+ this would be
|
127
|
+
accepted because it includes the +Thing+ vocabulary.
|
128
|
+
|
108
129
|
== Bugs/Feature Requests
|
109
130
|
If you find a bug or want to make a feature request, please report it at the
|
110
131
|
Mida project's {issues tracker}[https://github.com/LawrenceWoodman/mida/issues]
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ spec = Gem::Specification.new do |s|
|
|
6
6
|
s.name = "mida"
|
7
7
|
s.summary = "A Microdata parser/extractor library"
|
8
8
|
s.description = "A Microdata parser and extractor library, based on the latest published version of the Microdata Specification, dated 5th April 2011."
|
9
|
-
s.version = "0.3.1"
|
9
|
+
s.version = "0.3.2"
|
10
10
|
s.author = "Lawrence Woodman"
|
11
11
|
s.email = "lwoodman@vlifesystems.com"
|
12
12
|
s.homepage = %q{http://lawrencewoodman.github.com/mida/}
|
@@ -18,6 +18,7 @@ spec = Gem::Specification.new do |s|
|
|
18
18
|
s.rdoc_options << '--main' << 'README.rdoc'
|
19
19
|
s.add_dependency('nokogiri', '>= 1.5')
|
20
20
|
s.add_development_dependency('rspec', '>= 2.0' )
|
21
|
+
s.add_development_dependency('bundler')
|
21
22
|
end
|
22
23
|
Gem::PackageTask.new(spec).define
|
23
24
|
|
data/bin/mida
CHANGED
@@ -54,7 +54,7 @@ ARGV.options do |option|
|
|
54
54
|
begin
|
55
55
|
option.parse!
|
56
56
|
rescue OptionParser::InvalidOption => error
|
57
|
-
puts "#{error}\n#{option}"; exit
|
57
|
+
puts "#{error.to_s.capitalize}\n#{option}"; exit
|
58
58
|
end
|
59
59
|
if ARGV.empty? then puts option; exit end
|
60
60
|
end
|
@@ -85,8 +85,12 @@ def parse_source(source, options)
|
|
85
85
|
begin
|
86
86
|
open(source) do |f|
|
87
87
|
doc = Mida::Document.new(f, url, options[:validate])
|
88
|
-
|
89
|
-
|
88
|
+
items = if options[:type]
|
89
|
+
doc.search(options[:type])
|
90
|
+
else
|
91
|
+
doc.items
|
92
|
+
end
|
93
|
+
|
90
94
|
if items.empty?
|
91
95
|
puts "No microdata found in this document."; exit
|
92
96
|
else
|
data/lib/mida/item.rb
CHANGED
@@ -140,10 +140,8 @@ module Mida
|
|
140
140
|
# Returns whether the +itemtype+ is a valid type
|
141
141
|
def valid_itemtype?(valid_types, itemtype)
|
142
142
|
return true if valid_types.include?(:any)
|
143
|
-
|
144
|
-
valid_types.find do |type|
|
145
|
-
type.respond_to?(:itemtype) && type.itemtype =~ itemtype
|
146
|
-
end
|
143
|
+
vocabulary = Vocabulary.find(itemtype)
|
144
|
+
valid_types.find {|type| vocabulary.kind_of?(type) }
|
147
145
|
end
|
148
146
|
|
149
147
|
# Returns the extracted value or +nil+ if none of the datatypes
|
data/lib/mida/vocabulary.rb
CHANGED
@@ -8,15 +8,11 @@ module Mida
|
|
8
8
|
class Vocabulary
|
9
9
|
|
10
10
|
class << self
|
11
|
-
# Return the properties specification
|
12
|
-
attr_reader :properties
|
13
|
-
|
14
11
|
# Return the registered vocabularies
|
15
12
|
attr_reader :vocabularies
|
16
13
|
end
|
17
14
|
|
18
15
|
@vocabularies = Set.new
|
19
|
-
@properties = {}
|
20
16
|
|
21
17
|
# Register a vocabulary that can be used when parsing,
|
22
18
|
# later vocabularies are given precedence over earlier ones
|
@@ -41,14 +37,40 @@ module Mida
|
|
41
37
|
register(subclass)
|
42
38
|
end
|
43
39
|
|
40
|
+
# Return the properties specification
|
41
|
+
def self.properties
|
42
|
+
@properties ||= {}
|
43
|
+
end
|
44
|
+
|
45
|
+
# Return the included vocabularies
|
46
|
+
def self.included_vocabularies
|
47
|
+
@included_vocabularies ||= Set.new
|
48
|
+
end
|
49
|
+
|
50
|
+
# Include the properties from the specified <tt>vocabularies</tt>.
|
51
|
+
# This is the correct way to inherit properties from another vocabulary,
|
52
|
+
# rather than subclassing.
|
53
|
+
def self.include_vocabulary(*vocabularies)
|
54
|
+
vocabularies.each do |vocabulary|
|
55
|
+
included_vocabularies.merge(vocabulary.included_vocabularies)
|
56
|
+
included_vocabularies << vocabulary
|
57
|
+
properties.merge!(vocabulary.properties)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# As per the standard <tt>kind_of?</tt>, but also checks to see if vocabulary has
|
62
|
+
# been included by self
|
63
|
+
def self.kind_of?(vocabulary)
|
64
|
+
return true if self == vocabulary
|
65
|
+
return true if self.ancestors.include?(vocabulary)
|
66
|
+
included_vocabularies.include?(vocabulary)
|
67
|
+
end
|
68
|
+
|
44
69
|
# Sets the regular expression to match against the +itemtype+
|
45
70
|
# or returns the current regular expression
|
46
|
-
def self.itemtype(
|
47
|
-
|
48
|
-
|
49
|
-
else
|
50
|
-
@itemtype
|
51
|
-
end
|
71
|
+
def self.itemtype(regexp=nil)
|
72
|
+
return @itemtype unless regexp
|
73
|
+
@itemtype = regexp
|
52
74
|
end
|
53
75
|
|
54
76
|
|
data/spec/item_spec.rb
CHANGED
@@ -257,6 +257,63 @@ describe Mida::Item, 'when initialized with an itemscope containing another corr
|
|
257
257
|
|
258
258
|
end
|
259
259
|
|
260
|
+
describe Mida::Item, 'when initialized with an itemscope that has a property type that is a child of the specified type' do
|
261
|
+
before do
|
262
|
+
class Person < Mida::Vocabulary
|
263
|
+
itemtype %r{http://example.com/vocab/person}
|
264
|
+
has_one 'name'
|
265
|
+
has_many 'tel'
|
266
|
+
end
|
267
|
+
|
268
|
+
class Student < Mida::Vocabulary
|
269
|
+
itemtype %r{http://example.com/vocab/student}
|
270
|
+
include_vocabulary Person
|
271
|
+
has_one 'studying'
|
272
|
+
end
|
273
|
+
|
274
|
+
class Organization < Mida::Vocabulary
|
275
|
+
itemtype %r{http://example.com/vocab/organization}
|
276
|
+
has_one 'name'
|
277
|
+
has_many 'employee' do
|
278
|
+
extract Person
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
student_itemscope = mock(Mida::Itemscope)
|
283
|
+
student_itemscope.stub!(:kind_of?).any_number_of_times.with(Mida::Itemscope).and_return(true)
|
284
|
+
student_itemscope.stub!(:type).and_return("http://example.com/vocab/student")
|
285
|
+
student_itemscope.stub!(:id).and_return(nil)
|
286
|
+
student_itemscope.stub!(:properties).and_return(
|
287
|
+
{ 'name' => ['Lorry Woodman'],
|
288
|
+
'tel' => ['000004847582'],
|
289
|
+
'studying' => ['Classics']
|
290
|
+
}
|
291
|
+
)
|
292
|
+
|
293
|
+
org_itemscope = mock(Mida::Itemscope)
|
294
|
+
org_itemscope.stub!(:kind_of?).any_number_of_times.with(Mida::Itemscope).and_return(true)
|
295
|
+
org_itemscope.stub!(:type).and_return("http://example.com/vocab/organization")
|
296
|
+
org_itemscope.stub!(:id).and_return(nil)
|
297
|
+
org_itemscope.stub!(:properties).and_return(
|
298
|
+
{ 'name' => ['Acme Inc.'],
|
299
|
+
'employee' => [student_itemscope]
|
300
|
+
}
|
301
|
+
)
|
302
|
+
@item = Mida::Item.new(org_itemscope)
|
303
|
+
end
|
304
|
+
|
305
|
+
it 'should recognise an itemtype that is the child of that specified' do
|
306
|
+
@item.properties['employee'][0].vocabulary.should == Student
|
307
|
+
@item.properties['employee'][0].type.should == 'http://example.com/vocab/student'
|
308
|
+
@item.properties['employee'][0].properties.should == {
|
309
|
+
'name' => 'Lorry Woodman',
|
310
|
+
'tel' => ['000004847582'],
|
311
|
+
'studying' => 'Classics'
|
312
|
+
}
|
313
|
+
end
|
314
|
+
|
315
|
+
end
|
316
|
+
|
260
317
|
describe Mida::Item, 'when initialized with an itemscope containing another invalid itemscope' do
|
261
318
|
before do
|
262
319
|
class Person < Mida::Vocabulary
|
data/spec/vocabulary_spec.rb
CHANGED
@@ -126,6 +126,10 @@ describe Mida::Vocabulary, 'when subclassed' do
|
|
126
126
|
Mida::Vocabulary.vocabularies.should include(Person)
|
127
127
|
end
|
128
128
|
|
129
|
+
it '#included_vocabularies should be empty' do
|
130
|
+
Person.included_vocabularies.empty?.should be_true
|
131
|
+
end
|
132
|
+
|
129
133
|
end
|
130
134
|
|
131
135
|
describe Mida::Vocabulary, 'when subclassed and has no properties' do
|
@@ -146,3 +150,71 @@ describe Mida::Vocabulary, 'when subclassed and has no properties' do
|
|
146
150
|
end
|
147
151
|
|
148
152
|
end
|
153
|
+
|
154
|
+
describe Mida::Vocabulary, 'when subclassed and using #include_vocabulary' do
|
155
|
+
before do
|
156
|
+
class Thing < Mida::Vocabulary
|
157
|
+
itemtype %r{http://example\.com.*?thing$}i
|
158
|
+
has_one 'description'
|
159
|
+
end
|
160
|
+
|
161
|
+
class Product < Mida::Vocabulary
|
162
|
+
include_vocabulary Thing
|
163
|
+
itemtype %r{http://example\.com.*?product$}i
|
164
|
+
has_one 'make', 'model'
|
165
|
+
has_many 'addons'
|
166
|
+
end
|
167
|
+
|
168
|
+
class Vehicle < Mida::Vocabulary
|
169
|
+
itemtype %r{http://example\.com.*?thing$}i
|
170
|
+
include_vocabulary Product
|
171
|
+
has_one 'colour'
|
172
|
+
end
|
173
|
+
|
174
|
+
class Car < Mida::Vocabulary
|
175
|
+
include_vocabulary Product, Vehicle
|
176
|
+
itemtype %r{http://example\.com.*?car$}i
|
177
|
+
has_one 'engine'
|
178
|
+
has_many 'stickers'
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
it '#itemtype should return the new regexp' do
|
183
|
+
Car.itemtype.should == %r{http://example\.com.*?car$}i
|
184
|
+
end
|
185
|
+
|
186
|
+
it "should contain included vocabularies' properties" do
|
187
|
+
['description', 'make','model', 'colour'].each do
|
188
|
+
|prop| Car.properties[prop][:num].should == :one
|
189
|
+
end
|
190
|
+
Car.properties['addons'][:num].should == :many
|
191
|
+
end
|
192
|
+
|
193
|
+
it "should contain new properties" do
|
194
|
+
Car.properties['engine'][:num].should == :one
|
195
|
+
Car.properties['stickers'][:num].should == :many
|
196
|
+
end
|
197
|
+
|
198
|
+
it '#included_vocabularies should return the included vocabularies' do
|
199
|
+
[Thing, Product, Vehicle].each do |vocab|
|
200
|
+
Car.included_vocabularies.should include(vocab)
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
it '.kind_of? should still work with plain Vocabulary' do
|
205
|
+
Car.kind_of?(Mida::Vocabulary).should be_true
|
206
|
+
end
|
207
|
+
|
208
|
+
it '.kind_of? should recognize included vocabularies' do
|
209
|
+
Car.kind_of?(Car).should be_true
|
210
|
+
Car.kind_of?(Vehicle).should be_true
|
211
|
+
Vehicle.kind_of?(Product).should be_true
|
212
|
+
Car.kind_of?(Product).should be_true
|
213
|
+
Car.kind_of?(Thing).should be_true
|
214
|
+
end
|
215
|
+
|
216
|
+
it '.kind_of? should recognize vocabularies without a relationship' do
|
217
|
+
Vehicle.kind_of?(Car).should be_false
|
218
|
+
Thing.kind_of?(Product).should be_false
|
219
|
+
end
|
220
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: mida
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.3.1
|
5
|
+
version: 0.3.2
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Lawrence Woodman
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-07-05 00:00:00 Z
|
13
|
+
date: 2011-07-09 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -34,6 +34,17 @@ dependencies:
|
|
34
34
|
version: "2.0"
|
35
35
|
type: :development
|
36
36
|
version_requirements: *id002
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: bundler
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id003
|
37
48
|
description: A Microdata parser and extractor library, based on the latest published version of the Microdata Specification, dated 5th April 2011.
|
38
49
|
email: lwoodman@vlifesystems.com
|
39
50
|
executables:
|