mida 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +18 -2
- data/README.rdoc +22 -11
- data/Rakefile +2 -2
- data/lib/mida.rb +1 -1
- data/lib/mida/datatype.rb +15 -0
- data/lib/mida/datatype/boolean.rb +18 -0
- data/lib/mida/datatype/float.rb +15 -0
- data/lib/mida/datatype/integer.rb +15 -0
- data/lib/mida/datatype/iso8601date.rb +17 -0
- data/lib/mida/datatype/number.rb +15 -0
- data/lib/mida/datatype/text.rb +13 -0
- data/lib/mida/document.rb +21 -19
- data/lib/mida/genericvocabulary.rb +13 -0
- data/lib/mida/item.rb +83 -71
- data/lib/mida/itemprop.rb +55 -30
- data/lib/mida/itemscope.rb +82 -0
- data/lib/mida/propertydesc.rb +36 -0
- data/lib/mida/vocabulary.rb +60 -6
- data/spec/datatype/boolean_spec.rb +27 -0
- data/spec/datatype/float_spec.rb +23 -0
- data/spec/datatype/integer_spec.rb +23 -0
- data/spec/datatype/iso8601date_spec.rb +20 -0
- data/spec/datatype/number_spec.rb +23 -0
- data/spec/datatype/text_spec.rb +14 -0
- data/spec/document_spec.rb +31 -487
- data/spec/item_spec.rb +163 -472
- data/spec/itemprop_spec.rb +40 -45
- data/spec/itemscope_spec.rb +287 -0
- data/spec/propertydesc_spec.rb +56 -0
- data/spec/spec_helper.rb +13 -36
- data/spec/vocabulary_spec.rb +148 -0
- metadata +22 -6
- data/lib/mida/vocabulary/generic.rb +0 -15
- data/lib/mida/vocabularydesc.rb +0 -57
- data/spec/vocabularydesc_spec.rb +0 -106
data/lib/mida/itemprop.rb
CHANGED
@@ -1,20 +1,11 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'uri'
|
3
|
+
require 'mida/itemscope'
|
3
4
|
|
4
5
|
module Mida
|
5
6
|
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
# Returns a Hash representing the property.
|
10
|
-
# Hash is of the form {'property name' => 'value'}
|
11
|
-
# [element] The itemprop element to be parsed
|
12
|
-
# [page_url] The url of the page, including the filename, used to form absolute urls
|
13
|
-
def self.parse(element, page_url=nil)
|
14
|
-
extract_property_names(element).each_with_object({}) do |name, memo|
|
15
|
-
memo[name] = extract_property(element, page_url)
|
16
|
-
end
|
17
|
-
end
|
7
|
+
# Class that parses itemprop elements
|
8
|
+
class Itemprop
|
18
9
|
|
19
10
|
NON_TEXTCONTENT_ELEMENTS = {
|
20
11
|
'a' => 'href', 'area' => 'href',
|
@@ -28,44 +19,78 @@ module Mida
|
|
28
19
|
|
29
20
|
URL_ATTRIBUTES = ['data', 'href', 'src']
|
30
21
|
|
22
|
+
# A Hash representing the properties.
|
23
|
+
# Hash is of the form {'property name' => 'value'}
|
24
|
+
attr_reader :properties
|
25
|
+
|
26
|
+
# Create a new Itemprop object
|
27
|
+
# [element] The itemprop element to be parsed
|
28
|
+
# [page_url] The url of the page, including filename, used to form
|
29
|
+
# absolute urls
|
30
|
+
def initialize(element, page_url=nil)
|
31
|
+
@element, @page_url = element, page_url
|
32
|
+
@properties = extract_properties
|
33
|
+
end
|
34
|
+
|
35
|
+
# Parse the element and return a hash representing the properties.
|
36
|
+
# Hash is of the form {'property name' => 'value'}
|
37
|
+
# [element] The itemprop element to be parsed
|
38
|
+
# [page_url] The url of the page, including filename, used to form
|
39
|
+
# absolute urls
|
40
|
+
def self.parse(element, page_url=nil)
|
41
|
+
self.new(element, page_url).properties
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def extract_properties
|
46
|
+
prop_names = extract_property_names
|
47
|
+
prop_names.each_with_object({}) do |name, memo|
|
48
|
+
memo[name] = extract_property
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
31
52
|
# This returns an empty string if it can't form a valid
|
32
53
|
# absolute url as per the Microdata spec.
|
33
|
-
def
|
54
|
+
def make_absolute_url(url)
|
34
55
|
return url unless URI.parse(url).relative?
|
35
56
|
begin
|
36
|
-
URI.parse(page_url).merge(url).to_s
|
57
|
+
URI.parse(@page_url).merge(url).to_s
|
37
58
|
rescue URI::Error
|
38
59
|
''
|
39
60
|
end
|
40
61
|
end
|
41
62
|
|
42
|
-
def
|
43
|
-
|
63
|
+
def non_textcontent_element?(element)
|
64
|
+
NON_TEXTCONTENT_ELEMENTS.has_key?(element)
|
65
|
+
end
|
66
|
+
|
67
|
+
def url_attribute?(attribute)
|
68
|
+
URL_ATTRIBUTES.include?(attribute)
|
69
|
+
end
|
70
|
+
|
71
|
+
def extract_property_names
|
72
|
+
itemprop_attr = @element.attribute('itemprop')
|
44
73
|
itemprop_attr ? itemprop_attr.value.split() : []
|
45
74
|
end
|
46
75
|
|
47
|
-
def
|
48
|
-
element =
|
49
|
-
if
|
76
|
+
def extract_property_value
|
77
|
+
element = @element.name
|
78
|
+
if non_textcontent_element?(element)
|
50
79
|
attribute = NON_TEXTCONTENT_ELEMENTS[element]
|
51
|
-
value =
|
52
|
-
|
80
|
+
value = @element.attribute(attribute).value
|
81
|
+
url_attribute?(attribute) ? make_absolute_url(value) : value
|
53
82
|
else
|
54
|
-
|
83
|
+
@element.inner_text.strip
|
55
84
|
end
|
56
85
|
end
|
57
86
|
|
58
|
-
def
|
59
|
-
if
|
60
|
-
|
87
|
+
def extract_property
|
88
|
+
if @element.attribute('itemscope')
|
89
|
+
Itemscope.new(@element, @page_url)
|
61
90
|
else
|
62
|
-
extract_property_value
|
91
|
+
extract_property_value
|
63
92
|
end
|
64
93
|
end
|
65
94
|
|
66
|
-
private_class_method :make_absolute_url, :extract_property_names
|
67
|
-
private_class_method :extract_property_value, :extract_property
|
68
|
-
|
69
95
|
end
|
70
|
-
|
71
96
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Mida
|
4
|
+
|
5
|
+
# Class that parses itemscope elements
|
6
|
+
class Itemscope
|
7
|
+
|
8
|
+
# The Type of the itemscope
|
9
|
+
attr_reader :type
|
10
|
+
|
11
|
+
# The Global Identifier of the itemscope
|
12
|
+
attr_reader :id
|
13
|
+
|
14
|
+
# A Hash representing the properties as name/value pairs
|
15
|
+
# The values will be an array containing either +String+
|
16
|
+
# or <tt>Mida::Itemscope</tt> instances
|
17
|
+
attr_reader :properties
|
18
|
+
|
19
|
+
# Create a new Itemscope object
|
20
|
+
#
|
21
|
+
# [itemscope_node] The itemscope_node that you want to parse.
|
22
|
+
# [page_url] The url of the target page, used to form absolute urls.
|
23
|
+
def initialize(itemscope_node, page_url=nil)
|
24
|
+
@itemscope_node, @page_url = itemscope_node, page_url
|
25
|
+
@type, @id = extract_attribute('itemtype'), extract_attribute('itemid')
|
26
|
+
@properties = {}
|
27
|
+
add_itemref_properties
|
28
|
+
parse_elements(extract_elements(@itemscope_node))
|
29
|
+
end
|
30
|
+
|
31
|
+
# Same as +new+ for convenience
|
32
|
+
def self.parse(itemscope, page_url=nil)
|
33
|
+
self.new itemscope, page_url
|
34
|
+
end
|
35
|
+
|
36
|
+
def ==(other)
|
37
|
+
@type == other.type && @id == other.id && @properties == other.properties
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def extract_attribute(attribute)
|
43
|
+
(value = @itemscope_node.attribute(attribute)) ? value.value : nil
|
44
|
+
end
|
45
|
+
|
46
|
+
def extract_elements(itemscope)
|
47
|
+
itemscope.search('./*')
|
48
|
+
end
|
49
|
+
|
50
|
+
# Find an element with a matching id
|
51
|
+
def find_with_id(id)
|
52
|
+
@itemscope_node.search("//*[@id='#{id}']")
|
53
|
+
end
|
54
|
+
|
55
|
+
# Add any properties referred to by 'itemref'
|
56
|
+
def add_itemref_properties
|
57
|
+
itemref = extract_attribute('itemref')
|
58
|
+
if itemref
|
59
|
+
itemref.split.each {|id| parse_elements(find_with_id(id))}
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def parse_elements(elements)
|
64
|
+
elements.each {|element| parse_element(element)}
|
65
|
+
end
|
66
|
+
|
67
|
+
def parse_element(element)
|
68
|
+
itemscope = element.attribute('itemscope')
|
69
|
+
itemprop = element.attribute('itemprop')
|
70
|
+
internal_elements = extract_elements(element)
|
71
|
+
add_itemprop(element) if itemscope || itemprop
|
72
|
+
parse_elements(internal_elements) if internal_elements && !itemscope
|
73
|
+
end
|
74
|
+
|
75
|
+
# Add an 'itemprop' to the properties
|
76
|
+
def add_itemprop(itemprop)
|
77
|
+
properties = Itemprop.parse(itemprop, @page_url)
|
78
|
+
properties.each { |name, value| (@properties[name] ||= []) << value }
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mida
|
2
|
+
|
3
|
+
# Class used to describe a property
|
4
|
+
class PropertyDesc
|
5
|
+
def initialize(num, &block)
|
6
|
+
@num, @types = num, []
|
7
|
+
if block_given?
|
8
|
+
instance_eval(&block)
|
9
|
+
@types = [DataType::Text] unless @types.size >= 1
|
10
|
+
else
|
11
|
+
@types = [DataType::Text]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# What to extract for this property.
|
16
|
+
# This can be a datatype such as +:text+ or a +Vocabulary+.
|
17
|
+
# The types should be supplied in order of preference.
|
18
|
+
# If you want to say any type, then use +:any+ as the class
|
19
|
+
def extract(*types)
|
20
|
+
@types += types
|
21
|
+
end
|
22
|
+
|
23
|
+
# <b>DEPRECATED:</b> Please use +extract+ instead
|
24
|
+
def types(*types)
|
25
|
+
warn "[DEPRECATION] Mida::PropertyDesc#types is deprecated. "+
|
26
|
+
"Please use Mida::PropertyDesc#extract instead."
|
27
|
+
extract *types
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_h
|
31
|
+
{num: @num, types: @types}
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
data/lib/mida/vocabulary.rb
CHANGED
@@ -1,12 +1,32 @@
|
|
1
|
+
require 'set'
|
1
2
|
module Mida
|
2
3
|
|
3
|
-
#
|
4
|
-
|
4
|
+
# Class used to describe a vocabulary
|
5
|
+
#
|
6
|
+
# To specify a vocabulary use the following methods:
|
7
|
+
# +itemtype+, +has_one+, +has_many+
|
8
|
+
class Vocabulary
|
9
|
+
|
10
|
+
class << self
|
11
|
+
# Return the properties specification
|
12
|
+
attr_reader :properties
|
13
|
+
|
14
|
+
# Return the registered vocabularies
|
15
|
+
attr_reader :vocabularies
|
16
|
+
end
|
17
|
+
|
18
|
+
@vocabularies = Set.new
|
19
|
+
@properties = {}
|
5
20
|
|
6
21
|
# Register a vocabulary that can be used when parsing,
|
7
22
|
# later vocabularies are given precedence over earlier ones
|
8
23
|
def self.register(vocabulary)
|
9
|
-
|
24
|
+
@vocabularies << vocabulary
|
25
|
+
end
|
26
|
+
|
27
|
+
# Un-register a vocabulary
|
28
|
+
def self.unregister(vocabulary)
|
29
|
+
@vocabularies.delete(vocabulary)
|
10
30
|
end
|
11
31
|
|
12
32
|
# Find the last vocabulary registered that matches the itemtype
|
@@ -17,10 +37,44 @@ module Mida
|
|
17
37
|
nil
|
18
38
|
end
|
19
39
|
|
20
|
-
|
21
|
-
|
22
|
-
|
40
|
+
def self.inherited(subclass)
|
41
|
+
register(subclass)
|
42
|
+
end
|
43
|
+
|
44
|
+
# Sets the regular expression to match against the +itemtype+
|
45
|
+
# or returns the current regular expression
|
46
|
+
def self.itemtype(regexp_arg=nil)
|
47
|
+
if regexp_arg
|
48
|
+
@itemtype = regexp_arg
|
49
|
+
else
|
50
|
+
@itemtype
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
# Defines the properties as only containing one value
|
56
|
+
# If you want to match any property name, use +:any+ as the name
|
57
|
+
# Within a block you can use the methods of the class +PropertyDesc+
|
58
|
+
def self.has_one(*property_names, &block)
|
59
|
+
has(:one, *property_names, &block)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Defines the properties as containing many values
|
63
|
+
# If you want to match any property name, use +:any+ as the name
|
64
|
+
# Within a block you can use the methods of the class +PropertyDesc+
|
65
|
+
def self.has_many(*property_names, &block)
|
66
|
+
has(:many, *property_names, &block)
|
23
67
|
end
|
24
68
|
|
69
|
+
def self.has(num, *property_names, &block)
|
70
|
+
@properties ||= {}
|
71
|
+
property_names.each_with_object(@properties) do |name, properties|
|
72
|
+
property_desc = PropertyDesc.new(num, &block)
|
73
|
+
properties[name] = property_desc.to_h
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
private_class_method :has
|
78
|
+
|
25
79
|
end
|
26
80
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Boolean do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if some other text' do
|
6
|
+
test = lambda {Mida::DataType::Boolean.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Boolean.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract should return true for "True" whatever the case' do
|
16
|
+
['true', 'True', 'TRUE', 'tRUE'].each do |true_text|
|
17
|
+
Mida::DataType::Boolean.extract(true_text).should be_true
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it '#extract should return false for "False" whatever the case' do
|
22
|
+
['false', 'False', 'FALSE', 'fALSE'].each do |false_text|
|
23
|
+
Mida::DataType::Boolean.extract(false_text).should be_false
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Float do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Float.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Float.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value as a Float if a floating point' do
|
16
|
+
Mida::DataType::Float.extract("3.14").should == 3.14
|
17
|
+
end
|
18
|
+
|
19
|
+
it '#extract? should return the input value as a Float if a integer' do
|
20
|
+
Mida::DataType::Float.extract("3").should == 3
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Integer do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Integer.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Integer.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should raise an exception if a floating point number' do
|
16
|
+
test = lambda {Mida::DataType::Integer.extract('3.14')}
|
17
|
+
test.should raise_error(ArgumentError)
|
18
|
+
end
|
19
|
+
|
20
|
+
it '#extract? should return the input value as a Integer if a integer' do
|
21
|
+
Mida::DataType::Integer.extract("3").should == 3
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::ISO8601Date do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if some other text' do
|
6
|
+
test = lambda {Mida::DataType::ISO8601Date.extract('27th Aug 2009')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::ISO8601Date.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value' do
|
16
|
+
date = "2009-08-27T01:13:04+05:10"
|
17
|
+
Mida::DataType::ISO8601Date.extract(date).should == DateTime.parse(date)
|
18
|
+
Mida::DataType::ISO8601Date.extract(date).to_s.should == date
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Number do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Number.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Number.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value as a number if a floating point' do
|
16
|
+
Mida::DataType::Number.extract("3.14").should == 3.14
|
17
|
+
end
|
18
|
+
|
19
|
+
it '#extract? should return the input value as a number if a integer' do
|
20
|
+
Mida::DataType::Number.extract("3").should == 3
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|