mida 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +18 -2
- data/README.rdoc +22 -11
- data/Rakefile +2 -2
- data/lib/mida.rb +1 -1
- data/lib/mida/datatype.rb +15 -0
- data/lib/mida/datatype/boolean.rb +18 -0
- data/lib/mida/datatype/float.rb +15 -0
- data/lib/mida/datatype/integer.rb +15 -0
- data/lib/mida/datatype/iso8601date.rb +17 -0
- data/lib/mida/datatype/number.rb +15 -0
- data/lib/mida/datatype/text.rb +13 -0
- data/lib/mida/document.rb +21 -19
- data/lib/mida/genericvocabulary.rb +13 -0
- data/lib/mida/item.rb +83 -71
- data/lib/mida/itemprop.rb +55 -30
- data/lib/mida/itemscope.rb +82 -0
- data/lib/mida/propertydesc.rb +36 -0
- data/lib/mida/vocabulary.rb +60 -6
- data/spec/datatype/boolean_spec.rb +27 -0
- data/spec/datatype/float_spec.rb +23 -0
- data/spec/datatype/integer_spec.rb +23 -0
- data/spec/datatype/iso8601date_spec.rb +20 -0
- data/spec/datatype/number_spec.rb +23 -0
- data/spec/datatype/text_spec.rb +14 -0
- data/spec/document_spec.rb +31 -487
- data/spec/item_spec.rb +163 -472
- data/spec/itemprop_spec.rb +40 -45
- data/spec/itemscope_spec.rb +287 -0
- data/spec/propertydesc_spec.rb +56 -0
- data/spec/spec_helper.rb +13 -36
- data/spec/vocabulary_spec.rb +148 -0
- metadata +22 -6
- data/lib/mida/vocabulary/generic.rb +0 -15
- data/lib/mida/vocabularydesc.rb +0 -57
- data/spec/vocabularydesc_spec.rb +0 -106
data/lib/mida/itemprop.rb
CHANGED
@@ -1,20 +1,11 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'uri'
|
3
|
+
require 'mida/itemscope'
|
3
4
|
|
4
5
|
module Mida
|
5
6
|
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
# Returns a Hash representing the property.
|
10
|
-
# Hash is of the form {'property name' => 'value'}
|
11
|
-
# [element] The itemprop element to be parsed
|
12
|
-
# [page_url] The url of the page, including the filename, used to form absolute urls
|
13
|
-
def self.parse(element, page_url=nil)
|
14
|
-
extract_property_names(element).each_with_object({}) do |name, memo|
|
15
|
-
memo[name] = extract_property(element, page_url)
|
16
|
-
end
|
17
|
-
end
|
7
|
+
# Class that parses itemprop elements
|
8
|
+
class Itemprop
|
18
9
|
|
19
10
|
NON_TEXTCONTENT_ELEMENTS = {
|
20
11
|
'a' => 'href', 'area' => 'href',
|
@@ -28,44 +19,78 @@ module Mida
|
|
28
19
|
|
29
20
|
URL_ATTRIBUTES = ['data', 'href', 'src']
|
30
21
|
|
22
|
+
# A Hash representing the properties.
|
23
|
+
# Hash is of the form {'property name' => 'value'}
|
24
|
+
attr_reader :properties
|
25
|
+
|
26
|
+
# Create a new Itemprop object
|
27
|
+
# [element] The itemprop element to be parsed
|
28
|
+
# [page_url] The url of the page, including filename, used to form
|
29
|
+
# absolute urls
|
30
|
+
def initialize(element, page_url=nil)
|
31
|
+
@element, @page_url = element, page_url
|
32
|
+
@properties = extract_properties
|
33
|
+
end
|
34
|
+
|
35
|
+
# Parse the element and return a hash representing the properties.
|
36
|
+
# Hash is of the form {'property name' => 'value'}
|
37
|
+
# [element] The itemprop element to be parsed
|
38
|
+
# [page_url] The url of the page, including filename, used to form
|
39
|
+
# absolute urls
|
40
|
+
def self.parse(element, page_url=nil)
|
41
|
+
self.new(element, page_url).properties
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def extract_properties
|
46
|
+
prop_names = extract_property_names
|
47
|
+
prop_names.each_with_object({}) do |name, memo|
|
48
|
+
memo[name] = extract_property
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
31
52
|
# This returns an empty string if it can't form a valid
|
32
53
|
# absolute url as per the Microdata spec.
|
33
|
-
def
|
54
|
+
def make_absolute_url(url)
|
34
55
|
return url unless URI.parse(url).relative?
|
35
56
|
begin
|
36
|
-
URI.parse(page_url).merge(url).to_s
|
57
|
+
URI.parse(@page_url).merge(url).to_s
|
37
58
|
rescue URI::Error
|
38
59
|
''
|
39
60
|
end
|
40
61
|
end
|
41
62
|
|
42
|
-
def
|
43
|
-
|
63
|
+
def non_textcontent_element?(element)
|
64
|
+
NON_TEXTCONTENT_ELEMENTS.has_key?(element)
|
65
|
+
end
|
66
|
+
|
67
|
+
def url_attribute?(attribute)
|
68
|
+
URL_ATTRIBUTES.include?(attribute)
|
69
|
+
end
|
70
|
+
|
71
|
+
def extract_property_names
|
72
|
+
itemprop_attr = @element.attribute('itemprop')
|
44
73
|
itemprop_attr ? itemprop_attr.value.split() : []
|
45
74
|
end
|
46
75
|
|
47
|
-
def
|
48
|
-
element =
|
49
|
-
if
|
76
|
+
def extract_property_value
|
77
|
+
element = @element.name
|
78
|
+
if non_textcontent_element?(element)
|
50
79
|
attribute = NON_TEXTCONTENT_ELEMENTS[element]
|
51
|
-
value =
|
52
|
-
|
80
|
+
value = @element.attribute(attribute).value
|
81
|
+
url_attribute?(attribute) ? make_absolute_url(value) : value
|
53
82
|
else
|
54
|
-
|
83
|
+
@element.inner_text.strip
|
55
84
|
end
|
56
85
|
end
|
57
86
|
|
58
|
-
def
|
59
|
-
if
|
60
|
-
|
87
|
+
def extract_property
|
88
|
+
if @element.attribute('itemscope')
|
89
|
+
Itemscope.new(@element, @page_url)
|
61
90
|
else
|
62
|
-
extract_property_value
|
91
|
+
extract_property_value
|
63
92
|
end
|
64
93
|
end
|
65
94
|
|
66
|
-
private_class_method :make_absolute_url, :extract_property_names
|
67
|
-
private_class_method :extract_property_value, :extract_property
|
68
|
-
|
69
95
|
end
|
70
|
-
|
71
96
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Mida
|
4
|
+
|
5
|
+
# Class that parses itemscope elements
|
6
|
+
class Itemscope
|
7
|
+
|
8
|
+
# The Type of the itemscope
|
9
|
+
attr_reader :type
|
10
|
+
|
11
|
+
# The Global Identifier of the itemscope
|
12
|
+
attr_reader :id
|
13
|
+
|
14
|
+
# A Hash representing the properties as name/value pairs
|
15
|
+
# The values will be an array containing either +String+
|
16
|
+
# or <tt>Mida::Item</tt> instances
|
17
|
+
attr_reader :properties
|
18
|
+
|
19
|
+
# Create a new Itemscope object
|
20
|
+
#
|
21
|
+
# [itemscope_node] The itemscope_node that you want to parse.
|
22
|
+
# [page_url] The url of the target, used to form absolute urls.
|
23
|
+
def initialize(itemscope_node, page_url=nil)
|
24
|
+
@itemscope_node, @page_url = itemscope_node, page_url
|
25
|
+
@type, @id = extract_attribute('itemtype'), extract_attribute('itemid')
|
26
|
+
@properties = {}
|
27
|
+
add_itemref_properties
|
28
|
+
parse_elements(extract_elements(@itemscope_node))
|
29
|
+
end
|
30
|
+
|
31
|
+
# Same as +new+ for convenience
|
32
|
+
def self.parse(itemscope, page_url=nil)
|
33
|
+
self.new itemscope, page_url
|
34
|
+
end
|
35
|
+
|
36
|
+
def ==(other)
|
37
|
+
@type == other.type && @id == other.id && @properties == other.properties
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def extract_attribute(attribute)
|
43
|
+
(value = @itemscope_node.attribute(attribute)) ? value.value : nil
|
44
|
+
end
|
45
|
+
|
46
|
+
def extract_elements(itemscope)
|
47
|
+
itemscope.search('./*')
|
48
|
+
end
|
49
|
+
|
50
|
+
# Find an element with a matching id
|
51
|
+
def find_with_id(id)
|
52
|
+
@itemscope_node.search("//*[@id='#{id}']")
|
53
|
+
end
|
54
|
+
|
55
|
+
# Add any properties referred to by 'itemref'
|
56
|
+
def add_itemref_properties
|
57
|
+
itemref = extract_attribute('itemref')
|
58
|
+
if itemref
|
59
|
+
itemref.split.each {|id| parse_elements(find_with_id(id))}
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def parse_elements(elements)
|
64
|
+
elements.each {|element| parse_element(element)}
|
65
|
+
end
|
66
|
+
|
67
|
+
def parse_element(element)
|
68
|
+
itemscope = element.attribute('itemscope')
|
69
|
+
itemprop = element.attribute('itemprop')
|
70
|
+
internal_elements = extract_elements(element)
|
71
|
+
add_itemprop(element) if itemscope || itemprop
|
72
|
+
parse_elements(internal_elements) if internal_elements && !itemscope
|
73
|
+
end
|
74
|
+
|
75
|
+
# Add an 'itemprop' to the properties
|
76
|
+
def add_itemprop(itemprop)
|
77
|
+
properties = Itemprop.parse(itemprop, @page_url)
|
78
|
+
properties.each { |name, value| (@properties[name] ||= []) << value }
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mida
|
2
|
+
|
3
|
+
# Class used to describe a property
|
4
|
+
class PropertyDesc
|
5
|
+
def initialize(num, &block)
|
6
|
+
@num, @types = num, []
|
7
|
+
if block_given?
|
8
|
+
instance_eval(&block)
|
9
|
+
@types = [DataType::Text] unless @types.size >= 1
|
10
|
+
else
|
11
|
+
@types = [DataType::Text]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# What to extract for this property.
|
16
|
+
# This can be a datatype such as +:text+ or a +Vocabulary+.
|
17
|
+
# The types should be supplied in order of preference.
|
18
|
+
# If you want to say any type, then use +:any+ as the class
|
19
|
+
def extract(*types)
|
20
|
+
@types += types
|
21
|
+
end
|
22
|
+
|
23
|
+
# <b>DEPRECATED:</b> Please use +extract+ instead
|
24
|
+
def types(*types)
|
25
|
+
warn "[DEPRECATION] Mida::PropertyDesc#types is deprecated. "+
|
26
|
+
"Please use Mida::PropertyDesc#extract instead."
|
27
|
+
extract *types
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_h
|
31
|
+
{num: @num, types: @types}
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
data/lib/mida/vocabulary.rb
CHANGED
@@ -1,12 +1,32 @@
|
|
1
|
+
require 'set'
|
1
2
|
module Mida
|
2
3
|
|
3
|
-
#
|
4
|
-
|
4
|
+
# Class used to describe a vocabulary
|
5
|
+
#
|
6
|
+
# To specify a vocabulary use the following methods:
|
7
|
+
# +itemtype+, +has_one+, +has_many+
|
8
|
+
class Vocabulary
|
9
|
+
|
10
|
+
class << self
|
11
|
+
# Return the properties specification
|
12
|
+
attr_reader :properties
|
13
|
+
|
14
|
+
# Return the registered vocabularies
|
15
|
+
attr_reader :vocabularies
|
16
|
+
end
|
17
|
+
|
18
|
+
@vocabularies = Set.new
|
19
|
+
@properties = {}
|
5
20
|
|
6
21
|
# Register a vocabulary that can be used when parsing,
|
7
22
|
# later vocabularies are given precedence over earlier ones
|
8
23
|
def self.register(vocabulary)
|
9
|
-
|
24
|
+
@vocabularies << vocabulary
|
25
|
+
end
|
26
|
+
|
27
|
+
# Un-register a vocabulary
|
28
|
+
def self.unregister(vocabulary)
|
29
|
+
@vocabularies.delete(vocabulary)
|
10
30
|
end
|
11
31
|
|
12
32
|
# Find the last vocabulary registered that matches the itemtype
|
@@ -17,10 +37,44 @@ module Mida
|
|
17
37
|
nil
|
18
38
|
end
|
19
39
|
|
20
|
-
|
21
|
-
|
22
|
-
|
40
|
+
def self.inherited(subclass)
|
41
|
+
register(subclass)
|
42
|
+
end
|
43
|
+
|
44
|
+
# Sets the regular expression to match against the +itemtype+
|
45
|
+
# or returns the current regular expression
|
46
|
+
def self.itemtype(regexp_arg=nil)
|
47
|
+
if regexp_arg
|
48
|
+
@itemtype = regexp_arg
|
49
|
+
else
|
50
|
+
@itemtype
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
# Defines the properties as only containing one value
|
56
|
+
# If you want to allow any property name, then use +:any+ as a name
|
57
|
+
# Within a block you can use the methods of the class +PropertyDesc+
|
58
|
+
def self.has_one(*property_names, &block)
|
59
|
+
has(:one, *property_names, &block)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Defines the properties as containing many values
|
63
|
+
# If you want to allow any property name, then use +:any+ as a name
|
64
|
+
# Within a block you can use the methods of the class +PropertyDesc+
|
65
|
+
def self.has_many(*property_names, &block)
|
66
|
+
has(:many, *property_names, &block)
|
23
67
|
end
|
24
68
|
|
69
|
+
def self.has(num, *property_names, &block)
|
70
|
+
@properties ||= {}
|
71
|
+
property_names.each_with_object(@properties) do |name, properties|
|
72
|
+
property_desc = PropertyDesc.new(num, &block)
|
73
|
+
properties[name] = property_desc.to_h
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
private_class_method :has
|
78
|
+
|
25
79
|
end
|
26
80
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Boolean do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if some other text' do
|
6
|
+
test = lambda {Mida::DataType::Boolean.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Boolean.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract should return true for "True" whatever the case' do
|
16
|
+
['true', 'True', 'TRUE', 'tRUE'].each do |true_text|
|
17
|
+
Mida::DataType::Boolean.extract(true_text).should be_true
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it '#extract should return false for "False" whatever the case' do
|
22
|
+
['false', 'False', 'FALSE', 'fALSE'].each do |false_text|
|
23
|
+
Mida::DataType::Boolean.extract(false_text).should be_false
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Float do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Float.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Float.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value as a Float if a floating point' do
|
16
|
+
Mida::DataType::Float.extract("3.14").should == 3.14
|
17
|
+
end
|
18
|
+
|
19
|
+
it '#extract? should return the input value as a Float if a integer' do
|
20
|
+
Mida::DataType::Float.extract("3").should == 3
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Integer do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Integer.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Integer.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should raise an exception if a floating point number' do
|
16
|
+
test = lambda {Mida::DataType::Integer.extract('3.14')}
|
17
|
+
test.should raise_error(ArgumentError)
|
18
|
+
end
|
19
|
+
|
20
|
+
it '#extract? should return the input value as a Integer if a integer' do
|
21
|
+
Mida::DataType::Integer.extract("3").should == 3
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::ISO8601Date do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if some other text' do
|
6
|
+
test = lambda {Mida::DataType::ISO8601Date.extract('27th Aug 2009')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::ISO8601Date.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value' do
|
16
|
+
date = "2009-08-27T01:13:04+05:10"
|
17
|
+
Mida::DataType::ISO8601Date.extract(date).should == DateTime.parse(date)
|
18
|
+
Mida::DataType::ISO8601Date.extract(date).to_s.should == date
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'mida/datatype'
|
2
|
+
|
3
|
+
describe Mida::DataType::Number do
|
4
|
+
|
5
|
+
it '#extract should raise an exception if not a number' do
|
6
|
+
test = lambda {Mida::DataType::Number.extract('hello')}
|
7
|
+
test.should raise_error(ArgumentError)
|
8
|
+
end
|
9
|
+
|
10
|
+
it '#extract should raise an exception if value is empty' do
|
11
|
+
test = lambda {Mida::DataType::Number.extract('')}
|
12
|
+
test.should raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it '#extract? should return the input value as a number if a floating point' do
|
16
|
+
Mida::DataType::Number.extract("3.14").should == 3.14
|
17
|
+
end
|
18
|
+
|
19
|
+
it '#extract? should return the input value as a number if a integer' do
|
20
|
+
Mida::DataType::Number.extract("3").should == 3
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|