mida 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mida/itemprop.rb CHANGED
@@ -1,20 +1,11 @@
1
1
  require 'nokogiri'
2
2
  require 'uri'
3
+ require 'mida/itemscope'
3
4
 
4
5
  module Mida
5
6
 
6
- # Module that parses itemprop elements
7
- module Itemprop
8
-
9
- # Returns a Hash representing the property.
10
- # Hash is of the form {'property name' => 'value'}
11
- # [element] The itemprop element to be parsed
12
- # [page_url] The url of the page, including the filename, used to form absolute urls
13
- def self.parse(element, page_url=nil)
14
- extract_property_names(element).each_with_object({}) do |name, memo|
15
- memo[name] = extract_property(element, page_url)
16
- end
17
- end
7
+ # Class that parses itemprop elements
8
+ class Itemprop
18
9
 
19
10
  NON_TEXTCONTENT_ELEMENTS = {
20
11
  'a' => 'href', 'area' => 'href',
@@ -28,44 +19,78 @@ module Mida
28
19
 
29
20
  URL_ATTRIBUTES = ['data', 'href', 'src']
30
21
 
22
+ # A Hash representing the properties.
23
+ # Hash is of the form {'property name' => 'value'}
24
+ attr_reader :properties
25
+
26
+ # Create a new Itemprop object
27
+ # [element] The itemprop element to be parsed
28
+ # [page_url] The url of the page, including filename, used to form
29
+ # absolute urls
30
+ def initialize(element, page_url=nil)
31
+ @element, @page_url = element, page_url
32
+ @properties = extract_properties
33
+ end
34
+
35
+ # Parse the element and return a hash representing the properties.
36
+ # Hash is of the form {'property name' => 'value'}
37
+ # [element] The itemprop element to be parsed
38
+ # [page_url] The url of the page, including filename, used to form
39
+ # absolute urls
40
+ def self.parse(element, page_url=nil)
41
+ self.new(element, page_url).properties
42
+ end
43
+
44
+ private
45
+ def extract_properties
46
+ prop_names = extract_property_names
47
+ prop_names.each_with_object({}) do |name, memo|
48
+ memo[name] = extract_property
49
+ end
50
+ end
51
+
31
52
  # This returns an empty string if it can't form a valid
32
53
  # absolute url as per the Microdata spec.
33
- def self.make_absolute_url(url, page_url)
54
+ def make_absolute_url(url)
34
55
  return url unless URI.parse(url).relative?
35
56
  begin
36
- URI.parse(page_url).merge(url).to_s
57
+ URI.parse(@page_url).merge(url).to_s
37
58
  rescue URI::Error
38
59
  ''
39
60
  end
40
61
  end
41
62
 
42
- def self.extract_property_names(itemprop)
43
- itemprop_attr = itemprop.attribute('itemprop')
63
+ def non_textcontent_element?(element)
64
+ NON_TEXTCONTENT_ELEMENTS.has_key?(element)
65
+ end
66
+
67
+ def url_attribute?(attribute)
68
+ URL_ATTRIBUTES.include?(attribute)
69
+ end
70
+
71
+ def extract_property_names
72
+ itemprop_attr = @element.attribute('itemprop')
44
73
  itemprop_attr ? itemprop_attr.value.split() : []
45
74
  end
46
75
 
47
- def self.extract_property_value(itemprop, page_url)
48
- element = itemprop.name
49
- if NON_TEXTCONTENT_ELEMENTS.has_key?(element)
76
+ def extract_property_value
77
+ element = @element.name
78
+ if non_textcontent_element?(element)
50
79
  attribute = NON_TEXTCONTENT_ELEMENTS[element]
51
- value = itemprop.attribute(attribute).value
52
- (URL_ATTRIBUTES.include?(attribute)) ? make_absolute_url(value, page_url) : value
80
+ value = @element.attribute(attribute).value
81
+ url_attribute?(attribute) ? make_absolute_url(value) : value
53
82
  else
54
- itemprop.inner_text
83
+ @element.inner_text.strip
55
84
  end
56
85
  end
57
86
 
58
- def self.extract_property(itemprop, page_url)
59
- if itemprop.attribute('itemscope')
60
- Mida::Item.new(itemprop, page_url)
87
+ def extract_property
88
+ if @element.attribute('itemscope')
89
+ Itemscope.new(@element, @page_url)
61
90
  else
62
- extract_property_value(itemprop, page_url)
91
+ extract_property_value
63
92
  end
64
93
  end
65
94
 
66
- private_class_method :make_absolute_url, :extract_property_names
67
- private_class_method :extract_property_value, :extract_property
68
-
69
95
  end
70
-
71
96
  end
@@ -0,0 +1,82 @@
1
+ require 'nokogiri'
2
+
3
+ module Mida
4
+
5
+ # Class that parses itemscope elements
6
+ class Itemscope
7
+
8
+ # The Type of the itemscope
9
+ attr_reader :type
10
+
11
+ # The Global Identifier of the itemscope
12
+ attr_reader :id
13
+
14
+ # A Hash representing the properties as name/value pairs
15
+ # The values will be an array containing either +String+
16
+ # or <tt>Mida::Itemscope</tt> instances
17
+ attr_reader :properties
18
+
19
+ # Create a new Itemscope object
20
+ #
21
+ # [itemscope_node] The itemscope_node that you want to parse.
22
+ # [page_url] The url of the target page, used to form absolute urls.
23
+ def initialize(itemscope_node, page_url=nil)
24
+ @itemscope_node, @page_url = itemscope_node, page_url
25
+ @type, @id = extract_attribute('itemtype'), extract_attribute('itemid')
26
+ @properties = {}
27
+ add_itemref_properties
28
+ parse_elements(extract_elements(@itemscope_node))
29
+ end
30
+
31
+ # Same as +new+ for convenience
32
+ def self.parse(itemscope, page_url=nil)
33
+ self.new itemscope, page_url
34
+ end
35
+
36
+ def ==(other)
37
+ @type == other.type && @id == other.id && @properties == other.properties
38
+ end
39
+
40
+ private
41
+
42
+ def extract_attribute(attribute)
43
+ (value = @itemscope_node.attribute(attribute)) ? value.value : nil
44
+ end
45
+
46
+ def extract_elements(itemscope)
47
+ itemscope.search('./*')
48
+ end
49
+
50
+ # Find an element with a matching id
51
+ def find_with_id(id)
52
+ @itemscope_node.search("//*[@id='#{id}']")
53
+ end
54
+
55
+ # Add any properties referred to by 'itemref'
56
+ def add_itemref_properties
57
+ itemref = extract_attribute('itemref')
58
+ if itemref
59
+ itemref.split.each {|id| parse_elements(find_with_id(id))}
60
+ end
61
+ end
62
+
63
+ def parse_elements(elements)
64
+ elements.each {|element| parse_element(element)}
65
+ end
66
+
67
+ def parse_element(element)
68
+ itemscope = element.attribute('itemscope')
69
+ itemprop = element.attribute('itemprop')
70
+ internal_elements = extract_elements(element)
71
+ add_itemprop(element) if itemscope || itemprop
72
+ parse_elements(internal_elements) if internal_elements && !itemscope
73
+ end
74
+
75
+ # Add an 'itemprop' to the properties
76
+ def add_itemprop(itemprop)
77
+ properties = Itemprop.parse(itemprop, @page_url)
78
+ properties.each { |name, value| (@properties[name] ||= []) << value }
79
+ end
80
+
81
+ end
82
+ end
@@ -0,0 +1,36 @@
1
+ module Mida
2
+
3
+ # Class used to describe a property
4
+ class PropertyDesc
5
+ def initialize(num, &block)
6
+ @num, @types = num, []
7
+ if block_given?
8
+ instance_eval(&block)
9
+ @types = [DataType::Text] unless @types.size >= 1
10
+ else
11
+ @types = [DataType::Text]
12
+ end
13
+ end
14
+
15
+ # What to extract for this property.
16
+ # This can be a datatype such as +:text+ or a +Vocabulary+.
17
+ # The types should be supplied in order of preference.
18
+ # If you want to say any type, then use +:any+ as the class
19
+ def extract(*types)
20
+ @types += types
21
+ end
22
+
23
+ # <b>DEPRECATED:</b> Please use +extract+ instead
24
+ def types(*types)
25
+ warn "[DEPRECATION] Mida::PropertyDesc#types is deprecated. "+
26
+ "Please use Mida::PropertyDesc#extract instead."
27
+ extract *types
28
+ end
29
+
30
+ def to_h
31
+ {num: @num, types: @types}
32
+ end
33
+
34
+ end
35
+
36
+ end
@@ -1,12 +1,32 @@
1
+ require 'set'
1
2
  module Mida
2
3
 
3
- # Module to register the Vocabularies with
4
- module Vocabulary
4
+ # Class used to describe a vocabulary
5
+ #
6
+ # To specify a vocabulary use the following methods:
7
+ # +itemtype+, +has_one+, +has_many+
8
+ class Vocabulary
9
+
10
+ class << self
11
+ # Return the properties specification
12
+ attr_reader :properties
13
+
14
+ # Return the registered vocabularies
15
+ attr_reader :vocabularies
16
+ end
17
+
18
+ @vocabularies = Set.new
19
+ @properties = {}
5
20
 
6
21
  # Register a vocabulary that can be used when parsing,
7
22
  # later vocabularies are given precedence over earlier ones
8
23
  def self.register(vocabulary)
9
- (@vocabularies ||= []) << vocabulary
24
+ @vocabularies << vocabulary
25
+ end
26
+
27
+ # Un-register a vocabulary
28
+ def self.unregister(vocabulary)
29
+ @vocabularies.delete(vocabulary)
10
30
  end
11
31
 
12
32
  # Find the last vocabulary registered that matches the itemtype
@@ -17,10 +37,44 @@ module Mida
17
37
  nil
18
38
  end
19
39
 
20
- # Return the registered vocabularies
21
- def self.vocabularies
22
- @vocabularies
40
+ def self.inherited(subclass)
41
+ register(subclass)
42
+ end
43
+
44
+ # Sets the regular expression to match against the +itemtype+
45
+ # or returns the current regular expression
46
+ def self.itemtype(regexp_arg=nil)
47
+ if regexp_arg
48
+ @itemtype = regexp_arg
49
+ else
50
+ @itemtype
51
+ end
52
+ end
53
+
54
+
55
+ # Defines the properties as only containing one value
56
+ # If you want to allow any property name, then use +:any+ as a name
57
+ # Within a block you can use the methods of the class +PropertyDesc+
58
+ def self.has_one(*property_names, &block)
59
+ has(:one, *property_names, &block)
60
+ end
61
+
62
+ # Defines the properties as containing many values
63
+ # If want to say any property name, then use +:any+ as a name
64
+ # Within a block you can use the methods of the class +PropertyDesc+
65
+ def self.has_many(*property_names, &block)
66
+ has(:many, *property_names, &block)
23
67
  end
24
68
 
69
+ def self.has(num, *property_names, &block)
70
+ @properties ||= {}
71
+ property_names.each_with_object(@properties) do |name, properties|
72
+ property_desc = PropertyDesc.new(num, &block)
73
+ properties[name] = property_desc.to_h
74
+ end
75
+ end
76
+
77
+ private_class_method :has
78
+
25
79
  end
26
80
  end
@@ -0,0 +1,27 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Boolean do
4
+
5
+ it '#extract should raise an exception if some other text' do
6
+ test = lambda {Mida::DataType::Boolean.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Boolean.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract should return true for "True" whatever the case' do
16
+ ['true', 'True', 'TRUE', 'tRUE'].each do |true_text|
17
+ Mida::DataType::Boolean.extract(true_text).should be_true
18
+ end
19
+ end
20
+
21
+ it '#extract should return false for "False" whatever the case' do
22
+ ['false', 'False', 'FALSE', 'fALSE'].each do |false_text|
23
+ Mida::DataType::Boolean.extract(false_text).should be_false
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Float do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Float.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Float.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value as a Float if a floating point' do
16
+ Mida::DataType::Float.extract("3.14").should == 3.14
17
+ end
18
+
19
+ it '#extract? should return the input value as a Float if a integer' do
20
+ Mida::DataType::Float.extract("3").should == 3
21
+ end
22
+
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Integer do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Integer.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Integer.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should raise an exception if a floating point number' do
16
+ test = lambda {Mida::DataType::Integer.extract('3.14')}
17
+ test.should raise_error(ArgumentError)
18
+ end
19
+
20
+ it '#extract? should return the input value as a Integer if a integer' do
21
+ Mida::DataType::Integer.extract("3").should == 3
22
+ end
23
+ end
@@ -0,0 +1,20 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::ISO8601Date do
4
+
5
+ it '#extract should raise an exception if some other text' do
6
+ test = lambda {Mida::DataType::ISO8601Date.extract('27th Aug 2009')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::ISO8601Date.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value' do
16
+ date = "2009-08-27T01:13:04+05:10"
17
+ Mida::DataType::ISO8601Date.extract(date).should == DateTime.parse(date)
18
+ Mida::DataType::ISO8601Date.extract(date).to_s.should == date
19
+ end
20
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Number do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Number.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Number.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value as a number if a floating point' do
16
+ Mida::DataType::Number.extract("3.14").should == 3.14
17
+ end
18
+
19
+ it '#extract? should return the input value as a number if a integer' do
20
+ Mida::DataType::Number.extract("3").should == 3
21
+ end
22
+
23
+ end