mida 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/mida/itemprop.rb CHANGED
@@ -1,20 +1,11 @@
1
1
  require 'nokogiri'
2
2
  require 'uri'
3
+ require 'mida/itemscope'
3
4
 
4
5
  module Mida
5
6
 
6
- # Module that parses itemprop elements
7
- module Itemprop
8
-
9
- # Returns a Hash representing the property.
10
- # Hash is of the form {'property name' => 'value'}
11
- # [element] The itemprop element to be parsed
12
- # [page_url] The url of the page, including the filename, used to form absolute urls
13
- def self.parse(element, page_url=nil)
14
- extract_property_names(element).each_with_object({}) do |name, memo|
15
- memo[name] = extract_property(element, page_url)
16
- end
17
- end
7
+ # Class that parses itemprop elements
8
+ class Itemprop
18
9
 
19
10
  NON_TEXTCONTENT_ELEMENTS = {
20
11
  'a' => 'href', 'area' => 'href',
@@ -28,44 +19,78 @@ module Mida
28
19
 
29
20
  URL_ATTRIBUTES = ['data', 'href', 'src']
30
21
 
22
+ # A Hash representing the properties.
23
+ # Hash is of the form {'property name' => 'value'}
24
+ attr_reader :properties
25
+
26
+ # Create a new Itemprop object
27
+ # [element] The itemprop element to be parsed
28
+ # [page_url] The url of the page, including filename, used to form
29
+ # absolute urls
30
+ def initialize(element, page_url=nil)
31
+ @element, @page_url = element, page_url
32
+ @properties = extract_properties
33
+ end
34
+
35
+ # Parse the element and return a hash representing the properties.
36
+ # Hash is of the form {'property name' => 'value'}
37
+ # [element] The itemprop element to be parsed
38
+ # [page_url] The url of the page, including filename, used to form
39
+ # absolute urls
40
+ def self.parse(element, page_url=nil)
41
+ self.new(element, page_url).properties
42
+ end
43
+
44
+ private
45
+ def extract_properties
46
+ prop_names = extract_property_names
47
+ prop_names.each_with_object({}) do |name, memo|
48
+ memo[name] = extract_property
49
+ end
50
+ end
51
+
31
52
  # This returns an empty string if it can't form a valid
32
53
  # absolute url as per the Microdata spec.
33
- def self.make_absolute_url(url, page_url)
54
+ def make_absolute_url(url)
34
55
  return url unless URI.parse(url).relative?
35
56
  begin
36
- URI.parse(page_url).merge(url).to_s
57
+ URI.parse(@page_url).merge(url).to_s
37
58
  rescue URI::Error
38
59
  ''
39
60
  end
40
61
  end
41
62
 
42
- def self.extract_property_names(itemprop)
43
- itemprop_attr = itemprop.attribute('itemprop')
63
+ def non_textcontent_element?(element)
64
+ NON_TEXTCONTENT_ELEMENTS.has_key?(element)
65
+ end
66
+
67
+ def url_attribute?(attribute)
68
+ URL_ATTRIBUTES.include?(attribute)
69
+ end
70
+
71
+ def extract_property_names
72
+ itemprop_attr = @element.attribute('itemprop')
44
73
  itemprop_attr ? itemprop_attr.value.split() : []
45
74
  end
46
75
 
47
- def self.extract_property_value(itemprop, page_url)
48
- element = itemprop.name
49
- if NON_TEXTCONTENT_ELEMENTS.has_key?(element)
76
+ def extract_property_value
77
+ element = @element.name
78
+ if non_textcontent_element?(element)
50
79
  attribute = NON_TEXTCONTENT_ELEMENTS[element]
51
- value = itemprop.attribute(attribute).value
52
- (URL_ATTRIBUTES.include?(attribute)) ? make_absolute_url(value, page_url) : value
80
+ value = @element.attribute(attribute).value
81
+ url_attribute?(attribute) ? make_absolute_url(value) : value
53
82
  else
54
- itemprop.inner_text
83
+ @element.inner_text.strip
55
84
  end
56
85
  end
57
86
 
58
- def self.extract_property(itemprop, page_url)
59
- if itemprop.attribute('itemscope')
60
- Mida::Item.new(itemprop, page_url)
87
+ def extract_property
88
+ if @element.attribute('itemscope')
89
+ Itemscope.new(@element, @page_url)
61
90
  else
62
- extract_property_value(itemprop, page_url)
91
+ extract_property_value
63
92
  end
64
93
  end
65
94
 
66
- private_class_method :make_absolute_url, :extract_property_names
67
- private_class_method :extract_property_value, :extract_property
68
-
69
95
  end
70
-
71
96
  end
@@ -0,0 +1,82 @@
1
+ require 'nokogiri'
2
+
3
+ module Mida
4
+
5
+ # Class that parses itemscope elements
6
+ class Itemscope
7
+
8
+ # The Type of the itemscope
9
+ attr_reader :type
10
+
11
+ # The Global Identifier of the itemscope
12
+ attr_reader :id
13
+
14
+ # A Hash representing the properties as name/value pairs
15
+ # The values will be an array containing either +String+
16
+ # or <tt>Mida::Itemscope</tt> instances
17
+ attr_reader :properties
18
+
19
+ # Create a new Itemscope object
20
+ #
21
+ # [itemscope_node] The itemscope_node that you want to parse.
22
+ # [page_url] The url of the target, used to form absolute urls.
23
+ def initialize(itemscope_node, page_url=nil)
24
+ @itemscope_node, @page_url = itemscope_node, page_url
25
+ @type, @id = extract_attribute('itemtype'), extract_attribute('itemid')
26
+ @properties = {}
27
+ add_itemref_properties
28
+ parse_elements(extract_elements(@itemscope_node))
29
+ end
30
+
31
+ # Same as +new+ for convenience
32
+ def self.parse(itemscope, page_url=nil)
33
+ self.new itemscope, page_url
34
+ end
35
+
36
+ def ==(other)
37
+ @type == other.type && @id == other.id && @properties == other.properties
38
+ end
39
+
40
+ private
41
+
42
+ def extract_attribute(attribute)
43
+ (value = @itemscope_node.attribute(attribute)) ? value.value : nil
44
+ end
45
+
46
+ def extract_elements(itemscope)
47
+ itemscope.search('./*')
48
+ end
49
+
50
+ # Find an element with a matching id
51
+ def find_with_id(id)
52
+ @itemscope_node.search("//*[@id='#{id}']")
53
+ end
54
+
55
+ # Add any properties referred to by 'itemref'
56
+ def add_itemref_properties
57
+ itemref = extract_attribute('itemref')
58
+ if itemref
59
+ itemref.split.each {|id| parse_elements(find_with_id(id))}
60
+ end
61
+ end
62
+
63
+ def parse_elements(elements)
64
+ elements.each {|element| parse_element(element)}
65
+ end
66
+
67
+ def parse_element(element)
68
+ itemscope = element.attribute('itemscope')
69
+ itemprop = element.attribute('itemprop')
70
+ internal_elements = extract_elements(element)
71
+ add_itemprop(element) if itemscope || itemprop
72
+ parse_elements(internal_elements) if internal_elements && !itemscope
73
+ end
74
+
75
+ # Add an 'itemprop' to the properties
76
+ def add_itemprop(itemprop)
77
+ properties = Itemprop.parse(itemprop, @page_url)
78
+ properties.each { |name, value| (@properties[name] ||= []) << value }
79
+ end
80
+
81
+ end
82
+ end
@@ -0,0 +1,36 @@
1
+ module Mida
2
+
3
+ # Class used to describe a property
4
+ class PropertyDesc
5
+ def initialize(num, &block)
6
+ @num, @types = num, []
7
+ if block_given?
8
+ instance_eval(&block)
9
+ @types = [DataType::Text] unless @types.size >= 1
10
+ else
11
+ @types = [DataType::Text]
12
+ end
13
+ end
14
+
15
+ # What to extract for this property.
16
+ # This can be a datatype such as +:text+ or a +Vocabulary+.
17
+ # The types should be supplied in order of preference.
18
+ # If you want to say any type, then use +:any+ as the class
19
+ def extract(*types)
20
+ @types += types
21
+ end
22
+
23
+ # <b>DEPRECATED:</b> Please use +extract+ instead
24
+ def types(*types)
25
+ warn "[DEPRECATION] Mida::PropertyDesc#types is deprecated. "+
26
+ "Please use Mida::PropertyDesc#extract instead."
27
+ extract *types
28
+ end
29
+
30
+ def to_h
31
+ {num: @num, types: @types}
32
+ end
33
+
34
+ end
35
+
36
+ end
@@ -1,12 +1,32 @@
1
+ require 'set'
1
2
  module Mida
2
3
 
3
- # Module to register the Vocabularies with
4
- module Vocabulary
4
+ # Class used to describe a vocabulary
5
+ #
6
+ # To specify a vocabulary use the following methods:
7
+ # +itemtype+, +has_one+, +has_many+
8
+ class Vocabulary
9
+
10
+ class << self
11
+ # Return the properties specification
12
+ attr_reader :properties
13
+
14
+ # Return the registered vocabularies
15
+ attr_reader :vocabularies
16
+ end
17
+
18
+ @vocabularies = Set.new
19
+ @properties = {}
5
20
 
6
21
  # Register a vocabulary that can be used when parsing,
7
22
  # later vocabularies are given precedence over earlier ones
8
23
  def self.register(vocabulary)
9
- (@vocabularies ||= []) << vocabulary
24
+ @vocabularies << vocabulary
25
+ end
26
+
27
+ # Un-register a vocabulary
28
+ def self.unregister(vocabulary)
29
+ @vocabularies.delete(vocabulary)
10
30
  end
11
31
 
12
32
  # Find the last vocabulary registered that matches the itemtype
@@ -17,10 +37,44 @@ module Mida
17
37
  nil
18
38
  end
19
39
 
20
- # Return the registered vocabularies
21
- def self.vocabularies
22
- @vocabularies
40
+ def self.inherited(subclass)
41
+ register(subclass)
42
+ end
43
+
44
+ # Sets the regular expression to match against the +itemtype+
45
+ # or returns the current regular expression
46
+ def self.itemtype(regexp_arg=nil)
47
+ if regexp_arg
48
+ @itemtype = regexp_arg
49
+ else
50
+ @itemtype
51
+ end
52
+ end
53
+
54
+
55
+ # Defines the properties as only containing one value
56
+ # If want to say any property name, then use +:any+ as a name
57
+ # Within a block you can use the methods of the class +PropertyDesc+
58
+ def self.has_one(*property_names, &block)
59
+ has(:one, *property_names, &block)
60
+ end
61
+
62
+ # Defines the properties as containing many values
63
+ # If you want to match any property name, then use +:any+ as the name
64
+ # Within a block you can use the methods of the class +PropertyDesc+
65
+ def self.has_many(*property_names, &block)
66
+ has(:many, *property_names, &block)
23
67
  end
24
68
 
69
+ def self.has(num, *property_names, &block)
70
+ @properties ||= {}
71
+ property_names.each_with_object(@properties) do |name, properties|
72
+ property_desc = PropertyDesc.new(num, &block)
73
+ properties[name] = property_desc.to_h
74
+ end
75
+ end
76
+
77
+ private_class_method :has
78
+
25
79
  end
26
80
  end
@@ -0,0 +1,27 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Boolean do
4
+
5
+ it '#extract should raise an exception if some other text' do
6
+ test = lambda {Mida::DataType::Boolean.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Boolean.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract should return true for "True" whatever the case' do
16
+ ['true', 'True', 'TRUE', 'tRUE'].each do |true_text|
17
+ Mida::DataType::Boolean.extract(true_text).should be_true
18
+ end
19
+ end
20
+
21
+ it '#extract should return false for "False" whatever the case' do
22
+ ['false', 'False', 'FALSE', 'fALSE'].each do |false_text|
23
+ Mida::DataType::Boolean.extract(false_text).should be_false
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Float do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Float.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Float.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value as a Float if a floating point' do
16
+ Mida::DataType::Float.extract("3.14").should == 3.14
17
+ end
18
+
19
+ it '#extract? should return the input value as a Float if a integer' do
20
+ Mida::DataType::Float.extract("3").should == 3
21
+ end
22
+
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Integer do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Integer.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Integer.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should raise an exception if a floating point number' do
16
+ test = lambda {Mida::DataType::Integer.extract('3.14')}
17
+ test.should raise_error(ArgumentError)
18
+ end
19
+
20
+ it '#extract? should return the input value as a Integer if a integer' do
21
+ Mida::DataType::Integer.extract("3").should == 3
22
+ end
23
+ end
@@ -0,0 +1,20 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::ISO8601Date do
4
+
5
+ it '#extract should raise an exception if some other text' do
6
+ test = lambda {Mida::DataType::ISO8601Date.extract('27th Aug 2009')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::ISO8601Date.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value' do
16
+ date = "2009-08-27T01:13:04+05:10"
17
+ Mida::DataType::ISO8601Date.extract(date).should == DateTime.parse(date)
18
+ Mida::DataType::ISO8601Date.extract(date).to_s.should == date
19
+ end
20
+ end
@@ -0,0 +1,23 @@
1
+ require 'mida/datatype'
2
+
3
+ describe Mida::DataType::Number do
4
+
5
+ it '#extract should raise an exception if not a number' do
6
+ test = lambda {Mida::DataType::Number.extract('hello')}
7
+ test.should raise_error(ArgumentError)
8
+ end
9
+
10
+ it '#extract should raise an exception if value is empty' do
11
+ test = lambda {Mida::DataType::Number.extract('')}
12
+ test.should raise_error(ArgumentError)
13
+ end
14
+
15
+ it '#extract? should return the input value as a number if a floating point' do
16
+ Mida::DataType::Number.extract("3.14").should == 3.14
17
+ end
18
+
19
+ it '#extract? should return the input value as a number if a integer' do
20
+ Mida::DataType::Number.extract("3").should == 3
21
+ end
22
+
23
+ end