RubyGems - mida - Versions diffs - 0.2.0 → 0.3.0 - Mend

mida 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

data/CHANGELOG.rdoc +18 -2
data/README.rdoc +22 -11
data/Rakefile +2 -2
data/lib/mida.rb +1 -1
data/lib/mida/datatype.rb +15 -0
data/lib/mida/datatype/boolean.rb +18 -0
data/lib/mida/datatype/float.rb +15 -0
data/lib/mida/datatype/integer.rb +15 -0
data/lib/mida/datatype/iso8601date.rb +17 -0
data/lib/mida/datatype/number.rb +15 -0
data/lib/mida/datatype/text.rb +13 -0
data/lib/mida/document.rb +21 -19
data/lib/mida/genericvocabulary.rb +13 -0
data/lib/mida/item.rb +83 -71
data/lib/mida/itemprop.rb +55 -30
data/lib/mida/itemscope.rb +82 -0
data/lib/mida/propertydesc.rb +36 -0
data/lib/mida/vocabulary.rb +60 -6
data/spec/datatype/boolean_spec.rb +27 -0
data/spec/datatype/float_spec.rb +23 -0
data/spec/datatype/integer_spec.rb +23 -0
data/spec/datatype/iso8601date_spec.rb +20 -0
data/spec/datatype/number_spec.rb +23 -0
data/spec/datatype/text_spec.rb +14 -0
data/spec/document_spec.rb +31 -487
data/spec/item_spec.rb +163 -472
data/spec/itemprop_spec.rb +40 -45
data/spec/itemscope_spec.rb +287 -0
data/spec/propertydesc_spec.rb +56 -0
data/spec/spec_helper.rb +13 -36
data/spec/vocabulary_spec.rb +148 -0
metadata +22 -6
data/lib/mida/vocabulary/generic.rb +0 -15
data/lib/mida/vocabularydesc.rb +0 -57
data/spec/vocabularydesc_spec.rb +0 -106

data/lib/mida/itemprop.rb CHANGED Viewed

@@ -1,20 +1,11 @@
 require 'nokogiri'
 require 'uri'
+require 'mida/itemscope'
 module Mida
-  # Module that parses itemprop elements
-  module Itemprop
-    # Returns a Hash representing the property.
-    # Hash is of the form {'property name' => 'value'}
-    # [element] The itemprop element to be parsed
-    # [page_url] The url of the page, including the filename, used to form absolute urls
-    def self.parse(element, page_url=nil)
-      extract_property_names(element).each_with_object({}) do |name, memo|
-        memo[name] = extract_property(element, page_url)
-      end
-    end
+  # Class that parses itemprop elements
+  class Itemprop
     NON_TEXTCONTENT_ELEMENTS = {
       'a' => 'href',        'area' => 'href',
@@ -28,44 +19,78 @@ module Mida
     URL_ATTRIBUTES = ['data', 'href', 'src']
+    # A Hash representing the properties.
+    # Hash is of the form {'property name' => 'value'}
+    attr_reader :properties
+    # Create a new Itemprop object
+    # [element]  The itemprop element to be parsed
+    # [page_url] The url of the page, including filename, used to form
+    #            absolute urls
+    def initialize(element, page_url=nil)
+      @element, @page_url = element, page_url
+      @properties = extract_properties
+    end
+    # Parse the element and return a hash representing the properties.
+    # Hash is of the form {'property name' => 'value'}
+    # [element]  The itemprop element to be parsed
+    # [page_url] The url of the page, including filename, used to form
+    #            absolute urls
+    def self.parse(element, page_url=nil)
+      self.new(element, page_url).properties
+    end
+  private
+    def extract_properties
+      prop_names = extract_property_names
+      prop_names.each_with_object({}) do |name, memo|
+        memo[name] = extract_property
+      end
+    end
     # This returns an empty string if can't form a valid
     # absolute url as per the Microdata spec.
-    def self.make_absolute_url(url, page_url)
+    def make_absolute_url(url)
       return url unless URI.parse(url).relative?
       begin
-        URI.parse(page_url).merge(url).to_s
+        URI.parse(@page_url).merge(url).to_s
       rescue URI::Error
         ''
       end
     end
-    def self.extract_property_names(itemprop)
-      itemprop_attr = itemprop.attribute('itemprop')
+    def non_textcontent_element?(element)
+      NON_TEXTCONTENT_ELEMENTS.has_key?(element)
+    end
+    def url_attribute?(attribute)
+      URL_ATTRIBUTES.include?(attribute)
+    end
+    def extract_property_names
+      itemprop_attr = @element.attribute('itemprop')
       itemprop_attr ? itemprop_attr.value.split() : []
     end
-    def self.extract_property_value(itemprop, page_url)
-      element = itemprop.name
-      if NON_TEXTCONTENT_ELEMENTS.has_key?(element)
+    def extract_property_value
+      element = @element.name
+      if non_textcontent_element?(element)
         attribute = NON_TEXTCONTENT_ELEMENTS[element]
-        value = itemprop.attribute(attribute).value
-        (URL_ATTRIBUTES.include?(attribute)) ? make_absolute_url(value, page_url) : value
+        value = @element.attribute(attribute).value
+        url_attribute?(attribute) ? make_absolute_url(value) : value
       else
-        itemprop.inner_text
+        @element.inner_text.strip
       end
     end
-    def self.extract_property(itemprop, page_url)
-      if itemprop.attribute('itemscope')
-        Mida::Item.new(itemprop, page_url)
+    def extract_property
+      if @element.attribute('itemscope')
+        Itemscope.new(@element, @page_url)
       else
-        extract_property_value(itemprop, page_url)
+        extract_property_value
       end
     end
-    private_class_method :make_absolute_url, :extract_property_names
-    private_class_method :extract_property_value, :extract_property
   end
 end

data/lib/mida/itemscope.rb ADDED Viewed

@@ -0,0 +1,82 @@
+require 'nokogiri'
+module Mida
+  # Class that parses itemscope elements
+  class Itemscope
+    # The Type of the itemscope
+    attr_reader :type
+    # The Global Identifier of the itemscope
+    attr_reader :id
+    # A Hash representing the properties as name/value pairs.
+    # The values will be an array containing either +String+
+    # or <tt>Mida::Itemscope</tt> instances
+    attr_reader :properties
+    # Create a new Itemscope object
+    #
+    # [itemscope_node] The itemscope_node that you want to parse.
+    # [page_url] The url of the page, used to form absolute urls.
+    def initialize(itemscope_node, page_url=nil)
+      @itemscope_node, @page_url = itemscope_node, page_url
+      @type, @id = extract_attribute('itemtype'), extract_attribute('itemid')
+      @properties = {}
+      add_itemref_properties
+      parse_elements(extract_elements(@itemscope_node))
+    end
+    # Same as +new+ for convenience
+    def self.parse(itemscope, page_url=nil)
+      self.new itemscope, page_url
+    end
+    def ==(other)
+      @type == other.type && @id == other.id && @properties == other.properties
+    end
+  private
+    def extract_attribute(attribute)
+      (value = @itemscope_node.attribute(attribute)) ? value.value : nil
+    end
+    def extract_elements(itemscope)
+      itemscope.search('./*')
+    end
+    # Find an element with a matching id
+    def find_with_id(id)
+      @itemscope_node.search("//*[@id='#{id}']")
+    end
+    # Add any properties referred to by 'itemref'
+    def add_itemref_properties
+      itemref = extract_attribute('itemref')
+      if itemref
+        itemref.split.each {|id| parse_elements(find_with_id(id))}
+      end
+    end
+    def parse_elements(elements)
+      elements.each {|element| parse_element(element)}
+    end
+    def parse_element(element)
+      itemscope = element.attribute('itemscope')
+      itemprop = element.attribute('itemprop')
+      internal_elements = extract_elements(element)
+      add_itemprop(element) if itemscope || itemprop
+      parse_elements(internal_elements) if internal_elements && !itemscope
+    end
+    # Add an 'itemprop' to the properties
+    def add_itemprop(itemprop)
+      properties = Itemprop.parse(itemprop, @page_url)
+      properties.each { |name, value| (@properties[name] ||= []) << value }
+    end
+  end
+end

data/lib/mida/propertydesc.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Mida
+  # Class used to describe a property
+  class PropertyDesc
+    def initialize(num, &block)
+      @num, @types = num, []
+      if block_given?
+        instance_eval(&block)
+        @types = [DataType::Text] unless @types.size >= 1
+      else
+        @types = [DataType::Text]
+      end
+    end
+    # What to extract for this property.
+    # This can be a datatype such as +:text+ or a +Vocabulary+.
+    # The types should be supplied in order of preference.
+    # If you want to say any type, then use +:any+ as the class
+    def extract(*types)
+      @types += types
+    end
+    # <b>DEPRECATED:</b> Please use +extract+ instead
+    def types(*types)
+      warn "[DEPRECATION] Mida::PropertyDesc#types is deprecated.  "+
+           "Please use Mida::PropertyDesc#extract instead."
+      extract *types
+    end
+    def to_h
+      {num: @num, types: @types}
+    end
+  end
+end

data/lib/mida/vocabulary.rb CHANGED Viewed

@@ -1,12 +1,32 @@
+require 'set'
 module Mida
-  # Module to register the Vocabularies with
-  module Vocabulary
+  # Class used to describe a vocabulary
+  #
+  # To specify a vocabulary use the following methods:
+  # +itemtype+, +has_one+, +has_many+
+  class Vocabulary
+    class << self
+      # Return the properties specification
+      attr_reader :properties
+      # Return the registered vocabularies
+      attr_reader :vocabularies
+    end
+    @vocabularies = Set.new
+    @properties = {}
     # Register a vocabulary that can be used when parsing,
     # later vocabularies are given precedence over earlier ones
     def self.register(vocabulary)
-      (@vocabularies ||= []) << vocabulary
+      @vocabularies << vocabulary
+    end
+    # Un-register a vocabulary
+    def self.unregister(vocabulary)
+      @vocabularies.delete(vocabulary)
     end
     # Find the last vocabulary registered that matches the itemtype
@@ -17,10 +37,44 @@ module Mida
       nil
     end
-    # Return the registered vocabularies
-    def self.vocabularies
-      @vocabularies
+    def self.inherited(subclass)
+      register(subclass)
+    end
+    # Sets the regular expression to match against the +itemtype+
+    # or returns the current regular expression
+    def self.itemtype(regexp_arg=nil)
+      if regexp_arg
+        @itemtype = regexp_arg
+      else
+        @itemtype
+      end
+    end
+    # Defines the properties as only containing one value
+    # If you want to say any property name, then use +:any+ as a name
+    # Within a block you can use the methods of the class +PropertyDesc+
+    def self.has_one(*property_names, &block)
+      has(:one, *property_names, &block)
+    end
+    # Defines the properties as containing many values
+    # If you want to say any property name, then use +:any+ as a name
+    # Within a block you can use the methods of the class +PropertyDesc+
+    def self.has_many(*property_names, &block)
+      has(:many, *property_names, &block)
     end
+    def self.has(num, *property_names, &block)
+      @properties ||= {}
+      property_names.each_with_object(@properties) do |name, properties|
+        property_desc = PropertyDesc.new(num, &block)
+        properties[name] = property_desc.to_h
+      end
+    end
+    private_class_method :has
   end
 end

data/spec/datatype/boolean_spec.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require 'mida/datatype'
+describe Mida::DataType::Boolean do
+  it '#extract should raise an exception if some other text' do
+    test = lambda {Mida::DataType::Boolean.extract('hello')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should raise an exception if value is empty' do
+    test = lambda {Mida::DataType::Boolean.extract('')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should return true for "True" whatever the case' do
+    ['true', 'True', 'TRUE', 'tRUE'].each do |true_text|
+      Mida::DataType::Boolean.extract(true_text).should be_true
+    end
+  end
+  it '#extract should return false for "False" whatever the case' do
+    ['false', 'False', 'FALSE', 'fALSE'].each do |false_text|
+      Mida::DataType::Boolean.extract(false_text).should be_false
+    end
+  end
+end

data/spec/datatype/float_spec.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'mida/datatype'
+describe Mida::DataType::Float do
+  it '#extract should raise an exception if not a number' do
+    test = lambda {Mida::DataType::Float.extract('hello')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should raise an exception if value is empty' do
+    test = lambda {Mida::DataType::Float.extract('')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract? should return the input value as a Float if a floating point' do
+    Mida::DataType::Float.extract("3.14").should == 3.14
+  end
+  it '#extract? should return the input value as a Float if a integer' do
+    Mida::DataType::Float.extract("3").should == 3
+  end
+end

data/spec/datatype/integer_spec.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'mida/datatype'
+describe Mida::DataType::Integer do
+  it '#extract should raise an exception if not a number' do
+    test = lambda {Mida::DataType::Integer.extract('hello')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should raise an exception if value is empty' do
+    test = lambda {Mida::DataType::Integer.extract('')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract? should raise an exception if a floating point number' do
+    test = lambda {Mida::DataType::Integer.extract('3.14')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract? should return the input value as a Integer if a integer' do
+    Mida::DataType::Integer.extract("3").should == 3
+  end
+end

data/spec/datatype/iso8601date_spec.rb ADDED Viewed

@@ -0,0 +1,20 @@
+require 'mida/datatype'
+describe Mida::DataType::ISO8601Date do
+  it '#extract should raise an exception if some other text' do
+    test = lambda {Mida::DataType::ISO8601Date.extract('27th Aug 2009')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should raise an exception if value is empty' do
+    test = lambda {Mida::DataType::ISO8601Date.extract('')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract? should return the input value' do
+    date = "2009-08-27T01:13:04+05:10"
+    Mida::DataType::ISO8601Date.extract(date).should == DateTime.parse(date)
+    Mida::DataType::ISO8601Date.extract(date).to_s.should == date
+  end
+end

data/spec/datatype/number_spec.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'mida/datatype'
+describe Mida::DataType::Number do
+  it '#extract should raise an exception if not a number' do
+    test = lambda {Mida::DataType::Number.extract('hello')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract should raise an exception if value is empty' do
+    test = lambda {Mida::DataType::Number.extract('')}
+    test.should raise_error(ArgumentError)
+  end
+  it '#extract? should return the input value as a number if a floating point' do
+    Mida::DataType::Number.extract("3.14").should == 3.14
+  end
+  it '#extract? should return the input value as a number if a integer' do
+    Mida::DataType::Number.extract("3").should == 3
+  end
+end