otg-microformat 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ .DS_Store
1
2
  *.gem
2
3
  *.rbc
3
4
  .bundle
@@ -4,15 +4,26 @@ require 'andand'
4
4
 
5
5
  module Microformat
6
6
  class ItemProp
7
- def self.parse(node)
7
+ def self.parse(node, strict)
8
8
  # If the element has no itemprop attribute
9
9
  # The attribute must return null on getting and must throw an INVALID_ACCESS_ERR exception on setting.
10
10
  return nil unless node.attribute('itemprop')
11
11
 
12
12
  # If the element has an itemscope attribute
13
13
  # The attribute must return the element itself on getting and must throw an INVALID_ACCESS_ERR exception on setting.
14
- return ItemScope.new(node) if node.attribute('itemscope')
14
+ return ItemScope.new(node, strict) if node.attribute('itemscope')
15
15
 
16
+ if strict
17
+ parse_strict node
18
+ else
19
+ parse_weak node
20
+ end
21
+ end
22
+
23
+ private
24
+ ATTRIBUTES = ['content', 'src', 'href', 'data', 'datetime']
25
+
26
+ def self.parse_strict(node)
16
27
  # If the element is a meta element
17
28
  # The attribute must act as it would if it was reflecting the element's content content attribute
18
29
  return node.attribute('content').andand.value if node.name == 'meta'
@@ -37,15 +48,20 @@ module Microformat
37
48
  # The attribute must act the same as the element's textContent attribute.
38
49
  return node.text.chomp.strip
39
50
  end
51
+
52
+ def self.parse_weak(node)
53
+ ATTRIBUTES.map { |attr| node.attribute(attr).andand.value }.compact.first || node.text.chomp.strip
54
+ end
40
55
  end
41
56
 
42
57
  class ItemScope
43
58
  attr_reader :type, :id
44
59
 
45
- def initialize(node)
60
+ def initialize(node, strict)
46
61
  @type = attr 'itemtype', node
47
62
  @id = attr 'itemid', node
48
63
  @properties = {}
64
+ @strict = strict
49
65
 
50
66
  parse_elements node.search './*'
51
67
  end
@@ -63,14 +79,14 @@ module Microformat
63
79
  def parse_elements(elements)
64
80
  elements.each do |el|
65
81
  itemprop = attr('itemprop', el)
66
- prop = ItemProp.parse el
82
+ prop = ItemProp.parse el, @strict
67
83
 
68
84
  if prop
69
85
  @properties[itemprop] ||= []
70
86
  @properties[itemprop] << prop
71
- else
72
- parse_elements el.search('./*')
73
87
  end
88
+
89
+ # A nested ItemScope already walked its own children in its initializer;
+ # descending again here would copy the nested item's properties into this scope.
+ parse_elements el.search('./*') unless prop.is_a?(ItemScope)
74
90
  end
75
91
  end
76
92
  end
@@ -88,10 +104,11 @@ module Microformat
88
104
  end
89
105
 
90
106
  class SchemaOrg
91
- def self.parse(html)
107
+ def self.parse(html, opts={})
108
+ # Strict parsing is the default; only an explicit :strict option overrides it.
+ # (The unparenthesised `has_key? :strict ? ... : ...` form passed the whole
+ # ternary as the argument to has_key?, so strict was effectively always false.)
+ strict = opts.fetch(:strict, true)
92
109
  html = Nokogiri::HTML.parse html unless html.respond_to? :search
93
110
  scopes = html.search('//*[@itemscope and not(@itemprop)]')
94
- .map { |node| ItemScope.new node }
111
+ .map { |node| ItemScope.new node, strict }
95
112
 
96
113
  if scopes.any?
97
114
  ItemDocument.new scopes
@@ -1,3 +1,3 @@
1
1
  module Microformat
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_development_dependency 'bundler', '~> 1.3'
25
25
  spec.add_development_dependency 'rake'
26
+ spec.add_development_dependency 'rspec'
26
27
  end
@@ -0,0 +1,99 @@
1
+ require 'rspec'
2
+ require 'microformat'
3
+
4
+ describe Microformat::SchemaOrg do
5
+ describe 'a strict implementation' do
6
+ let(:html) {
7
+ <<-HTML
8
+ <div itemscope itemtype="http://schema.org/Product">
9
+ <h1 itemprop="name">Product</h1>
10
+ <div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
11
+ <meta itemprop="currency" content="AUD" />
12
+ <meta itemprop="availability" content="OutOfStock" />
13
+ Price: $<span itemprop="price">12.99</span>
14
+ </div>
15
+ </div>
16
+ HTML
17
+ }
18
+ let(:result) { Microformat::SchemaOrg.parse html, strict: true }
19
+ let(:product) { result.products.first }
20
+
21
+ it 'should find a product' do
22
+ product.should_not be_nil
23
+ end
24
+
25
+ it 'should parse name' do
26
+ product['name'].should eq ['Product']
27
+ end
28
+
29
+ it 'should parse offer' do
30
+ product['offers'].first['currency'].should eq ['AUD']
31
+ product['offers'].first['availability'].should eq ['OutOfStock']
32
+ product['offers'].first['price'].should eq ['12.99']
33
+ end
34
+ end
35
+
36
+ describe 'a weak implementation' do
37
+ let(:html) {
38
+ <<-HTML
39
+ <div itemscope itemtype="http://schema.org/Product">
40
+ <h1 itemprop="name">Product</h1>
41
+ <div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
42
+ <span itemprop="currency" content="AUD" />
43
+ <span itemprop="availability" href="OutOfStock" />
44
+ Price: $<span itemprop="price">12.99</span>
45
+ </div>
46
+ </div>
47
+ HTML
48
+ }
49
+ let(:result) { Microformat::SchemaOrg.parse html, strict: false }
50
+ let(:product) { result.products.first }
51
+
52
+ it 'should find a product' do
53
+ product.should_not be_nil
54
+ end
55
+
56
+ it 'should parse name' do
57
+ product['name'].should eq ['Product']
58
+ end
59
+
60
+ it 'should parse offer' do
61
+ product['offers'].first['currency'].should eq ['AUD']
62
+ product['offers'].first['availability'].should eq ['OutOfStock']
63
+ product['offers'].first['price'].should eq ['12.99']
64
+ end
65
+ end
66
+
67
+ describe 'a weird weak implementation' do
68
+ let(:html) {
69
+ <<-HTML
70
+ <div itemscope itemtype="http://schema.org/Product">
71
+ <h1 itemprop="name">Product</h1>
72
+ <div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
73
+ <span itemprop="currency" content="AUD">
74
+ <span itemprop="availability" href="OutOfStock">
75
+ Price: $<span itemprop="price">12.99</span>
76
+ </span>
77
+ </span>
78
+ </div>
79
+ </div>
80
+ HTML
81
+ }
82
+ let(:result) { Microformat::SchemaOrg.parse html, strict: false }
83
+ let(:product) { result.products.first }
84
+
85
+ it 'should find a product' do
86
+ product.should_not be_nil
87
+ end
88
+
89
+ it 'should parse name' do
90
+ product['name'].should eq ['Product']
91
+ end
92
+
93
+ it 'should parse offer' do
94
+ product['offers'].first['currency'].should eq ['AUD']
95
+ product['offers'].first['availability'].should eq ['OutOfStock']
96
+ product['offers'].first['price'].should eq ['12.99']
97
+ end
98
+ end
99
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: otg-microformat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-05 00:00:00.000000000 Z
12
+ date: 2013-05-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -75,6 +75,22 @@ dependencies:
75
75
  - - ! '>='
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
78
94
  description: A basic microformat parser for Ruby.
79
95
  email:
80
96
  - james@onthegame.com.au
@@ -90,6 +106,7 @@ files:
90
106
  - lib/microformat.rb
91
107
  - lib/microformat/version.rb
92
108
  - microformat.gemspec
109
+ - spec/schema_org_spec.rb
93
110
  homepage: http://www.onthegame.com.au/about/opensource
94
111
  licenses:
95
112
  - MIT
@@ -103,22 +120,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
120
  - - ! '>='
104
121
  - !ruby/object:Gem::Version
105
122
  version: '0'
106
- segments:
107
- - 0
108
- hash: 640169864049438702
109
123
  required_rubygems_version: !ruby/object:Gem::Requirement
110
124
  none: false
111
125
  requirements:
112
126
  - - ! '>='
113
127
  - !ruby/object:Gem::Version
114
128
  version: '0'
115
- segments:
116
- - 0
117
- hash: 640169864049438702
118
129
  requirements: []
119
130
  rubyforge_project:
120
131
  rubygems_version: 1.8.25
121
132
  signing_key:
122
133
  specification_version: 3
123
134
  summary: Simple Microformat parser
124
- test_files: []
135
+ test_files:
136
+ - spec/schema_org_spec.rb