otg-microformat 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/lib/microformat.rb +25 -8
- data/lib/microformat/version.rb +1 -1
- data/microformat.gemspec +1 -0
- data/spec/schema_org_spec.rb +99 -0
- metadata +21 -9
data/.gitignore
CHANGED
data/lib/microformat.rb
CHANGED
|
@@ -4,15 +4,26 @@ require 'andand'
|
|
|
4
4
|
|
|
5
5
|
module Microformat
|
|
6
6
|
class ItemProp
|
|
7
|
-
def self.parse(node)
|
|
7
|
+
def self.parse(node, strict)
|
|
8
8
|
# If the element has no itemprop attribute
|
|
9
9
|
# The attribute must return null on getting and must throw an INVALID_ACCESS_ERR exception on setting.
|
|
10
10
|
return nil unless node.attribute('itemprop')
|
|
11
11
|
|
|
12
12
|
# If the element has an itemscope attribute
|
|
13
13
|
# The attribute must return the element itself on getting and must throw an INVALID_ACCESS_ERR exception on setting.
|
|
14
|
-
return ItemScope.new(node) if node.attribute('itemscope')
|
|
14
|
+
return ItemScope.new(node, strict) if node.attribute('itemscope')
|
|
15
15
|
|
|
16
|
+
if strict
|
|
17
|
+
parse_strict node
|
|
18
|
+
else
|
|
19
|
+
parse_weak node
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
ATTRIBUTES = ['content', 'src', 'href', 'data', 'datetime']
|
|
25
|
+
|
|
26
|
+
def self.parse_strict(node)
|
|
16
27
|
# If the element is a meta element
|
|
17
28
|
# The attribute must act as it would if it was reflecting the element's content content attribute
|
|
18
29
|
return node.attribute('content').andand.value if node.name == 'meta'
|
|
@@ -37,15 +48,20 @@ module Microformat
|
|
|
37
48
|
# The attribute must act the same as the element's textContent attribute.
|
|
38
49
|
return node.text.chomp.strip
|
|
39
50
|
end
|
|
51
|
+
|
|
52
|
+
def self.parse_weak(node)
|
|
53
|
+
ATTRIBUTES.map { |attr| node.attribute(attr).andand.value }.compact.first || node.text.chomp.strip
|
|
54
|
+
end
|
|
40
55
|
end
|
|
41
56
|
|
|
42
57
|
class ItemScope
|
|
43
58
|
attr_reader :type, :id
|
|
44
59
|
|
|
45
|
-
def initialize(node)
|
|
60
|
+
def initialize(node, strict)
|
|
46
61
|
@type = attr 'itemtype', node
|
|
47
62
|
@id = attr 'itemid', node
|
|
48
63
|
@properties = {}
|
|
64
|
+
@strict = strict
|
|
49
65
|
|
|
50
66
|
parse_elements node.search './*'
|
|
51
67
|
end
|
|
@@ -63,14 +79,14 @@ module Microformat
|
|
|
63
79
|
def parse_elements(elements)
|
|
64
80
|
elements.each do |el|
|
|
65
81
|
itemprop = attr('itemprop', el)
|
|
66
|
-
prop = ItemProp.parse el
|
|
82
|
+
prop = ItemProp.parse el, @strict
|
|
67
83
|
|
|
68
84
|
if prop
|
|
69
85
|
@properties[itemprop] ||= []
|
|
70
86
|
@properties[itemprop] << prop
|
|
71
|
-
else
|
|
72
|
-
parse_elements el.search('./*')
|
|
73
87
|
end
|
|
88
|
+
|
|
89
|
+
parse_elements el.search('./*')
|
|
74
90
|
end
|
|
75
91
|
end
|
|
76
92
|
end
|
|
@@ -88,10 +104,11 @@ module Microformat
|
|
|
88
104
|
end
|
|
89
105
|
|
|
90
106
|
class SchemaOrg
|
|
91
|
-
def self.parse(html)
|
|
107
|
+
def self.parse(html, opts={})
|
|
108
|
+
strict = opts.fetch(:strict, true) # default to strict; the unparenthesized `has_key? :strict ? … : true` parsed the ternary as the argument, so strict was always false
|
|
92
109
|
html = Nokogiri::HTML.parse html unless html.respond_to? :search
|
|
93
110
|
scopes = html.search('//*[@itemscope and not(@itemprop)]')
|
|
94
|
-
.map { |node| ItemScope.new node }
|
|
111
|
+
.map { |node| ItemScope.new node, strict }
|
|
95
112
|
|
|
96
113
|
if scopes.any?
|
|
97
114
|
ItemDocument.new scopes
|
data/lib/microformat/version.rb
CHANGED
data/microformat.gemspec
CHANGED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
require 'rspec'
|
|
2
|
+
require 'microformat'
|
|
3
|
+
|
|
4
|
+
describe Microformat::SchemaOrg do
|
|
5
|
+
describe 'a strict implementation' do
|
|
6
|
+
let(:html) {
|
|
7
|
+
<<-HTML
|
|
8
|
+
<div itemscope itemtype="http://schema.org/Product">
|
|
9
|
+
<h1 itemprop="name">Product</h1>
|
|
10
|
+
<div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
|
|
11
|
+
<meta itemprop="currency" content="AUD" />
|
|
12
|
+
<meta itemprop="availability" content="OutOfStock" />
|
|
13
|
+
Price: $<span itemprop="price">12.99</span>
|
|
14
|
+
</div>
|
|
15
|
+
</div>
|
|
16
|
+
HTML
|
|
17
|
+
}
|
|
18
|
+
let(:result) { Microformat::SchemaOrg.parse html, strict: true }
|
|
19
|
+
let(:product) { result.products.first }
|
|
20
|
+
|
|
21
|
+
it 'should find a product' do
|
|
22
|
+
product.should_not be_nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it 'should parse name' do
|
|
26
|
+
product['name'].should eq ['Product']
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it 'should parse offer' do
|
|
30
|
+
product['offers'].first['currency'].should eq ['AUD']
|
|
31
|
+
product['offers'].first['availability'].should eq ['OutOfStock']
|
|
32
|
+
product['offers'].first['price'].should eq ['12.99']
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
describe 'a weak implementation' do
|
|
37
|
+
let(:html) {
|
|
38
|
+
<<-HTML
|
|
39
|
+
<div itemscope itemtype="http://schema.org/Product">
|
|
40
|
+
<h1 itemprop="name">Product</h1>
|
|
41
|
+
<div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
|
|
42
|
+
<span itemprop="currency" content="AUD" />
|
|
43
|
+
<span itemprop="availability" href="OutOfStock" />
|
|
44
|
+
Price: $<span itemprop="price">12.99</span>
|
|
45
|
+
</div>
|
|
46
|
+
</div>
|
|
47
|
+
HTML
|
|
48
|
+
}
|
|
49
|
+
let(:result) { Microformat::SchemaOrg.parse html, strict: false }
|
|
50
|
+
let(:product) { result.products.first }
|
|
51
|
+
|
|
52
|
+
it 'should find a product' do
|
|
53
|
+
product.should_not be_nil
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it 'should parse name' do
|
|
57
|
+
product['name'].should eq ['Product']
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it 'should parse offer' do
|
|
61
|
+
product['offers'].first['currency'].should eq ['AUD']
|
|
62
|
+
product['offers'].first['availability'].should eq ['OutOfStock']
|
|
63
|
+
product['offers'].first['price'].should eq ['12.99']
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
describe 'a weird weak implementation' do
|
|
68
|
+
let(:html) {
|
|
69
|
+
<<-HTML
|
|
70
|
+
<div itemscope itemtype="http://schema.org/Product">
|
|
71
|
+
<h1 itemprop="name">Product</h1>
|
|
72
|
+
<div itemprop="offers" itemscope itemtype="http://schema.org/Offer">
|
|
73
|
+
<span itemprop="currency" content="AUD">
|
|
74
|
+
<span itemprop="availability" href="OutOfStock">
|
|
75
|
+
Price: $<span itemprop="price">12.99</span>
|
|
76
|
+
</span>
|
|
77
|
+
</span>
|
|
78
|
+
</div>
|
|
79
|
+
</div>
|
|
80
|
+
HTML
|
|
81
|
+
}
|
|
82
|
+
let(:result) { Microformat::SchemaOrg.parse html, strict: false }
|
|
83
|
+
let(:product) { result.products.first }
|
|
84
|
+
|
|
85
|
+
it 'should find a product' do
|
|
86
|
+
product.should_not be_nil
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
it 'should parse name' do
|
|
90
|
+
product['name'].should eq ['Product']
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it 'should parse offer' do
|
|
94
|
+
product['offers'].first['currency'].should eq ['AUD']
|
|
95
|
+
product['offers'].first['availability'].should eq ['OutOfStock']
|
|
96
|
+
product['offers'].first['price'].should eq ['12.99']
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: otg-microformat
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.2
|
|
4
|
+
version: 0.0.3
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-05-
|
|
12
|
+
date: 2013-05-12 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: nokogiri
|
|
@@ -75,6 +75,22 @@ dependencies:
|
|
|
75
75
|
- - ! '>='
|
|
76
76
|
- !ruby/object:Gem::Version
|
|
77
77
|
version: '0'
|
|
78
|
+
- !ruby/object:Gem::Dependency
|
|
79
|
+
name: rspec
|
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
|
81
|
+
none: false
|
|
82
|
+
requirements:
|
|
83
|
+
- - ! '>='
|
|
84
|
+
- !ruby/object:Gem::Version
|
|
85
|
+
version: '0'
|
|
86
|
+
type: :development
|
|
87
|
+
prerelease: false
|
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
89
|
+
none: false
|
|
90
|
+
requirements:
|
|
91
|
+
- - ! '>='
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0'
|
|
78
94
|
description: A basic microformat parser for Ruby.
|
|
79
95
|
email:
|
|
80
96
|
- james@onthegame.com.au
|
|
@@ -90,6 +106,7 @@ files:
|
|
|
90
106
|
- lib/microformat.rb
|
|
91
107
|
- lib/microformat/version.rb
|
|
92
108
|
- microformat.gemspec
|
|
109
|
+
- spec/schema_org_spec.rb
|
|
93
110
|
homepage: http://www.onthegame.com.au/about/opensource
|
|
94
111
|
licenses:
|
|
95
112
|
- MIT
|
|
@@ -103,22 +120,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
103
120
|
- - ! '>='
|
|
104
121
|
- !ruby/object:Gem::Version
|
|
105
122
|
version: '0'
|
|
106
|
-
segments:
|
|
107
|
-
- 0
|
|
108
|
-
hash: 640169864049438702
|
|
109
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
124
|
none: false
|
|
111
125
|
requirements:
|
|
112
126
|
- - ! '>='
|
|
113
127
|
- !ruby/object:Gem::Version
|
|
114
128
|
version: '0'
|
|
115
|
-
segments:
|
|
116
|
-
- 0
|
|
117
|
-
hash: 640169864049438702
|
|
118
129
|
requirements: []
|
|
119
130
|
rubyforge_project:
|
|
120
131
|
rubygems_version: 1.8.25
|
|
121
132
|
signing_key:
|
|
122
133
|
specification_version: 3
|
|
123
134
|
summary: Simple Microformat parser
|
|
124
|
-
test_files:
|
|
135
|
+
test_files:
|
|
136
|
+
- spec/schema_org_spec.rb
|