hypermicrodata 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +100 -0
- data/Rakefile +10 -0
- data/bin/hypermicrodata.rb +25 -0
- data/hypermicrodata.gemspec +28 -0
- data/lib/hypermicrodata.rb +37 -0
- data/lib/hypermicrodata/document.rb +27 -0
- data/lib/hypermicrodata/extract.rb +22 -0
- data/lib/hypermicrodata/item.rb +113 -0
- data/lib/hypermicrodata/itemprop_parser.rb +114 -0
- data/lib/hypermicrodata/link.rb +7 -0
- data/lib/hypermicrodata/property.rb +27 -0
- data/lib/hypermicrodata/rails/html_based_json_renderer.rb +35 -0
- data/lib/hypermicrodata/serializer/base.rb +24 -0
- data/lib/hypermicrodata/serializer/hal.rb +47 -0
- data/lib/hypermicrodata/serializer/jsonld.rb +44 -0
- data/lib/hypermicrodata/serializer/uber.rb +100 -0
- data/lib/hypermicrodata/submit_button.rb +105 -0
- data/lib/hypermicrodata/version.rb +3 -0
- data/lib/uberous/uber.rb +104 -0
- data/test/data/example.html +22 -0
- data/test/data/example_itemref.html +16 -0
- data/test/data/example_with_no_itemscope.html +22 -0
- data/test/test_helper.rb +3 -0
- data/test/test_itemref.rb +19 -0
- data/test/test_json.rb +15 -0
- data/test/test_parse.rb +36 -0
- metadata +139 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
module Hypermicrodata
  # Parses a single element carrying an +itemprop+ attribute (and, for link
  # elements, a +rel+ attribute) into a Property or Link object.
  class ItempropParser
    # Elements whose property value is read from an attribute rather than
    # from their text content, mapped to the name of that attribute.
    NON_TEXTCONTENT_ELEMENTS = {
      'a' => 'href', 'area' => 'href',
      'audio' => 'src', 'embed' => 'src',
      'iframe' => 'src', 'img' => 'src',
      'link' => 'href', 'meta' => 'content',
      'object' => 'data', 'source' => 'src',
      'time' => 'datetime', 'track' => 'src',
      'video' => 'src'
    }.freeze

    # Element names that produce a Link instead of a plain Property.
    LINK_ELEMENTS = ['a', 'area', 'audio', 'embed', 'iframe',
                     'img', 'link', 'source', 'track', 'video'].freeze

    # Attributes whose values are URLs and are resolved against the page url.
    URL_ATTRIBUTES = ['data', 'href', 'src'].freeze

    # The parsed result: a Link when the element is one of LINK_ELEMENTS,
    # otherwise a Property.
    attr_reader :property

    # Create a new ItempropParser and parse the element immediately.
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def initialize(element, page_url = nil)
      @element = element
      @page_url = page_url
      property_class = link? ? Link : Property
      @property = property_class.new(extract_property, extract_property_names, extract_rel_names)
    end

    # Parse the element and return the resulting Property (or Link).
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def self.parse(element, page_url = nil)
      new(element, page_url).property
    end

    # True when the element's tag name is listed in LINK_ELEMENTS.
    def link?
      LINK_ELEMENTS.include?(@element.name)
    end

    private

    # Builds {'property name' => value} for every name in the itemprop
    # attribute. Not called from within this class.
    def extract_properties
      extract_property_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property
      end
    end

    # Builds {'rel name' => value} for every name in the rel attribute.
    # Not called from within this class.
    def extract_links
      extract_rel_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property_value
      end
    end

    # Resolves +url+ against the page url when it is relative.
    #
    # The url is returned unchanged when it is already absolute, when no
    # page url is known, or when either url cannot be parsed or merged.
    # Parsing +url+ itself happens inside the rescued region so that a
    # malformed attribute value does not abort the whole parse.
    def make_absolute_url(url)
      return url unless URI.parse(url).relative?
      return url if @page_url.nil?

      URI.parse(@page_url).merge(url).to_s
    rescue URI::Error
      url
    end

    # True when +element+ (a tag name) takes its value from an attribute.
    def non_textcontent_element?(element)
      NON_TEXTCONTENT_ELEMENTS.key?(element)
    end

    # True when +attribute+ (an attribute name) holds a URL.
    def url_attribute?(attribute)
      URL_ATTRIBUTES.include?(attribute)
    end

    # The whitespace-separated names in the itemprop attribute, or [] when
    # the attribute is absent.
    def extract_property_names
      itemprop_attr = @element.attribute('itemprop')
      itemprop_attr ? itemprop_attr.value.split : []
    end

    # The whitespace-separated names in the rel attribute. Only link
    # elements contribute rel names; everything else yields [].
    def extract_rel_names
      link_rel = @element.attribute('rel')
      link? && link_rel ? link_rel.value.split : []
    end

    # The plain (non-item) value of the element: the mapped attribute for
    # NON_TEXTCONTENT_ELEMENTS (made absolute for URL attributes), otherwise
    # the stripped text content.
    #
    # When the mapped attribute is missing the value is the empty string,
    # except for +time+, which falls back to its text content.
    def extract_property_value
      tag_name = @element.name
      return @element.inner_text.strip unless non_textcontent_element?(tag_name)

      attribute_name = NON_TEXTCONTENT_ELEMENTS[tag_name]
      attribute = @element.attribute(attribute_name)
      if attribute.nil?
        # A missing attribute used to raise NoMethodError on nil.
        return tag_name == 'time' ? @element.inner_text.strip : ''
      end

      value = attribute.value
      url_attribute?(attribute_name) ? make_absolute_url(value) : value
    end

    # The property value: a nested Item when the element has an itemscope
    # attribute, otherwise the plain value.
    def extract_property
      if @element.attribute('itemscope')
        Item.new(@element, @page_url)
      else
        extract_property_value
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Hypermicrodata
  # A single parsed property: its value together with the itemprop names
  # and link relation names it was declared under.
  class Property
    attr_reader :value, :names, :rels

    # [value] The property value (a plain value or an Item)
    # [names] The itemprop names of the property
    # [rels]  The link relation names, empty by default
    def initialize(value, names, rels = [])
      @value, @names, @rels = value, names, rels
    end

    # The value when it is an Item, otherwise nil.
    def item
      return nil unless @value.is_a?(Item)

      @value
    end

    # True when #item returns something.
    def item?
      item ? true : false
    end

    # A plain property is never a link.
    def link?
      false
    end

    # A plain property is never a submit button.
    def submit_button?
      false
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Hypermicrodata
  module Rails
    # Mixin that renders JSON derived from the HTML view. The including
    # class must provide url_for, response, view_context, lookup_context,
    # render_to_string and render.
    module HtmlBasedJsonRenderer
      # Sets up the options hash that is later handed to Extract.
      def initialize
        super
        @_render_based_html_options = {}
      end

      # Resolves +location+ with url_for, remembers it for the extraction
      # and exposes it in the Content-Location response header.
      def set_location(location)
        location_url = url_for(location)
        @_render_based_html_options[:location] = location_url
        response.headers['Content-Location'] = location_url
      end

      # Remembers the asset path of +path+ as the profile path for the
      # extraction.
      def set_profile_path(path)
        @_render_based_html_options[:profile_path] = view_context.path_to_asset(path)
      end

      # Renders the response for the first requested format.
      #
      # When that format's name ends in "json", the HTML template is
      # rendered to a string, passed through Hypermicrodata::Extract and
      # rendered as JSON. The part of the format name before "json" is
      # passed to Extract#to_json as a symbol (e.g. "haljson" gives :hal).
      # Any other format is delegated to render with the given arguments.
      def render_based_html(*args)
        # The format is read once; the original evaluated it twice and
        # discarded the first result.
        requested_format = lookup_context.formats.first.to_s
        json_match = requested_format.match(/json$/)
        if json_match
          json_format = json_match.pre_match.to_sym
          html = render_to_string(formats: :html)
          json = Hypermicrodata::Extract.new(html, @_render_based_html_options).to_json(json_format)
          render(json: json)
        else
          render(*args)
        end
      end

      # Routes default rendering through render_based_html.
      def default_render(*args)
        render_based_html(*args)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Hypermicrodata
  module Serializer
    # Default serializer: turns the document's items into plain hashes.
    class Base
      # [document]     Object responding to #items
      # [location]     Optional location, stored for subclasses
      # [profile_path] Optional profile path, stored for subclasses
      def initialize(document, location = nil, profile_path = nil)
        @document, @location, @profile_path = document, location, profile_path
      end

      # Dumps the serialized structure with MultiJson, forwarding +options+.
      def to_json(options = {})
        MultiJson.dump(serialize, options)
      end

      # Returns an array of hashes (one per item) suitable for
      # application/json, or an empty array when the document has no items.
      def serialize
        items = @document.items
        return [] unless items

        items.map(&:to_hash)
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Hypermicrodata
  module Serializer
    # Serializes the document's items as a HAL resource hash using
    # Halibut::Core::Resource.
    class Hal < Base
      # Builds the HAL representation.
      #
      # A document with exactly one item becomes that item's resource,
      # created with @location. Otherwise a resource created with @location
      # is built and every item is embedded in it once per item type.
      # A 'profile' link is added when a profile path was given.
      #
      # Returns the hash produced by the resource's #to_hash.
      def serialize
        # Treat a missing item list like an empty one, as Base#serialize does.
        items = @document.items || []
        if items.length == 1
          hal_resource = item_to_resource(items.first, @location)
        else
          hal_resource = Halibut::Core::Resource.new(@location)
          items.each do |item|
            embedded_resource = item_to_resource(item)
            # Array() guards against a nil type, matching item_to_resource.
            Array(item.type).each do |type|
              hal_resource.add_embedded_resource(type, embedded_resource)
            end
          end
        end
        hal_resource.add_link('profile', @profile_path) if @profile_path
        hal_resource.to_hash
      end

      private

      # Converts +item+ into a Halibut resource.
      #
      # Properties holding nested items are embedded recursively under the
      # property name; other properties are set as plain values. Links are
      # added for the item id ('self'), each item type ('type') and each of
      # the item's own links under their rel.
      #
      # [item]     The item to convert
      # [self_url] Passed to Halibut::Core::Resource.new
      def item_to_resource(item, self_url = nil)
        resource = Halibut::Core::Resource.new(self_url)
        item.properties.each do |name, same_name_properties|
          same_name_properties.each do |property|
            if property.item
              subresource = item_to_resource(property.item)
              resource.add_embedded_resource(name, subresource)
            else
              resource.set_property(name, property.value)
            end
          end
        end
        resource.add_link('self', item.id) if item.id
        Array(item.type).each do |type|
          resource.add_link('type', type)
        end
        item.links.each do |rel, same_rel_links|
          same_rel_links.each do |link|
            resource.add_link(rel, link.value)
          end
        end
        resource
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Hypermicrodata
  module Serializer
    # JSON-LD serializer; currently adds nothing on top of Base.
    class Jsonld < Base
    end

    # json-ld patch
    # This does not manage to preserve the HTML link semantics.
    # Maybe we should give up on producing JSON-LD; it is troublesome in
    # many ways.
    #
    # Written to be prepended to Hypermicrodata::Item (see the disabled
    # line below the module); it relies on the host's id, type and
    # properties methods.
    module JsonldSerializer
      # Builds a hash with the id (when present), '@type', an '@context'
      # when every type is a schema.org URL, and one entry per property.
      def to_hash
        schema_org_prefix = %r|^http://schema\.org/|i
        result = {}
        result[:id] = unwrap(id) if id
        if type.all? { |t| t.match(schema_org_prefix) }
          # All types are schema.org types: declare the context once and
          # keep only the part after the prefix.
          result['@context'] = 'http://schema.org'
          result['@type'] = unwrap(type.map { |t| t.sub(schema_org_prefix, '') })
        else
          result['@type'] = unwrap(type)
        end
        properties.each do |name, values|
          result[name] = unwrap(values.map { |value| value.is_a?(Hypermicrodata::Item) ? value.to_hash : value })
        end
        result
      end

      # Returns the sole element of a one-element array; anything else is
      # returned unchanged.
      def unwrap(values)
        return values.first if values.is_a?(Array) && values.length == 1

        values
      end
    end
    # Hypermicrodata::Item.send :prepend, JsonldSerializer
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Hypermicrodata
  module Serializer
    # Serializes the document's items into an Uberous::Uber document.
    class Uber < Base
      # HTTP method name => action name used in the output.
      ACTION_MAPPINGS = {
        'GET'    => 'read',
        'POST'   => 'append',
        'PUT'    => 'replace',
        'DELETE' => 'remove',
        'PATCH'  => 'partial'
      }.freeze

      # Builds the Uber representation and returns its #to_hash.
      #
      # A document with exactly one item uses that item as the single root
      # data element; @location (as a 'self' rel/url) and the profile link
      # are attached to that element. Otherwise every item becomes a data
      # element named after its type, and the 'self' and 'profile' links
      # are attached to the Uber document itself.
      def serialize
        # Treat a missing item list like an empty one, as Base#serialize does.
        items = @document.items || []
        if items.length == 1
          root_data = item_to_nested_data(items.first)
          if @location
            root_data.rel = 'self'
            root_data.url = @location
          end
          root_data.add_link('profile', @profile_path) if @profile_path
          uber = Uberous::Uber.new([root_data])
        else
          data_collection = items.map do |item|
            item_to_nested_data(item).tap do |data|
              short_name = generate_short_name(item.type)
              data.name = short_name if short_name
            end
          end
          uber = Uberous::Uber.new(data_collection)
          uber.add_link('self', @location) if @location
          uber.add_link('profile', @profile_path) if @profile_path
        end
        uber.to_hash
      end

      private

      # Converts +item+ and everything below it into an Uberous::Data tree.
      #
      # [item]      The item to convert
      # [self_name] The property name the item was found under, if any. It
      #             is used as the rel when the item has an id, and as the
      #             name otherwise.
      def item_to_nested_data(item, self_name = nil)
        parent_data = Uberous::Data.new
        if item.id
          parent_data.url = item.id
          parent_data.rel = self_name if self_name # consider a link relation
          short_name = generate_short_name(item.type)
          parent_data.name = short_name if short_name
        else
          parent_data.name = self_name if self_name # consider a semantic descriptor
        end
        item.all_properties_and_links.each do |property|
          add_property_data(parent_data, property)
        end

        parent_data
      end

      # Appends the data element(s) for one property to +parent_data+.
      def add_property_data(parent_data, property)
        rel = property.rels.join(' ') unless property.rels.empty?
        if property.item?
          # TODO: avoid duplicating the nested data when there are
          # multiple names.
          property.names.each do |name|
            parent_data.add_data(item_to_nested_data(property.item, name))
          end
          # A property that is both an item and a link probably does not occur.
        elsif property.submit_button?
          add_named_data(parent_data, property.names, submit_button_attributes(property, rel))
        elsif property.link?
          attrs = { rel: rel, url: property.value }.reject { |_, value| value.nil? }
          add_named_data(parent_data, property.names, attrs)
        else # only value
          property.names.each do |name|
            parent_data.add_data(Uberous::Data.new(name: name, value: property.value))
          end
        end
      end

      # Attributes describing a submit button; nil entries are dropped.
      # The model is prefixed with "?" for the read and remove actions.
      def submit_button_attributes(property, rel)
        attrs = { rel: rel, url: property.action_url, model: property.query_string, action: action_name(property.method) }
        attrs[:model] = "?#{attrs[:model]}" if %w(read remove).include?(attrs[:action])
        attrs.reject { |_, value| value.nil? }
      end

      # Adds one data element per name built from +attrs+, or a single
      # unnamed element when +names+ is empty.
      def add_named_data(parent_data, names, attrs)
        if names.empty?
          parent_data.add_data(Uberous::Data.new(attrs))
        else
          names.each do |name|
            parent_data.add_data(Uberous::Data.new(attrs.merge(name: name)))
          end
        end
      end

      # Returns the first item type with a leading "http://schema.org/"
      # removed, or nil when there is no type (nil or an empty list).
      def generate_short_name(item_types)
        # TODO: is this really the right approach?
        first_type = Array(item_types).first
        first_type.sub(%r|^http://schema\.org/|, '') if first_type
      end

      # Maps an HTTP method name to its action name, falling back to the
      # downcased method name for methods not in ACTION_MAPPINGS.
      def action_name(method_name)
        ACTION_MAPPINGS[method_name.to_s.upcase] || method_name.to_s.downcase
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Hypermicrodata
  # A form submit button exposed as a property. It is reported as both a
  # link and a submit button, and never as an item.
  class SubmitButton < Property
    # NOTE: the +method+ reader replaces Object#method on instances.
    attr_reader :form, :method

    # [button] The submit button of +form+
    # [form]   The form the button belongs to; a dup of it is kept so the
    #          button can be registered on the copy
    def initialize(button, form)
      @button = button
      @form = form.dup
      @excluded_fields = {}
      setup!
    end

    # The form action followed by "?" and the query string.
    def value
      format('%s?%s', action_url, query_string)
    end

    # The action of the form.
    def action_url
      @form.action
    end

    # The parameters returned by the form's build_query.
    def params
      @form.build_query
    end

    # The query string built from #params (see #build_query_string).
    def query_string
      build_query_string(params)
    end

    # The names in the button's itemprop attribute, [] when absent.
    def names
      itemprop = @button.node['itemprop']
      itemprop ? itemprop.split(' ') : []
    end

    # The link relations of the button, taken from the first present of
    # the rel attribute, the data-rel attribute and the button's dom_class.
    def rels
      node = @button.node
      source = node['rel'] || node['data-rel'] || @button.dom_class || ''
      source.split(' ')
    end

    # A submit button never wraps an item.
    def item
      nil
    end

    def link?
      true
    end

    def submit_button?
      true
    end

    private

    # Determines the HTTP method and registers the button on the form copy.
    def setup!
      method_field = @form.fields.find { |field| field.name == '_method' }
      if method_field
        # A "_method" field overrides the form's own method; it is also
        # left out of the query string.
        @method = method_field.value.upcase
        @excluded_fields['_method'] = method_field
      else
        @method = @form.method
      end
      # OK to modify because the form was dup'ed.
      # NOTE(review): dup is shallow — confirm the copy does not share
      # button state with the original form.
      @form.add_button_to_query(@button)
    end

    # The form's non-hidden fields keyed by name (memoized).
    def template_fields
      @template_fields ||= @form.fields
        .reject { |field| field.is_a?(Mechanize::Form::Hidden) }
        .each_with_object({}) { |field, by_name| by_name[field.name] = field }
    end

    # Joins +parameters+ into a query string. Non-hidden fields get a
    # "{type}" placeholder instead of their value, excluded fields are
    # skipped and everything else is escaped as is.
    def build_query_string(parameters)
      pairs = parameters.each_with_object([]) do |(name, value), collected|
        field = template_fields[name]
        if field
          collected << "#{CGI.escape(name.to_s)}={#{field.type}}"
        elsif !@excluded_fields[name]
          # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
          collected << "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
        end
      end
      pairs.join('&')
    end
  end

  # Wraps a form element in a Mechanize::Form and builds one SubmitButton
  # per submit button of that form.
  class FormParser
    attr_reader :submit_buttons

    # [element]  The form element to parse
    # [page_url] The url of the page; stored but not otherwise used here
    def initialize(element, page_url = nil)
      @element = element
      @page_url = page_url
      form = Mechanize::Form.new(element)
      @submit_buttons = form.submits.map { |button| SubmitButton.new(button, form) }
    end

    # Parses +element+ and returns its SubmitButton objects.
    def self.parse(element, page_url = nil)
      new(element, page_url).submit_buttons
    end
  end
end
|
96
|
+
|
97
|
+
# Patch for bug: reopens Mechanize::Form to redefine #submits.
Mechanize::Form.class_eval do
  # Returns all buttons of type Submit: Submit instances, plus Button
  # instances whose type is missing or "submit". The result is memoized.
  def submits
    @submits ||= buttons.select do |button|
      next true if button.instance_of?(Mechanize::Form::Submit)

      button.instance_of?(Mechanize::Form::Button) && (button.type.nil? || button.type == 'submit')
    end
  end
end
|