hypermicrodata 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +8 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +100 -0
- data/Rakefile +10 -0
- data/bin/hypermicrodata.rb +25 -0
- data/hypermicrodata.gemspec +28 -0
- data/lib/hypermicrodata.rb +37 -0
- data/lib/hypermicrodata/document.rb +27 -0
- data/lib/hypermicrodata/extract.rb +22 -0
- data/lib/hypermicrodata/item.rb +113 -0
- data/lib/hypermicrodata/itemprop_parser.rb +114 -0
- data/lib/hypermicrodata/link.rb +7 -0
- data/lib/hypermicrodata/property.rb +27 -0
- data/lib/hypermicrodata/rails/html_based_json_renderer.rb +35 -0
- data/lib/hypermicrodata/serializer/base.rb +24 -0
- data/lib/hypermicrodata/serializer/hal.rb +47 -0
- data/lib/hypermicrodata/serializer/jsonld.rb +44 -0
- data/lib/hypermicrodata/serializer/uber.rb +100 -0
- data/lib/hypermicrodata/submit_button.rb +105 -0
- data/lib/hypermicrodata/version.rb +3 -0
- data/lib/uberous/uber.rb +104 -0
- data/test/data/example.html +22 -0
- data/test/data/example_itemref.html +16 -0
- data/test/data/example_with_no_itemscope.html +22 -0
- data/test/test_helper.rb +3 -0
- data/test/test_itemref.rb +19 -0
- data/test/test_json.rb +15 -0
- data/test/test_parse.rb +36 -0
- metadata +139 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
# Class that parses itemprop elements
|
3
|
+
class ItempropParser
  # Elements whose property value is read from an attribute instead of the
  # element's text content, mapped to the name of that attribute.
  NON_TEXTCONTENT_ELEMENTS = {
    'a' => 'href', 'area' => 'href',
    'audio' => 'src', 'embed' => 'src',
    'iframe' => 'src', 'img' => 'src',
    'link' => 'href', 'meta' => 'content',
    'object' => 'data', 'source' => 'src',
    'time' => 'datetime', 'track' => 'src',
    'video' => 'src'
  }.freeze

  # Elements that are wrapped in a Link instead of a plain Property.
  LINK_ELEMENTS = ['a', 'area', 'audio', 'embed', 'iframe',
                   'img', 'link', 'source', 'track', 'video'].freeze

  # Attributes whose value is a URL and is resolved against the page URL.
  URL_ATTRIBUTES = ['data', 'href', 'src'].freeze

  # The parsed property: a Link when the element is one of LINK_ELEMENTS,
  # otherwise a Property. Both are built from (value, names, rels).
  attr_reader :property

  # Create a new ItempropParser and parse the element immediately.
  # [element]  The itemprop element to be parsed
  # [page_url] The url of the page, including filename, used to form
  #            absolute urls
  def initialize(element, page_url=nil)
    @element, @page_url = element, page_url
    property_class = link? ? Link : Property
    @property = property_class.new(extract_property, extract_property_names, extract_rel_names)
  end

  # Parse the element and return the resulting Link or Property.
  # [element]  The itemprop element to be parsed
  # [page_url] The url of the page, including filename, used to form
  #            absolute urls
  def self.parse(element, page_url=nil)
    self.new(element, page_url).property
  end

  # True when the element's tag name is listed in LINK_ELEMENTS.
  def link?
    LINK_ELEMENTS.include?(@element.name)
  end

  private

  # Returns a hash of the form {'property name' => value} with one entry
  # per name in the itemprop attribute.
  def extract_properties
    extract_property_names.each_with_object({}) do |name, memo|
      memo[name] = extract_property
    end
  end

  # Returns a hash of the form {'rel name' => value} with one entry per
  # name in the rel attribute.
  def extract_links
    extract_rel_names.each_with_object({}) do |name, memo|
      memo[name] = extract_property_value
    end
  end

  # Resolves +url+ against the page url. Urls that are already absolute are
  # returned as they are. When the url (or the page url) cannot be parsed
  # or merged, the original +url+ is returned unchanged instead of raising.
  def make_absolute_url(url)
    # Both parses sit inside the rescue so a malformed url cannot escape.
    return url unless URI.parse(url).relative?
    URI.parse(@page_url).merge(url).to_s
  rescue URI::Error
    url
  end

  def non_textcontent_element?(element)
    NON_TEXTCONTENT_ELEMENTS.has_key?(element)
  end

  def url_attribute?(attribute)
    URL_ATTRIBUTES.include?(attribute)
  end

  # Names listed in the itemprop attribute, or [] when it is absent.
  def extract_property_names
    itemprop_attr = @element.attribute('itemprop')
    itemprop_attr ? itemprop_attr.value.split : []
  end

  # Names listed in the rel attribute; always [] for non-link elements.
  def extract_rel_names
    link_rel = @element.attribute('rel')
    link? && link_rel ? link_rel.value.split : []
  end

  # Returns the property value: the mapped attribute for elements listed in
  # NON_TEXTCONTENT_ELEMENTS (made absolute for url attributes), otherwise
  # the stripped text content.
  def extract_property_value
    tag = @element.name
    return @element.inner_text.strip unless non_textcontent_element?(tag)

    attribute = NON_TEXTCONTENT_ELEMENTS[tag]
    node = @element.attribute(attribute)
    if node.nil?
      # The attribute is missing (e.g. <a itemprop="x"> without href).
      # A <time> without datetime falls back to its text content; every
      # other element yields the empty string.
      return tag == 'time' ? @element.inner_text.strip : ''
    end

    value = node.value
    url_attribute?(attribute) ? make_absolute_url(value) : value
  end

  # An element carrying itemscope becomes a nested Item; anything else is
  # reduced to its plain value.
  def extract_property
    if @element.attribute('itemscope')
      Item.new(@element, @page_url)
    else
      extract_property_value
    end
  end

end
|
114
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
class Property
  # value - the property value (a plain value or an Item)
  # names - the property names this value was declared under
  # rels  - the link relation names attached to it
  attr_reader :value, :names, :rels

  # Stores the three fields as given; +rels+ defaults to an empty list.
  def initialize(value, names, rels = [])
    @value, @names, @rels = value, names, rels
  end

  # Returns the value when it is an Item, nil otherwise.
  def item
    return unless @value.is_a?(Item)
    @value
  end

  # True when #item returns something.
  def item?
    !item.nil?
  end

  # A plain Property is never a link.
  def link?
    false
  end

  # A plain Property is never a submit button.
  def submit_button?
    false
  end
end
|
27
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
module Rails
|
3
|
+
# Mixin that renders a JSON response derived from the HTML view of the same
# action. It relies on the including object providing url_for, response,
# view_context, lookup_context, render_to_string and render.
module HtmlBasedJsonRenderer
  def initialize
    super
    # Options handed to Hypermicrodata::Extract (:location, :profile_path).
    @_render_based_html_options = {}
  end

  # Records the resource location for the serializer and also exposes it
  # in the Content-Location response header.
  def set_location(location)
    location_url = url_for(location)
    @_render_based_html_options[:location] = location_url
    response.headers['Content-Location'] = location_url
  end

  # Records the asset path of the profile for the serializer.
  def set_profile_path(path)
    @_render_based_html_options[:profile_path] = view_context.path_to_asset(path)
  end

  # When the first requested format ends in "json", renders the HTML view
  # to a string, extracts it with Hypermicrodata::Extract and renders the
  # result as JSON. The text before "json" in the format name, as a symbol,
  # is passed to Extract#to_json. Any other format falls through to a
  # regular render with the given arguments.
  def render_based_html(*args)
    json_match = lookup_context.formats.first.to_s.match(/json$/)
    return render(*args) unless json_match

    html = render_to_string(formats: :html)
    extract = Hypermicrodata::Extract.new(html, @_render_based_html_options)
    render(json: extract.to_json(json_match.pre_match.to_sym))
  end

  # Routes the default render through render_based_html.
  def default_render(*args)
    render_based_html(*args)
  end
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
module Serializer
|
3
|
+
# Default serializer: turns a document's items into plain hashes.
class Base
  # document     - object responding to #items
  # location     - optional location of the serialized resource
  # profile_path - optional profile path
  def initialize(document, location = nil, profile_path = nil)
    @document, @location, @profile_path = document, location, profile_path
  end

  # Dumps the result of #serialize with MultiJson, passing +options+ through.
  def to_json(options = {})
    payload = serialize
    MultiJson.dump(payload, options)
  end

  # Returns a hash or array suitable for application/json: each item's
  # #to_hash, or an empty array when the document has no items.
  def serialize
    items = @document.items
    return [] unless items
    items.map(&:to_hash)
  end
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
module Serializer
|
3
|
+
# Serializes a document as a HAL resource built with Halibut.
class Hal < Base
  # With exactly one item, that item becomes the root resource (created
  # with @location). Otherwise an empty root resource is created and every
  # item is embedded under each of its types. A 'profile' link is added
  # when a profile path was given. Returns the resource as a hash.
  def serialize
    items = @document.items
    if items.length == 1
      hal_resource = item_to_resource(items.first, @location)
    else
      hal_resource = Halibut::Core::Resource.new(@location)
      items.each do |item|
        embedded_resource = item_to_resource(item)
        # Array() matches item_to_resource below: an item without a type
        # has no relation name to embed under, so it is skipped here
        # instead of raising on nil.
        Array(item.type).each do |type|
          hal_resource.add_embedded_resource(type, embedded_resource)
        end
      end
    end
    hal_resource.add_link('profile', @profile_path) if @profile_path
    hal_resource.to_hash
  end

  private

  # Builds a Halibut resource for +item+. Properties that hold nested items
  # are embedded recursively and all other properties are set as plain
  # values. Links are added for the item id ('self'), each type ('type')
  # and every entry in item.links.
  def item_to_resource(item, self_url = nil)
    resource = Halibut::Core::Resource.new(self_url)
    item.properties.each do |name, same_name_properties|
      same_name_properties.each do |property|
        if property.item
          subresource = item_to_resource(property.item)
          resource.add_embedded_resource(name, subresource)
        else
          resource.set_property(name, property.value)
        end
      end
    end
    resource.add_link('self', item.id) if item.id
    Array(item.type).each do |type|
      resource.add_link('type', type)
    end
    item.links.each do |rel, same_rel_links|
      same_rel_links.each do |link|
        resource.add_link(rel, link.value)
      end
    end
    resource
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
module Serializer
|
3
|
+
# JSON-LD serializer. It adds nothing of its own and inherits all of its
# behavior from Base.
class Jsonld < Base; end
|
6
|
+
|
7
|
+
# json-ld patch
|
8
|
+
# This does not preserve the HTML link semantics.
|
9
|
+
# Maybe we should give up on JSON-LD; it is troublesome in several ways.
|
10
|
+
# Mixin producing a JSON-LD flavoured hash. The including object must
# respond to #id, #type and #properties.
module JsonldSerializer
  # Prefix that identifies schema.org types (matched case-insensitively).
  SCHEMA_ORG_PREFIX = %r|^http://schema\.org/|i

  # Builds the hash:
  # * :id is set when the object has an id;
  # * when every type is a schema.org type, '@context' is set to
  #   'http://schema.org' and '@type' holds the types without the prefix,
  #   otherwise '@type' holds the types unchanged;
  # * an object without a type (nil) gets neither '@context' nor '@type';
  # * each property is copied, converting nested items with #to_hash.
  # Single-element arrays are unwrapped to their only element.
  def to_hash
    hash = {}
    hash[:id] = unwrap(id) if id

    types = type
    unless types.nil?
      if types.all? { |t| t.match(SCHEMA_ORG_PREFIX) }
        hash['@context'] = 'http://schema.org'
        hash['@type'] = unwrap(types.map { |t| t.sub(SCHEMA_ORG_PREFIX, '') })
      else
        hash['@type'] = unwrap(types)
      end
    end

    properties.each do |name, values|
      final_values = values.map do |value|
        value.is_a?(Hypermicrodata::Item) ? value.to_hash : value
      end
      hash[name] = unwrap(final_values)
    end
    hash
  end

  # Returns the only element of a one-element array; anything else is
  # returned unchanged.
  def unwrap(values)
    if values.is_a?(Array) && values.length == 1
      values.first
    else
      values
    end
  end
end
|
42
|
+
# Hypermicrodata::Item.send :prepend, JsonldSerializer
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
module Serializer
|
3
|
+
# Serializes a document in the UBER hypermedia format using Uberous.
class Uber < Base
  # Maps HTTP methods to UBER action names.
  ACTION_MAPPINGS = {
    'GET'    => 'read',
    'POST'   => 'append',
    'PUT'    => 'replace',
    'DELETE' => 'remove',
    'PATCH'  => 'partial'
  }.freeze

  # With exactly one item, that item is the single root data element. It
  # gets rel 'self' and the location as url when a location is known, plus
  # a 'profile' link when a profile path is known. Otherwise every item
  # becomes a data element named after its type and the 'self'/'profile'
  # links are attached to the document. Returns the document as a hash.
  def serialize
    items = @document.items
    if items.length == 1
      root_data = item_to_nested_data(items.first)
      if @location
        root_data.rel = 'self'
        root_data.url = @location
      end
      root_data.add_link('profile', @profile_path) if @profile_path
      uber = Uberous::Uber.new([root_data])
    else
      data_collection = items.map do |item|
        item_to_nested_data(item).tap do |data|
          data.name = generate_short_name(item.type) if item.type
        end
      end
      uber = Uberous::Uber.new(data_collection)
      uber.add_link('self', @location) if @location
      uber.add_link('profile', @profile_path) if @profile_path
    end
    uber.to_hash
  end

  private

  # Converts +item+ and everything below it into nested Uberous::Data.
  # +self_name+ is the property name the item was reached through, if any.
  def item_to_nested_data(item, self_name = nil)
    parent_data = Uberous::Data.new
    if item.id
      parent_data.url = item.id
      parent_data.rel = self_name if self_name # consider a link relation
      parent_data.name = generate_short_name(item.type) if item.type
    else
      parent_data.name = self_name if self_name # consider a semantic descriptor
    end
    item.all_properties_and_links.each do |property|
      rel = property.rels.join(' ') unless property.rels.empty?
      if property.item?
        # TODO: avoid duplicating the nested data when there are multiple names
        property.names.each do |name|
          parent_data.add_data(item_to_nested_data(property.item, name))
        end
        # A property that is both an item and a link probably does not occur.
      elsif property.submit_button?
        add_link_data(parent_data, submit_button_attributes(property, rel), property.names)
      elsif property.link?
        add_link_data(parent_data, { rel: rel, url: property.value }, property.names)
      else # only value
        property.names.each do |name|
          parent_data.add_data(Uberous::Data.new(name: name, value: property.value))
        end
      end
    end

    parent_data
  end

  # Attributes describing a submit button as an UBER action. The model is
  # prefixed with "?" for the read and remove actions.
  def submit_button_attributes(property, rel)
    attrs = { rel: rel, url: property.action_url, model: property.query_string, action: action_name(property.method) }
    attrs[:model] = "?#{attrs[:model]}" if %w(read remove).include?(attrs[:action])
    attrs
  end

  # Adds link-like data to +parent_data+: one unnamed element when +names+
  # is empty, otherwise one element per name. Nil attributes are dropped.
  def add_link_data(parent_data, attrs, names)
    attrs = attrs.reject { |_, value| value.nil? }
    if names.empty?
      parent_data.add_data(Uberous::Data.new(attrs))
    else
      names.each do |name|
        parent_data.add_data(Uberous::Data.new(attrs.merge(name: name)))
      end
    end
  end

  # Short name for an item: its first type with a leading
  # "http://schema.org/" removed. Returns nil when there is no type,
  # including when the type list is empty.
  def generate_short_name(item_types)
    # TODO: is this the right approach?
    first_type = Array(item_types).first
    first_type.sub(%r|^http://schema\.org/|, '') if first_type
  end

  # UBER action for an HTTP method name; unknown methods are passed through
  # in lower case.
  def action_name(method_name)
    ACTION_MAPPINGS[method_name.to_s.upcase] || method_name.to_s.downcase
  end
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Hypermicrodata
|
2
|
+
# A form submit button exposed as a link-like property. Its value is the
# form action followed by a query string in which the form's non-hidden
# fields appear as "{type}" placeholders.
class SubmitButton < Property
  # form   - the duplicated form this button submits
  # method - the method used to submit; the value of a "_method" field
  #          (upper-cased) takes precedence over the form's own method
  attr_reader :form, :method

  # button - the submit button
  # form   - the form containing it; a copy made with #dup is kept
  # NOTE(review): #dup is a shallow copy - confirm that the copies do not
  # share the clicked-button state that add_button_to_query changes.
  def initialize(button, form)
    @button = button
    @form = form.dup
    @excluded_fields = {}
    setup!
  end

  # The action url and the query string joined with "?".
  def value
    "#{action_url}?#{query_string}"
  end

  def action_url
    @form.action
  end

  # The parameters returned by the form's build_query.
  def params
    @form.build_query
  end

  def query_string
    build_query_string(params)
  end

  # Names from the button's itemprop attribute ([] when it is absent).
  def names
    itemprop = @button.node['itemprop'] || ''
    itemprop.split(' ')
  end

  # Relation names taken from the first present of: the rel attribute, the
  # data-rel attribute, the button's dom_class.
  def rels
    source = @button.node['rel'] || @button.node['data-rel'] || @button.dom_class || ''
    source.split(' ')
  end

  # A submit button never wraps an item.
  def item
    nil
  end

  def link?
    true
  end

  def submit_button?
    true
  end

  private

  # Determines the method and registers the button with the form's query.
  def setup!
    method_field = @form.fields.find { |field| field.name == '_method' }
    if method_field
      # overload POST
      @method = method_field.value.upcase
      @excluded_fields['_method'] = method_field
    else
      @method = @form.method
    end
    @form.add_button_to_query(@button) # OK because the form was dup'ed
  end

  # Non-hidden form fields keyed by name (memoized).
  def template_fields
    @template_fields ||= begin
      visible = @form.fields.reject { |field| field.is_a?(Mechanize::Form::Hidden) }
      Hash[visible.map { |field| [field.name, field] }]
    end
  end

  # Builds the query string from name/value pairs: template fields become
  # "name={type}", excluded fields are dropped, and everything else is
  # emitted as an escaped "name=value".
  def build_query_string(parameters)
    pairs = parameters.map do |name, value|
      field = template_fields[name]
      if field
        "#{CGI.escape(name.to_s)}={#{field.type}}"
      elsif !@excluded_fields[name]
        # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
        "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
      end
    end
    pairs.compact.join('&')
  end
end
|
79
|
+
|
80
|
+
# Wraps a form element and exposes one SubmitButton per submit button.
class FormParser
  # The SubmitButton objects built from the form's submits.
  attr_reader :submit_buttons

  # element  - the form element, handed to Mechanize::Form.new
  # page_url - the url of the page (stored only)
  def initialize(element, page_url = nil)
    @element = element
    @page_url = page_url
    form = Mechanize::Form.new(element)
    @submit_buttons = form.submits.map { |button| SubmitButton.new(button, form) }
  end

  # Shortcut returning the submit buttons of +element+.
  def self.parse(element, page_url = nil)
    new(element, page_url).submit_buttons
  end
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Patch for bug
|
98
|
+
Mechanize::Form.class_eval do
  # Returns all buttons that submit the form (memoized): every
  # Mechanize::Form::Submit, plus every Mechanize::Form::Button whose type
  # is missing or is 'submit'.
  def submits
    @submits ||= buttons.select do |candidate|
      next true if candidate.class == Mechanize::Form::Submit

      candidate.class == Mechanize::Form::Button &&
        (candidate.type.nil? || candidate.type == 'submit')
    end
  end
end
|