hypermicrodata 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
module Hypermicrodata
  # Parses one element that carries an +itemprop+ attribute (and, for
  # link-like elements, a +rel+ attribute) into a Property or Link.
  class ItempropParser

    # Elements whose property value is read from an attribute rather than
    # from the element's text, mapped to the name of that attribute.
    NON_TEXTCONTENT_ELEMENTS = {
      'a' => 'href', 'area' => 'href',
      'audio' => 'src', 'embed' => 'src',
      'iframe' => 'src', 'img' => 'src',
      'link' => 'href', 'meta' => 'content',
      'object' => 'data', 'source' => 'src',
      'time' => 'datetime', 'track' => 'src',
      'video' => 'src'
    }.freeze

    # Element names for which #link? answers true.
    LINK_ELEMENTS = ['a', 'area', 'audio', 'embed', 'iframe',
                     'img', 'link', 'source', 'track', 'video'].freeze

    # Attributes whose values are URLs and are resolved against the page URL.
    URL_ATTRIBUTES = ['data', 'href', 'src'].freeze

    # The parsed result: a Link when the element is one of LINK_ELEMENTS,
    # otherwise a Property.
    attr_reader :property

    # Create a new ItempropParser and parse the element immediately.
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def initialize(element, page_url=nil)
      @element, @page_url = element, page_url
      property_class = link? ? Link : Property
      @property = property_class.new(extract_property, extract_property_names, extract_rel_names)
    end

    # Parse the element and return the resulting Property (or Link).
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def self.parse(element, page_url=nil)
      self.new(element, page_url).property
    end

    # True when the element's tag name is one of LINK_ELEMENTS.
    def link?
      LINK_ELEMENTS.include?(@element.name)
    end

    private

    # Builds {'property name' => value} for every name in the itemprop
    # attribute. Not called from within this class.
    def extract_properties
      prop_names = extract_property_names
      prop_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property
      end
    end

    # Builds {'rel name' => value} for every name in the rel attribute.
    # Not called from within this class.
    def extract_links
      rel_names = extract_rel_names
      rel_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property_value
      end
    end

    # Resolves +url+ against the page URL.
    # Returns +url+ unchanged when it is already absolute, or when either
    # +url+ or the page URL cannot be parsed or merged (URI::Error).
    def make_absolute_url(url)
      # Parsing the candidate is inside the rescued region on purpose:
      # a malformed value must not abort parsing of the whole document.
      return url unless URI.parse(url).relative?
      URI.parse(@page_url).merge(url).to_s
    rescue URI::Error
      url
    end

    # True when the value of the named element comes from an attribute.
    def non_textcontent_element?(element)
      NON_TEXTCONTENT_ELEMENTS.has_key?(element)
    end

    # True when the named attribute holds a URL.
    def url_attribute?(attribute)
      URL_ATTRIBUTES.include?(attribute)
    end

    # Whitespace-separated names from the itemprop attribute ([] if absent).
    def extract_property_names
      itemprop_attr = @element.attribute('itemprop')
      itemprop_attr ? itemprop_attr.value.split : []
    end

    # Whitespace-separated names from the rel attribute; always [] for
    # elements that are not link elements.
    def extract_rel_names
      link_rel = @element.attribute('rel')
      link? && link_rel ? link_rel.value.split : []
    end

    # The literal value of the property: the mapped attribute for elements
    # in NON_TEXTCONTENT_ELEMENTS (made absolute for URL attributes), the
    # stripped text content for everything else.
    def extract_property_value
      tag = @element.name
      return @element.inner_text.strip unless non_textcontent_element?(tag)

      attribute = NON_TEXTCONTENT_ELEMENTS[tag]
      node = @element.attribute(attribute)
      if node.nil?
        # Attribute is absent: the Microdata value is the empty string,
        # except for <time>, whose datetime value falls back to its text.
        return tag == 'time' ? @element.inner_text.strip : ''
      end

      value = node.value
      url_attribute?(attribute) ? make_absolute_url(value) : value
    end

    # A nested Item when the element has itemscope, else the literal value.
    def extract_property
      if @element.attribute('itemscope')
        Item.new(@element, @page_url)
      else
        extract_property_value
      end
    end

  end
end
@@ -0,0 +1,7 @@
1
module Hypermicrodata
  # A Property that represents a hyperlink. It differs from Property only
  # in that its #link? predicate answers true.
  class Link < Property
    # Always true for links.
    def link?
      true
    end
  end
end
@@ -0,0 +1,27 @@
1
module Hypermicrodata
  # Value object for one parsed property: its value, the itemprop names it
  # was declared under, and its link relation names.
  class Property
    attr_reader :value, :names, :rels

    # [value] The property value (a literal or an Item)
    # [names] The property names
    # [rels]  The link relation names (defaults to an empty list)
    def initialize(value, names, rels = [])
      @value, @names, @rels = value, names, rels
    end

    # The value when it is an Item, otherwise nil.
    def item
      @value.is_a?(Item) ? @value : nil
    end

    # True when #item returns something.
    def item?
      item ? true : false
    end

    # A plain property is never a link.
    def link?
      false
    end

    # A plain property is never a submit button.
    def submit_button?
      false
    end
  end
end
@@ -0,0 +1,35 @@
1
module Hypermicrodata
  module Rails
    # Mixin that renders a JSON response by first rendering the HTML view
    # and then running the result through Hypermicrodata::Extract.
    # The including class must provide +lookup_context+, +render+,
    # +render_to_string+, +url_for+, +response+ and +view_context+.
    module HtmlBasedJsonRenderer
      def initialize
        super
        # Options handed to Hypermicrodata::Extract (:location, :profile_path).
        @_render_based_html_options = {}
      end

      # Records the resource location (resolved through +url_for+) for the
      # extractor and also sends it as the Content-Location response header.
      def set_location(location)
        location_url = url_for(location)
        @_render_based_html_options[:location] = location_url
        response.headers['Content-Location'] = location_url
      end

      # Records the profile path (resolved through +path_to_asset+) for the
      # extractor.
      def set_profile_path(path)
        @_render_based_html_options[:profile_path] = view_context.path_to_asset(path)
      end

      # When the first requested format ends in "json", renders the HTML
      # view to a string, converts it with Hypermicrodata::Extract and
      # renders that as JSON; the part of the format name before "json" is
      # passed to Extract#to_json as a symbol. Otherwise delegates to
      # +render+ with the given arguments.
      def render_based_html(*args)
        requested_format = lookup_context.formats.first.to_s
        match = requested_format.match(/json\z/)
        return render(*args) unless match

        json_format = match.pre_match.to_sym
        html = render_to_string(formats: :html)
        json = Hypermicrodata::Extract.new(html, @_render_based_html_options).to_json(json_format)
        render(json: json)
      end

      # Routes default rendering through #render_based_html.
      def default_render(*args)
        render_based_html(*args)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Hypermicrodata
  module Serializer
    # Default serializer: turns the document's items into plain hashes.
    class Base
      # [document]     Object answering +items+
      # [location]     Optional location of the document
      # [profile_path] Optional profile path
      def initialize(document, location = nil, profile_path = nil)
        @document, @location, @profile_path = document, location, profile_path
      end

      # Dumps the result of #serialize with MultiJson, forwarding +options+.
      def to_json(options = {})
        payload = serialize
        MultiJson.dump(payload, options)
      end

      # Returns a hash or array suitable for application/json: each item
      # converted with +to_hash+, or an empty array when the document
      # reports no items.
      def serialize
        items = @document.items
        return [] unless items

        items.map { |item| item.to_hash }
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Hypermicrodata
  module Serializer
    # Serializes the document's items into a HAL resource hash built with
    # Halibut::Core::Resource.
    class Hal < Base
      # With exactly one top-level item, that item becomes the root
      # resource (created with @location). Otherwise an empty root
      # resource is created and every item is embedded once under each of
      # its types. A 'profile' link is added when a profile path was given.
      def serialize
        # Tolerate a document without items, as Base#serialize does.
        items = Array(@document.items)
        if items.length == 1
          hal_resource = item_to_resource(items.first, @location)
        else
          hal_resource = Halibut::Core::Resource.new(@location)
          items.each do |item|
            embedded_resource = item_to_resource(item)
            # Array() keeps untyped items (nil type) from raising, matching
            # the handling in #item_to_resource.
            Array(item.type).each do |type|
              hal_resource.add_embedded_resource(type, embedded_resource)
            end
          end
        end
        hal_resource.add_link('profile', @profile_path) if @profile_path
        hal_resource.to_hash
      end

      private

      # Converts one item into a resource: nested items are embedded under
      # the property name, other properties are set as plain values, and
      # 'self' (from the item id), 'type' and the item's own links are
      # added as links.
      def item_to_resource(item, self_url = nil)
        resource = Halibut::Core::Resource.new(self_url)
        item.properties.each do |name, same_name_properties|
          same_name_properties.each do |property|
            if property.item
              subresource = item_to_resource(property.item)
              resource.add_embedded_resource(name, subresource)
            else
              resource.set_property(name, property.value)
            end
          end
        end
        resource.add_link('self', item.id) if item.id
        Array(item.type).each do |type|
          resource.add_link('type', type)
        end
        item.links.each do |rel, same_rel_links|
          same_rel_links.each do |link|
            resource.add_link(rel, link.value)
          end
        end
        resource
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Hypermicrodata
  module Serializer
    # JSON-LD serializer. It currently adds nothing to Base.
    class Jsonld < Base

    end

    # json-ld patch
    # This does not manage to preserve HTML link semantics.
    # Maybe JSON-LD output should be dropped; it is troublesome in many ways.
    #
    # Meant to be mixed into an object answering +id+, +type+ and
    # +properties+ (see the commented-out prepend below).
    module JsonldSerializer
      # Builds a hash with the id (when present), '@type' and one entry per
      # property. When every type is a schema.org URL, '@context' is set to
      # schema.org and the prefix is stripped from the type names.
      def to_hash
        hash = {}
        # NOTE(review): the id is stored under the symbol :id while the
        # other keys are '@'-prefixed strings -- confirm this is intended.
        hash[:id] = unwrap(id) if id
        re_schema_org = %r|\Ahttp://schema\.org/|i
        # Array() lets an untyped item (nil type) be treated like an item
        # with an empty type list instead of raising.
        types = Array(type)
        if types.all? { |t| t.match(re_schema_org) }
          hash['@context'] = 'http://schema.org'
          hash['@type'] = unwrap(types.map { |t| t.sub(re_schema_org, '') })
        else
          hash['@type'] = unwrap(types)
        end
        properties.each do |name, values|
          final_values = values.map do |value|
            if value.is_a?(Hypermicrodata::Item)
              value.to_hash
            else
              value
            end
          end
          hash[name] = unwrap(final_values)
        end
        hash
      end

      # Returns the sole element of a one-element Array; anything else is
      # returned unchanged.
      def unwrap(values)
        if values.is_a?(Array) && values.length == 1
          values.first
        else
          values
        end
      end
    end
    # Hypermicrodata::Item.send :prepend, JsonldSerializer
  end
end
@@ -0,0 +1,100 @@
1
module Hypermicrodata
  module Serializer
    # Serializes the document's items into an UBER hash built with
    # Uberous::Uber / Uberous::Data.
    class Uber < Base
      # HTTP method name => UBER action name.
      ACTION_MAPPINGS = {
        'GET'    => 'read',
        'POST'   => 'append',
        'PUT'    => 'replace',
        'DELETE' => 'remove',
        'PATCH'  => 'partial'
      }.freeze

      # With exactly one top-level item, that item becomes the single root
      # data element and carries the 'self' rel/url and the profile link.
      # Otherwise every item becomes a named data element and the links
      # are attached to the Uber document itself.
      def serialize
        # Tolerate a document without items, as Base#serialize does.
        items = Array(@document.items)
        if items.length == 1
          root_data = item_to_nested_data(items.first)
          if @location
            root_data.rel = 'self'
            root_data.url = @location
          end
          root_data.add_link('profile', @profile_path) if @profile_path
          uber = Uberous::Uber.new([root_data])
        else
          data_collection = items.map do |item|
            item_to_nested_data(item).tap do |data|
              assign_short_name(data, item.type)
            end
          end
          uber = Uberous::Uber.new(data_collection)
          uber.add_link('self', @location) if @location
          uber.add_link('profile', @profile_path) if @profile_path
        end
        uber.to_hash
      end

      private

      # Converts an item and, recursively, its properties and links into a
      # data element. +self_name+ is the property name the item was found
      # under, if any.
      def item_to_nested_data(item, self_name = nil)
        parent_data = Uberous::Data.new
        if item.id
          parent_data.url = item.id
          parent_data.rel = self_name if self_name # consider a link relation
          assign_short_name(parent_data, item.type)
        else
          parent_data.name = self_name if self_name # consider a semantic descriptor
        end
        item.all_properties_and_links.each do |property|
          append_property_data(parent_data, property)
        end

        parent_data
      end

      # Adds the data element(s) for one property to +parent_data+.
      def append_property_data(parent_data, property)
        rel = property.rels.join(' ') unless property.rels.empty?
        if property.item?
          # TODO: eliminate the duplication when there are multiple names
          property.names.each do |name|
            child_data = item_to_nested_data(property.item, name)
            parent_data.add_data(child_data)
          end
          # An item that is also a link probably does not occur.
        elsif property.submit_button?
          add_data_for_names(parent_data, property.names, submit_button_attrs(property, rel))
        elsif property.link?
          attrs = { rel: rel, url: property.value }
          attrs.reject! { |_, value| value.nil? }
          add_data_for_names(parent_data, property.names, attrs)
        else # only value
          property.names.each do |name|
            child_data = Uberous::Data.new(name: name, value: property.value)
            parent_data.add_data(child_data)
          end
        end
      end

      # Attributes for a submit button, with nil entries removed. For the
      # 'read' and 'remove' actions the model is prefixed with "?".
      def submit_button_attrs(property, rel)
        attrs = { rel: rel, url: property.action_url, model: property.query_string, action: action_name(property.method) }
        attrs[:model] = "?#{attrs[:model]}" if %w(read remove).include?(attrs[:action])
        attrs.reject! { |_, value| value.nil? }
        attrs
      end

      # Adds one data element per name, or a single unnamed element when
      # +names+ is empty.
      def add_data_for_names(parent_data, names, attrs)
        if names.empty?
          parent_data.add_data(Uberous::Data.new(attrs))
        else
          names.each do |name|
            parent_data.add_data(Uberous::Data.new(attrs.merge(name: name)))
          end
        end
      end

      # Sets +data.name+ from the item types; leaves it untouched when no
      # short name can be derived (nil or empty types).
      def assign_short_name(data, item_types)
        short_name = generate_short_name(item_types)
        data.name = short_name if short_name
      end

      # First type with a leading schema.org prefix stripped, or nil when
      # there is no type at all.
      def generate_short_name(item_types)
        # TODO: is this really the right way to derive the name?
        first_type = Array(item_types).first
        first_type.sub(%r|\Ahttp://schema\.org/|, '') if first_type
      end

      # UBER action for an HTTP method name; unknown methods are passed
      # through downcased.
      def action_name(method_name)
        ACTION_MAPPINGS[method_name.to_s.upcase] || method_name.to_s.downcase
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
module Hypermicrodata
  # A form submit button exposed through the Property interface. Its value
  # is the form action followed by a query-string template.
  class SubmitButton < Property
    attr_reader :form, :method

    # [button] The submit button (answers +node+ and +dom_class+)
    # [form]   The form that owns the button; a dup of it is kept
    def initialize(button, form)
      @button = button
      @form = form.dup
      @excluded_fields = {}
      setup!
    end

    # Action URL and query string joined with "?".
    def value
      "#{action_url}?#{query_string}"
    end

    # The action of the form.
    def action_url
      @form.action
    end

    # The query parameters as built by the form.
    def params
      @form.build_query
    end

    # The parameters rendered as a query-string template.
    def query_string
      build_query_string(params)
    end

    # Space-separated names from the button's itemprop attribute.
    def names
      @button.node['itemprop'].to_s.split(' ')
    end

    # Space-separated relation names, taken from the first present of:
    # the rel attribute, the data-rel attribute, the button's dom_class.
    def rels
      node = @button.node
      source = node['rel'] || node['data-rel'] || @button.dom_class
      source.to_s.split(' ')
    end

    # A submit button never wraps an item.
    def item
      nil
    end

    def link?
      true
    end

    def submit_button?
      true
    end

    private

    # Determines the effective HTTP method and registers the button with
    # the form's query.
    def setup!
      method_field = @form.fields.find { |candidate| candidate.name == '_method' }
      if method_field
        # overload POST
        @method = method_field.value.upcase
        @excluded_fields['_method'] = method_field
      else
        @method = @form.method
      end
      # Considered safe because @form is a dup of the caller's form.
      # NOTE(review): dup is shallow -- confirm add_button_to_query does
      # not mutate state shared with the original form.
      @form.add_button_to_query(@button)
    end

    # Non-hidden fields of the form, keyed by field name (memoized).
    def template_fields
      @template_fields ||= begin
        visible = @form.fields.reject { |candidate| candidate.is_a?(Mechanize::Form::Hidden) }
        visible.each_with_object({}) { |candidate, by_name| by_name[candidate.name] = candidate }
      end
    end

    # Renders +parameters+ as "name=value" pairs joined by "&". Template
    # fields get a "{type}" placeholder instead of a value; excluded
    # fields are dropped.
    def build_query_string(parameters)
      pairs = parameters.map do |name, value|
        field = template_fields[name]
        if field
          "#{CGI.escape(name.to_s)}={#{field.type}}"
        elsif !@excluded_fields[name]
          # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
          "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
        end
      end
      pairs.compact.join('&')
    end
  end

  # Wraps a form element and exposes one SubmitButton per submit control.
  class FormParser
    attr_reader :submit_buttons

    # [element]  The form element to be parsed
    # [page_url] The url of the page (stored, not otherwise used here)
    def initialize(element, page_url = nil)
      @element = element
      @page_url = page_url
      form = Mechanize::Form.new(element)
      @submit_buttons = form.submits.map { |button| SubmitButton.new(button, form) }
    end

    # Parse the form element and return its submit buttons.
    def self.parse(element, page_url = nil)
      new(element, page_url).submit_buttons
    end
  end
end
96
+
97
# Patch for bug
Mechanize::Form.class_eval do
  # Returns all buttons of type Submit: Submit instances, plus Button
  # instances whose type is nil or 'submit'. The result is memoized.
  def submits
    @submits ||= buttons.select do |button|
      if button.class == Mechanize::Form::Submit
        true
      elsif button.class == Mechanize::Form::Button
        button_type = button.type
        button_type.nil? || button_type == 'submit'
      else
        false
      end
    end
  end
end