hypermicrodata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,114 @@
1
module Hypermicrodata
  # Class that parses itemprop elements.
  #
  # A parser wraps one element and exposes the result through +property+:
  # a Link when the element's tag is listed in LINK_ELEMENTS, otherwise a
  # Property.
  class ItempropParser
    # Elements whose property value is read from an attribute instead of the
    # element's text content, mapped to the name of that attribute.
    NON_TEXTCONTENT_ELEMENTS = {
      'a' => 'href', 'area' => 'href',
      'audio' => 'src', 'embed' => 'src',
      'iframe' => 'src', 'img' => 'src',
      'link' => 'href', 'meta' => 'content',
      'object' => 'data', 'source' => 'src',
      'time' => 'datetime', 'track' => 'src',
      'video' => 'src'
    }.freeze

    # Tag names for which a Link (rather than a Property) is built.
    LINK_ELEMENTS = %w[a area audio embed iframe img link source track video].freeze

    # Attribute names whose values are URLs and get resolved against the
    # page url.
    URL_ATTRIBUTES = %w[data href src].freeze

    # The parsed result: a Link or a Property built from the element's
    # value, its itemprop names and its rel names.
    attr_reader :property

    # Create a new Itemprop object
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def initialize(element, page_url = nil)
      @element = element
      @page_url = page_url
      property_class = link? ? Link : Property
      @property = property_class.new(extract_property, extract_property_names, extract_rel_names)
    end

    # Parse the element and return the resulting Property (or Link).
    # [element]  The itemprop element to be parsed
    # [page_url] The url of the page, including filename, used to form
    #            absolute urls
    def self.parse(element, page_url = nil)
      new(element, page_url).property
    end

    # True when the element's tag name is one of LINK_ELEMENTS.
    def link?
      LINK_ELEMENTS.include?(@element.name)
    end

    private

    # Builds a hash of {itemprop name => extracted property} for every name
    # in the itemprop attribute. Not called from within this class.
    def extract_properties
      extract_property_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property
      end
    end

    # Builds a hash of {rel name => extracted value} for every name in the
    # rel attribute. Not called from within this class.
    def extract_links
      extract_rel_names.each_with_object({}) do |name, memo|
        memo[name] = extract_property_value
      end
    end

    # Resolves +url+ against the page url when it is relative.
    #
    # Absolute urls are returned untouched. When either +url+ or the page
    # url cannot be parsed or merged (URI::Error), +url+ is returned
    # unchanged instead of raising, so one malformed attribute does not
    # abort parsing of the whole document.
    def make_absolute_url(url)
      return url unless URI.parse(url).relative?

      URI.parse(@page_url).merge(url).to_s
    rescue URI::Error
      url
    end

    # True when the value of +element+ (a tag name) comes from an attribute.
    def non_textcontent_element?(element)
      NON_TEXTCONTENT_ELEMENTS.key?(element)
    end

    # True when +attribute+ (an attribute name) holds a URL.
    def url_attribute?(attribute)
      URL_ATTRIBUTES.include?(attribute)
    end

    # Whitespace-separated names from the itemprop attribute, or [] when the
    # attribute is absent.
    def extract_property_names
      itemprop_attr = @element.attribute('itemprop')
      itemprop_attr ? itemprop_attr.value.split : []
    end

    # Whitespace-separated names from the rel attribute. Always [] for
    # elements that are not link elements.
    def extract_rel_names
      link_rel = @element.attribute('rel')
      link? && link_rel ? link_rel.value.split : []
    end

    # Extracts the plain (non-item) value of the element.
    #
    # For the tags in NON_TEXTCONTENT_ELEMENTS the value is the mapped
    # attribute, made absolute when it is a URL attribute. When that
    # attribute is missing the value is the empty string, except for +time+
    # which falls back to its text content (the Microdata rules for
    # property values). Every other element yields its stripped text.
    def extract_property_value
      tag = @element.name
      return @element.inner_text.strip unless non_textcontent_element?(tag)

      attribute = NON_TEXTCONTENT_ELEMENTS[tag]
      attribute_node = @element.attribute(attribute)
      if attribute_node.nil?
        # Previously this raised NoMethodError on nil.
        return tag == 'time' ? @element.inner_text.strip : ''
      end

      value = attribute_node.value
      url_attribute?(attribute) ? make_absolute_url(value) : value
    end

    # Returns a nested Item when the element has an itemscope attribute,
    # otherwise the element's plain value.
    def extract_property
      if @element.attribute('itemscope')
        Item.new(@element, @page_url)
      else
        extract_property_value
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Hypermicrodata
  # A Property that represents a hyperlink. It inherits everything from
  # Property and differs only in how it answers +link?+.
  class Link < Property
    # Always true for a Link (Property#link? answers false).
    def link?
      true
    end
  end
end
@@ -0,0 +1,27 @@
1
module Hypermicrodata
  # Value object for one parsed property: the extracted value together with
  # the itemprop names and rel names it was declared under.
  class Property
    # value:: the extracted value (an Item for nested items)
    # names:: the itemprop names
    # rels::  the rel names
    attr_reader :value, :names, :rels

    # [value] The extracted value
    # [names] The itemprop names of the property
    # [rels]  The rel names of the property, defaults to an empty list
    def initialize(value, names, rels = [])
      @value, @names, @rels = value, names, rels
    end

    # The value when it is an Item, nil otherwise.
    def item
      @value.is_a?(Item) ? @value : nil
    end

    # True when +item+ returns something.
    def item?
      item ? true : false
    end

    # A plain Property is never a link.
    def link?
      false
    end

    # A plain Property is never a submit button.
    def submit_button?
      false
    end
  end
end
@@ -0,0 +1,35 @@
1
module Hypermicrodata
  module Rails
    # Mixin that renders a JSON representation derived from the HTML view.
    #
    # The including class is expected to provide +lookup_context+,
    # +render+, +render_to_string+, +url_for+, +response+ and
    # +view_context+, all of which are called below.
    module HtmlBasedJsonRenderer
      # Sets up the options hash that is later handed to
      # Hypermicrodata::Extract.
      def initialize
        super
        @_render_based_html_options = {}
      end

      # Records the url of +location+ (resolved with +url_for+) as the
      # :location option and sends it as the Content-Location response
      # header.
      def set_location(location)
        location_url = url_for(location)
        @_render_based_html_options[:location] = location_url
        response.headers['Content-Location'] = location_url
      end

      # Records the asset path of +path+ as the :profile_path option.
      def set_profile_path(path)
        @_render_based_html_options[:profile_path] = view_context.path_to_asset(path)
      end

      # Renders JSON extracted from the HTML template when the first lookup
      # format's name ends in "json"; otherwise delegates to +render+ with
      # the given arguments.
      #
      # The part of the format name that precedes "json" is passed, as a
      # symbol, to Hypermicrodata::Extract#to_json.
      def render_based_html(*args)
        format_match = lookup_context.formats.first.to_s.match(/json$/)
        return render(*args) unless format_match

        json_format = format_match.pre_match.to_sym
        html = render_to_string(formats: :html)
        json = Hypermicrodata::Extract.new(html, @_render_based_html_options).to_json(json_format)
        render(json: json)
      end

      # Routes default rendering through +render_based_html+.
      def default_render(*args)
        render_based_html(*args)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Hypermicrodata
  module Serializer
    # Base serializer: turns the document's items into a plain array of
    # hashes and dumps it through MultiJson.
    class Base
      # [document]     Object answering +items+
      # [location]     Optional location, stored for use by subclasses
      # [profile_path] Optional profile path, stored for use by subclasses
      def initialize(document, location = nil, profile_path = nil)
        @document, @location, @profile_path = document, location, profile_path
      end

      # Dumps the result of +serialize+ with MultiJson, forwarding +options+.
      def to_json(options = {})
        serialized = serialize
        MultiJson.dump(serialized, options)
      end

      # Returns a hash or array suitable for application/json: each item's
      # +to_hash+, or an empty array when the document has no items.
      def serialize
        items = @document.items
        return [] unless items

        items.map { |item| item.to_hash }
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Hypermicrodata
  module Serializer
    # Serializes the document's items into a hash built with
    # Halibut::Core::Resource.
    class Hal < Base
      # Builds the resource hash.
      #
      # A document with exactly one item becomes that item's resource,
      # created with the location. Any other number of items becomes a
      # resource created with the location that embeds every item once per
      # item type. A 'profile' link is added when a profile path was given.
      def serialize
        # Treat a missing item list like an empty one, as Base#serialize does.
        items = @document.items || []
        if items.length == 1
          hal_resource = item_to_resource(items.first, @location)
        else
          hal_resource = Halibut::Core::Resource.new(@location)
          items.each do |item|
            embedded_resource = item_to_resource(item)
            # Array() keeps an item without a type from raising; such an
            # item has no type to embed it under and is therefore skipped.
            Array(item.type).each do |type|
              hal_resource.add_embedded_resource(type, embedded_resource)
            end
          end
        end
        hal_resource.add_link('profile', @profile_path) if @profile_path
        hal_resource.to_hash
      end

      private

      # Converts +item+ into a Halibut resource created with +self_url+.
      #
      # Nested items are embedded under their property name, plain values
      # are set as properties, and links are added for the item's id
      # ('self'), each of its types ('type') and each of its links.
      def item_to_resource(item, self_url = nil)
        resource = Halibut::Core::Resource.new(self_url)
        item.properties.each do |name, same_name_properties|
          same_name_properties.each do |property|
            if property.item
              resource.add_embedded_resource(name, item_to_resource(property.item))
            else
              # NOTE(review): set_property is called once per value under
              # the same name; presumably a later value replaces an earlier
              # one - confirm against Halibut.
              resource.set_property(name, property.value)
            end
          end
        end
        resource.add_link('self', item.id) if item.id
        Array(item.type).each do |type|
          resource.add_link('type', type)
        end
        item.links.each do |rel, same_rel_links|
          same_rel_links.each do |link|
            resource.add_link(rel, link.value)
          end
        end
        resource
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Hypermicrodata
  module Serializer
    # JSON-LD serializer. Adds nothing to Base.
    class Jsonld < Base
    end

    # json-ld patch
    # This does not manage to preserve the HTML link semantics.
    # Maybe we should give up on JSON-LD; it is troublesome in many ways.
    #
    # The methods below call +id+, +type+ and +properties+ on the object
    # they are mixed into.
    module JsonldSerializer
      # Builds a hash with the id (when present), '@type', and one entry
      # per property name. When every type is a schema.org URL, '@context'
      # is set to schema.org and the prefix is stripped from the types.
      # Single-element arrays are unwrapped to their only element.
      #
      # NOTE(review): the id is stored under the symbol :id while the other
      # keywords use the string keys '@context' and '@type' - confirm that
      # this is intended.
      def to_hash
        schema_org_prefix = %r|^http://schema\.org/|i
        result = {}
        result[:id] = unwrap(id) if id

        if type.all? { |type_url| type_url.match(schema_org_prefix) }
          result['@context'] = 'http://schema.org'
          short_types = type.map { |type_url| type_url.sub(schema_org_prefix, '') }
          result['@type'] = unwrap(short_types)
        else
          result['@type'] = unwrap(type)
        end

        properties.each do |name, values|
          converted = values.map do |value|
            value.is_a?(Hypermicrodata::Item) ? value.to_hash : value
          end
          result[name] = unwrap(converted)
        end
        result
      end

      # Returns the only element of a one-element array; anything else is
      # returned unchanged.
      def unwrap(values)
        return values unless values.is_a?(Array) && values.length == 1

        values.first
      end
    end
    # Hypermicrodata::Item.send :prepend, JsonldSerializer
  end
end
@@ -0,0 +1,100 @@
1
module Hypermicrodata
  module Serializer
    # Serializes the document's items into a hash built with
    # Uberous::Uber and Uberous::Data.
    class Uber < Base
      # Maps an upper-case HTTP method name to the action name emitted for
      # submit buttons.
      ACTION_MAPPINGS = {
        'GET'    => 'read',
        'POST'   => 'append',
        'PUT'    => 'replace',
        'DELETE' => 'remove',
        'PATCH'  => 'partial'
      }.freeze

      # Builds the document hash.
      #
      # A document with exactly one item uses that item's data as the single
      # root data, marked rel 'self' with the location as url when a
      # location was given. Otherwise every item becomes one data element
      # and the 'self' link is added on the document. A 'profile' link is
      # added when a profile path was given.
      def serialize
        # Treat a missing item list like an empty one, as Base#serialize does.
        items = @document.items || []
        if items.length == 1
          root_data = item_to_nested_data(items.first)
          if @location
            root_data.rel = 'self'
            root_data.url = @location
          end
          root_data.add_link('profile', @profile_path) if @profile_path
          uber = Uberous::Uber.new([root_data])
        else
          data_collection = items.map do |item|
            item_to_nested_data(item).tap do |data|
              data.name = generate_short_name(item.type) if item.type
            end
          end
          uber = Uberous::Uber.new(data_collection)
          uber.add_link('self', @location) if @location
          uber.add_link('profile', @profile_path) if @profile_path
        end
        uber.to_hash
      end

      private

      # Converts +item+ and everything returned by its
      # +all_properties_and_links+ into a tree of Uberous::Data.
      #
      # +self_name+ is the name the item was reached under, if any.
      def item_to_nested_data(item, self_name = nil)
        parent_data = Uberous::Data.new
        if item.id
          parent_data.url = item.id
          parent_data.rel = self_name if self_name # consider a link relation
          parent_data.name = generate_short_name(item.type) if item.type
        else
          parent_data.name = self_name if self_name # consider a semantic descriptor
        end

        item.all_properties_and_links.each do |property|
          rel = property.rels.join(' ') unless property.rels.empty?
          if property.item?
            # TODO: eliminate the duplication when there are multiple names
            property.names.each do |name|
              parent_data.add_data(item_to_nested_data(property.item, name))
            end
            # Something that is both an item and a link probably does not occur
          elsif property.submit_button?
            attrs = { rel: rel, url: property.action_url, model: property.query_string,
                      action: action_name(property.method) }
            # read/remove actions get the query string prefixed with '?'
            attrs[:model] = "?#{attrs[:model]}" if %w(read remove).include?(attrs[:action])
            add_named_data(parent_data, attrs, property.names)
          elsif property.link?
            add_named_data(parent_data, { rel: rel, url: property.value }, property.names)
          else # only value
            property.names.each do |name|
              parent_data.add_data(Uberous::Data.new(name: name, value: property.value))
            end
          end
        end

        parent_data
      end

      # Adds data built from +attrs+ (nil values dropped) to +parent_data+:
      # a single unnamed data when +names+ is empty, otherwise one data per
      # name.
      def add_named_data(parent_data, attrs, names)
        present_attrs = attrs.reject { |_, value| value.nil? }
        if names.empty?
          parent_data.add_data(Uberous::Data.new(present_attrs))
        else
          names.each do |name|
            parent_data.add_data(Uberous::Data.new(present_attrs.merge(name: name)))
          end
        end
      end

      # Returns the first item type with a leading http://schema.org/
      # removed, or nil when there is no type (nil or an empty list).
      def generate_short_name(item_types)
        # TODO: is this really OK?
        first_type = Array(item_types).first
        first_type.sub(%r|^http://schema\.org/|, '') if first_type
      end

      # Maps an HTTP method name to its action name, falling back to the
      # lower-cased method name for methods not in ACTION_MAPPINGS.
      def action_name(method_name)
        ACTION_MAPPINGS[method_name.to_s.upcase] || method_name.to_s.downcase
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
module Hypermicrodata
  # A Property backed by a form's submit button. Its value, names and rels
  # are computed from the button and a copy of the form instead of being
  # stored.
  class SubmitButton < Property
    # form::   the duplicated form this button submits
    # method:: the method name determined in +setup!+
    attr_reader :form, :method

    # [button] The submit button
    # [form]   The form owning the button; a +dup+ of it is kept
    def initialize(button, form)
      @button = button
      @form = form.dup
      @excluded_fields = {}
      setup!
    end

    # The action url and the query string joined with '?'.
    def value
      [action_url, query_string].join('?')
    end

    # The form's action.
    def action_url
      @form.action
    end

    # The parameters produced by the form's +build_query+.
    def params
      @form.build_query
    end

    # The query string built from +params+.
    def query_string
      build_query_string(params)
    end

    # Names from the button's itemprop attribute, split on spaces.
    def names
      itemprop = @button.node['itemprop'] || ''
      itemprop.split(' ')
    end

    # Rel names taken from the first present of: the rel attribute, the
    # data-rel attribute, the button's dom class.
    def rels
      node = @button.node
      rel = node['rel'] || node['data-rel'] || @button.dom_class || ''
      rel.split(' ')
    end

    # A submit button never wraps an item.
    def item
      nil
    end

    # A submit button counts as a link.
    def link?
      true
    end

    # Always true for a submit button.
    def submit_button?
      true
    end

    private

    # Determines the method and registers the button with the form.
    def setup!
      method_field = @form.fields.find { |field| field.name == '_method' }
      if method_field
        # overload POST
        @method = method_field.value.upcase
        @excluded_fields['_method'] = method_field
      else
        @method = @form.method
      end
      # OK because the form has been dup'ed.
      # NOTE(review): dup is a shallow copy - confirm that registering the
      # button here cannot affect the copies held by sibling buttons.
      @form.add_button_to_query(@button)
    end

    # The form's fields other than Mechanize::Form::Hidden, keyed by name.
    def template_fields
      @template_fields ||= begin
        visible_fields = @form.fields.reject { |field| field.is_a?(Mechanize::Form::Hidden) }
        visible_fields.each_with_object({}) { |field, by_name| by_name[field.name] = field }
      end
    end

    # Joins +parameters+ into a query string. A parameter that matches a
    # template field is emitted as name={field type}; other parameters are
    # emitted escaped, except those recorded in @excluded_fields, which
    # are dropped.
    def build_query_string(parameters)
      pairs = parameters.map do |name, value|
        field = template_fields[name]
        if field
          "#{CGI.escape(name.to_s)}={#{field.type}}"
        elsif !@excluded_fields[name]
          # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
          "#{CGI.escape(name.to_s)}=#{CGI.escape(value.to_s)}"
        end
      end
      pairs.compact.join('&')
    end
  end

  # Builds a SubmitButton for every submit button of a form element.
  class FormParser
    # The SubmitButton objects found in the form.
    attr_reader :submit_buttons

    # [element]  The form element to be parsed
    # [page_url] The url of the page; stored but not otherwise used here
    def initialize(element, page_url = nil)
      @element = element
      @page_url = page_url
      form = Mechanize::Form.new(element)
      @submit_buttons = form.submits.map { |button| SubmitButton.new(button, form) }
    end

    # Parses +element+ and returns its submit buttons.
    def self.parse(element, page_url = nil)
      new(element, page_url).submit_buttons
    end
  end
end
96
+
97
# Patch for bug
Mechanize::Form.class_eval do
  # Returns all buttons of type Submit: instances of exactly
  # Mechanize::Form::Submit, plus instances of exactly
  # Mechanize::Form::Button whose type is nil or 'submit'. The result is
  # memoized in @submits.
  def submits
    @submits ||= buttons.select do |button|
      next true if button.class == Mechanize::Form::Submit

      button.class == Mechanize::Form::Button && (button.type.nil? || button.type == 'submit')
    end
  end
end