micromicro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +14 -0
  3. data/.gitignore +34 -0
  4. data/.gitmodules +3 -0
  5. data/.reek.yml +8 -0
  6. data/.rspec +2 -0
  7. data/.rubocop +3 -0
  8. data/.rubocop.yml +25 -0
  9. data/.ruby-version +1 -0
  10. data/.simplecov +11 -0
  11. data/.travis.yml +19 -0
  12. data/CHANGELOG.md +5 -0
  13. data/CONTRIBUTING.md +37 -0
  14. data/Gemfile +14 -0
  15. data/LICENSE +21 -0
  16. data/README.md +122 -0
  17. data/Rakefile +18 -0
  18. data/lib/micro_micro/collections/base_collection.rb +37 -0
  19. data/lib/micro_micro/collections/items_collection.rb +10 -0
  20. data/lib/micro_micro/collections/properties_collection.rb +18 -0
  21. data/lib/micro_micro/collections/relations_collection.rb +23 -0
  22. data/lib/micro_micro/document.rb +71 -0
  23. data/lib/micro_micro/implied_property.rb +25 -0
  24. data/lib/micro_micro/item.rb +151 -0
  25. data/lib/micro_micro/parsers/base_property_parser.rb +33 -0
  26. data/lib/micro_micro/parsers/date_time_parser.rb +85 -0
  27. data/lib/micro_micro/parsers/date_time_property_parser.rb +65 -0
  28. data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +28 -0
  29. data/lib/micro_micro/parsers/implied_name_property_parser.rb +78 -0
  30. data/lib/micro_micro/parsers/implied_photo_property_parser.rb +69 -0
  31. data/lib/micro_micro/parsers/implied_url_property_parser.rb +61 -0
  32. data/lib/micro_micro/parsers/plain_text_property_parser.rb +39 -0
  33. data/lib/micro_micro/parsers/url_property_parser.rb +75 -0
  34. data/lib/micro_micro/parsers/value_class_pattern_parser.rb +92 -0
  35. data/lib/micro_micro/property.rb +116 -0
  36. data/lib/micro_micro/relation.rb +78 -0
  37. data/lib/micro_micro/version.rb +3 -0
  38. data/lib/micromicro.rb +39 -0
  39. data/micromicro.gemspec +28 -0
  40. metadata +128 -0
@@ -0,0 +1,78 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class ImpliedNamePropertyParser < BasePropertyParser
4
+ # @see microformats2 Parsing Specification section 1.3.5
5
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
+ HTML_ELEMENTS_MAP = {
7
+ 'area' => 'alt',
8
+ 'img' => 'alt',
9
+ 'abbr' => 'title'
10
+ }.freeze
11
+
12
+ # @return [String]
13
+ def value
14
+ @value ||= unresolved_value.strip
15
+ end
16
+
17
+ private
18
+
19
+ # @return [Array<String>]
20
+ def attribute_values
21
+ @attribute_values ||= begin
22
+ HTML_ELEMENTS_MAP.map do |element, attribute|
23
+ node[attribute] if node.matches?("#{element}[#{attribute}]")
24
+ end.compact
25
+ end
26
+ end
27
+
28
+ # @return [Nokogiri::XML::Element, nil]
29
+ def child_node
30
+ @child_node ||= node.at_css('> :only-child')
31
+ end
32
+
33
+ # @return [Array<String>]
34
+ def child_node_attribute_values
35
+ @child_node_attribute_values ||= begin
36
+ HTML_ELEMENTS_MAP.map do |element, attribute|
37
+ child_node[attribute] if child_node.matches?("#{element}[#{attribute}]")
38
+ end.compact
39
+ end
40
+ end
41
+
42
+ # @return [Nokogiri::XML::Element, nil]
43
+ def grandchild_node
44
+ @grandchild_node ||= child_node.at_css('> :only-child')
45
+ end
46
+
47
+ # @return [Array<String>]
48
+ def grandchild_node_attribute_values
49
+ @grandchild_node_attribute_values ||= begin
50
+ HTML_ELEMENTS_MAP.map do |element, attribute|
51
+ grandchild_node[attribute] if grandchild_node.matches?("#{element}[#{attribute}]")
52
+ end.compact
53
+ end
54
+ end
55
+
56
+ # @return [Boolean]
57
+ def parse_child_node?
58
+ child_node && !Item.item_node?(child_node)
59
+ end
60
+
61
+ # @return [Boolean]
62
+ def parse_grandchild_node?
63
+ parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
64
+ end
65
+
66
+ # @return [String]
67
+ def unresolved_value
68
+ return attribute_values.first if attribute_values.any?
69
+ return child_node_attribute_values.first if parse_child_node? && child_node_attribute_values.any?
70
+ return grandchild_node_attribute_values.first if parse_grandchild_node? && grandchild_node_attribute_values.any?
71
+
72
+ serialized_node.css('img').each { |img| img.content = img['alt'] }
73
+
74
+ serialized_node.text
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,69 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class ImpliedPhotoPropertyParser < BasePropertyParser
4
+ # @see microformats2 Parsing Specification section 1.3.5
5
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
+ HTML_ELEMENTS_MAP = {
7
+ 'img' => 'src',
8
+ 'object' => 'data'
9
+ }.freeze
10
+
11
+ # @return [String, Hash{Symbol => String}, nil]
12
+ def value
13
+ @value ||= begin
14
+ return unless resolved_value
15
+ return resolved_value unless value_node.matches?('img[alt]')
16
+
17
+ {
18
+ value: resolved_value,
19
+ alt: value_node['alt'].strip
20
+ }
21
+ end
22
+ end
23
+
24
+ private
25
+
26
+ # @return [Array<Nokogiri::XML::Element>]
27
+ def attribute_values
28
+ @attribute_values ||= begin
29
+ HTML_ELEMENTS_MAP.map do |element, attribute|
30
+ node if node.matches?("#{element}[#{attribute}]")
31
+ end.compact
32
+ end
33
+ end
34
+
35
+ # @return [String, nil]
36
+ def resolved_value
37
+ @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
38
+ end
39
+
40
+ # @return [String, nil]
41
+ def unresolved_value
42
+ @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
43
+ end
44
+
45
+ # @return [Nokogiri::XML::Element, nil]
46
+ def value_node
47
+ @value_node ||= begin
48
+ return attribute_values.first if attribute_values.any?
49
+
50
+ HTML_ELEMENTS_MAP.each do |element, attribute|
51
+ child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
52
+
53
+ return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
54
+ end
55
+
56
+ if node.element_children.one? && !Item.item_node?(node.first_element_child)
57
+ HTML_ELEMENTS_MAP.each do |element, attribute|
58
+ child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
59
+
60
+ return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
61
+ end
62
+ end
63
+
64
+ nil
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,61 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class ImpliedUrlPropertyParser < BasePropertyParser
4
+ # @see microformats2 Parsing Specification section 1.3.5
5
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
6
+ HTML_ELEMENTS_MAP = {
7
+ 'a' => 'href',
8
+ 'area' => 'href'
9
+ }.freeze
10
+
11
+ # @return [String, nil]
12
+ def value
13
+ @value ||= resolved_value
14
+ end
15
+
16
+ private
17
+
18
+ # @return [Array<Nokogiri::XML::Element>]
19
+ def attribute_values
20
+ @attribute_values ||= begin
21
+ HTML_ELEMENTS_MAP.map do |element, attribute|
22
+ node if node.matches?("#{element}[#{attribute}]")
23
+ end.compact
24
+ end
25
+ end
26
+
27
+ # @return [String, nil]
28
+ def resolved_value
29
+ @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip) if unresolved_value
30
+ end
31
+
32
+ # @return [String, nil]
33
+ def unresolved_value
34
+ @unresolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
35
+ end
36
+
37
+ # @return [Nokogiri::XML::Element, nil]
38
+ def value_node
39
+ @value_node ||= begin
40
+ return attribute_values.first if attribute_values.any?
41
+
42
+ HTML_ELEMENTS_MAP.each do |element, attribute|
43
+ child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
44
+
45
+ return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
46
+ end
47
+
48
+ if node.element_children.one? && !Item.item_node?(node.first_element_child)
49
+ HTML_ELEMENTS_MAP.each do |element, attribute|
50
+ child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
51
+
52
+ return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
53
+ end
54
+ end
55
+
56
+ nil
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,39 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class PlainTextPropertyParser < BasePropertyParser
4
+ # @see microformats2 Parsing Specification section 1.3.1
5
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
6
+ HTML_ATTRIBUTES_MAP = {
7
+ 'title' => %w[abbr link],
8
+ 'value' => %w[data input],
9
+ 'alt' => %w[area img]
10
+ }.freeze
11
+
12
+ # @return [String]
13
+ def value
14
+ @value ||= begin
15
+ return value_class_pattern_parser.value if value_class_pattern_parser.value?
16
+ return attribute_values.first if attribute_values.any?
17
+
18
+ super
19
+ end
20
+ end
21
+
22
+ private
23
+
24
+ # @return [Array<String>]
25
+ def attribute_values
26
+ @attribute_values ||= begin
27
+ HTML_ATTRIBUTES_MAP.map do |attribute, names|
28
+ node[attribute] if names.include?(node.name) && node[attribute]
29
+ end.compact
30
+ end
31
+ end
32
+
33
+ # @return [MicroMicro::Parsers::ValueClassPatternParser]
34
+ def value_class_pattern_parser
35
+ @value_class_pattern_parser ||= ValueClassPatternParser.new(node)
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,75 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class UrlPropertyParser < BasePropertyParser
4
+ # @see microformats2 Parsing Specification section 1.3.2
5
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
6
+ HTML_ATTRIBUTES_MAP = {
7
+ 'href' => %w[a area link],
8
+ 'src' => %w[audio iframe img source video],
9
+ 'poster' => %w[video],
10
+ 'data' => %w[object]
11
+ }.freeze
12
+
13
+ # @see microformats2 Parsing Specification section 1.3.2
14
+ # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
15
+ EXTENDED_HTML_ATTRIBUTES_MAP = {
16
+ 'title' => %w[abbr],
17
+ 'value' => %w[data input]
18
+ }.freeze
19
+
20
+ # @see microformats2 Parsing Specification section 1.5
21
+ # @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
22
+ #
23
+ # @return [String, Hash{Symbol => String}]
24
+ def value
25
+ @value ||= begin
26
+ return resolved_value unless node.matches?('img[alt]')
27
+
28
+ {
29
+ value: resolved_value,
30
+ alt: node['alt'].strip
31
+ }
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def attribute_values
38
+ @attribute_values ||= begin
39
+ HTML_ATTRIBUTES_MAP.map do |attribute, names|
40
+ node[attribute] if names.include?(node.name) && node[attribute]
41
+ end.compact
42
+ end
43
+ end
44
+
45
+ def extended_attribute_values
46
+ @extended_attribute_values ||= begin
47
+ EXTENDED_HTML_ATTRIBUTES_MAP.map do |attribute, names|
48
+ node[attribute] if names.include?(node.name) && node[attribute]
49
+ end
50
+ end.compact
51
+ end
52
+
53
+ # @return [String]
54
+ def resolved_value
55
+ @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip)
56
+ end
57
+
58
+ # @return [String]
59
+ def unresolved_value
60
+ @unresolved_value ||= begin
61
+ return attribute_values.first if attribute_values.any?
62
+ return value_class_pattern_parser.value if value_class_pattern_parser.value?
63
+ return extended_attribute_values.first if extended_attribute_values.any?
64
+
65
+ serialized_node.text
66
+ end
67
+ end
68
+
69
+ # @return [MicroMicro::Parsers::ValueClassPatternParser]
70
+ def value_class_pattern_parser
71
+ @value_class_pattern_parser ||= ValueClassPatternParser.new(node)
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,92 @@
1
+ module MicroMicro
2
+ module Parsers
3
+ class ValueClassPatternParser
4
+ # @see Value Class Pattern sections 3 and 4
5
+ # @see http://microformats.org/wiki/value-class-pattern#Basic_Parsing
6
+ # @see http://microformats.org/wiki/value-class-pattern#Date_and_time_values
7
+ HTML_ATTRIBUTES_MAP = {
8
+ 'alt' => %w[area img],
9
+ 'value' => %w[data],
10
+ 'title' => %w[abbr],
11
+ 'datetime' => %w[del ins time]
12
+ }.freeze
13
+
14
+ # @param node [Nokogiri::XML::Element]
15
+ # @param separator [String]
16
+ def initialize(node, separator = '')
17
+ @node = node
18
+ @separator = separator
19
+ end
20
+
21
+ # @return [String, nil]
22
+ def value
23
+ @value ||= values.join(separator).strip if values?
24
+ end
25
+
26
+ # @return [Boolean]
27
+ def value?
28
+ value.present?
29
+ end
30
+
31
+ # @return [Array<String>]
32
+ def values
33
+ @values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
34
+ end
35
+
36
+ # @return [Boolean]
37
+ def values?
38
+ values.any?
39
+ end
40
+
41
+ # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
42
+ # @param node_set [Nokogiri::XML::NodeSet]
43
+ # @return [Nokogiri::XML::NodeSet]
44
+ def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
45
+ context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
46
+
47
+ if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
48
+ if value_class_node?(context) || value_title_node?(context)
49
+ node_set << context
50
+ else
51
+ nodes_from(context.element_children, node_set)
52
+ end
53
+ end
54
+
55
+ node_set
56
+ end
57
+
58
+ # @param node [Nokogiri::XML::Element]
59
+ # @return [Boolean]
60
+ def self.value_class_node?(node)
61
+ node.classes.include?('value')
62
+ end
63
+
64
+ # @param node [Nokogiri::XML::Element]
65
+ # @return [String, nil]
66
+ def self.value_from(node)
67
+ return node['title'] if value_title_node?(node)
68
+
69
+ HTML_ATTRIBUTES_MAP.each do |attribute, names|
70
+ return node[attribute] if names.include?(node.name) && node[attribute]
71
+ end
72
+
73
+ node.text
74
+ end
75
+
76
+ # @param node [Nokogiri::XML::Element]
77
+ # @return [Boolean]
78
+ def self.value_title_node?(node)
79
+ node.classes.include?('value-title')
80
+ end
81
+
82
+ private
83
+
84
+ attr_reader :node, :separator
85
+
86
+ # @return [Nokogiri::XML::NodeSet]
87
+ def value_nodes
88
+ @value_nodes ||= self.class.nodes_from(node)
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,116 @@
1
+ module MicroMicro
2
+ class Property
3
+ PROPERTY_PARSERS_MAP = {
4
+ 'dt' => Parsers::DateTimePropertyParser,
5
+ 'e' => Parsers::EmbeddedMarkupPropertyParser,
6
+ 'p' => Parsers::PlainTextPropertyParser,
7
+ 'u' => Parsers::UrlPropertyParser
8
+ }.freeze
9
+
10
+ attr_accessor :collection
11
+ attr_reader :name, :node, :prefix
12
+
13
+ # @param node [Nokogiri::XML::Element]
14
+ # @param name [String]
15
+ # @param prefix [String<dt, e, p, u>]
16
+ def initialize(node, name:, prefix:)
17
+ @node = node
18
+ @name = name
19
+ @prefix = prefix
20
+ end
21
+
22
+ # @return [Boolean]
23
+ def implied?
24
+ false
25
+ end
26
+
27
+ # @return [String]
28
+ def inspect
29
+ format(%(#<#{self.class.name}:%#0x name: #{name.inspect}, prefix: #{prefix.inspect}, value: #{value.inspect}>), object_id)
30
+ end
31
+
32
+ # @return [Boolean]
33
+ def item_node?
34
+ @item_node ||= Item.item_node?(node)
35
+ end
36
+
37
+ # @return [String, Hash, MicroMicro::Item]
38
+ def value
39
+ @value ||= begin
40
+ return parser.value unless item_node?
41
+
42
+ item.value = item_value
43
+
44
+ item
45
+ end
46
+ end
47
+
48
+ # @return [Boolean]
49
+ def value?
50
+ value.present?
51
+ end
52
+
53
+ # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
54
+ # @param node_set [Nokogiri::XML::NodeSet]
55
+ # @return [Nokogiri::XML::NodeSet]
56
+ def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
57
+ context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
58
+
59
+ if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
60
+ node_set << context if property_node?(context)
61
+
62
+ nodes_from(context.element_children, node_set) unless Item.item_node?(context)
63
+ end
64
+
65
+ node_set
66
+ end
67
+
68
+ # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
69
+ # @return [Array<MicroMicro::Property>]
70
+ def self.properties_from(context)
71
+ nodes_from(context).map do |node|
72
+ types_from(node).map { |prefix, name| new(node, name: name, prefix: prefix) }
73
+ end.flatten
74
+ end
75
+
76
+ # @param node [Nokogiri::XML::Element]
77
+ # @return [Boolean]
78
+ def self.property_node?(node)
79
+ types_from(node).any?
80
+ end
81
+
82
+ # @param node [Nokogiri::XML::Element]
83
+ # @return [Array<Array(String, String)>]
84
+ #
85
+ # @example
86
+ # node = Nokogiri::HTML('<a href="https://sixtwothree.org" class="p-name u-url">Jason Garber</a>').at_css('a')
87
+ # MicroMicro::Property.types_from(node) #=> [['p', 'name'], ['u', 'url']]
88
+ def self.types_from(node)
89
+ node.classes.select { |token| token.match?(/^(?:dt|e|p|u)(?:\-[0-9a-z]+)?(?:\-[a-z]+)+$/) }.map { |token| token.split(/\-/, 2) }.uniq
90
+ end
91
+
92
+ private
93
+
94
+ # @return [MicroMicro::Item, nil]
95
+ def item
96
+ @item ||= Item.new(node) if item_node?
97
+ end
98
+
99
+ # @return [String, nil]
100
+ def item_value
101
+ return unless item_node?
102
+
103
+ obj_by_prefix = case prefix
104
+ when 'e' then item
105
+ when 'p' then item.properties.find { |property| property.name == 'name' }
106
+ when 'u' then item.properties.find { |property| property.name == 'url' }
107
+ end
108
+
109
+ (obj_by_prefix || parser).value
110
+ end
111
+
112
+ def parser
113
+ @parser ||= PROPERTY_PARSERS_MAP[prefix].new(self)
114
+ end
115
+ end
116
+ end