micromicro 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +14 -0
  3. data/.gitignore +34 -0
  4. data/.gitmodules +3 -0
  5. data/.reek.yml +8 -0
  6. data/.rspec +2 -0
  7. data/.rubocop +3 -0
  8. data/.rubocop.yml +25 -0
  9. data/.ruby-version +1 -0
  10. data/.simplecov +11 -0
  11. data/.travis.yml +19 -0
  12. data/CHANGELOG.md +5 -0
  13. data/CONTRIBUTING.md +37 -0
  14. data/Gemfile +14 -0
  15. data/LICENSE +21 -0
  16. data/README.md +122 -0
  17. data/Rakefile +18 -0
  18. data/lib/micro_micro/collections/base_collection.rb +37 -0
  19. data/lib/micro_micro/collections/items_collection.rb +10 -0
  20. data/lib/micro_micro/collections/properties_collection.rb +18 -0
  21. data/lib/micro_micro/collections/relations_collection.rb +23 -0
  22. data/lib/micro_micro/document.rb +71 -0
  23. data/lib/micro_micro/implied_property.rb +25 -0
  24. data/lib/micro_micro/item.rb +151 -0
  25. data/lib/micro_micro/parsers/base_property_parser.rb +33 -0
  26. data/lib/micro_micro/parsers/date_time_parser.rb +85 -0
  27. data/lib/micro_micro/parsers/date_time_property_parser.rb +65 -0
  28. data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +28 -0
  29. data/lib/micro_micro/parsers/implied_name_property_parser.rb +78 -0
  30. data/lib/micro_micro/parsers/implied_photo_property_parser.rb +69 -0
  31. data/lib/micro_micro/parsers/implied_url_property_parser.rb +61 -0
  32. data/lib/micro_micro/parsers/plain_text_property_parser.rb +39 -0
  33. data/lib/micro_micro/parsers/url_property_parser.rb +75 -0
  34. data/lib/micro_micro/parsers/value_class_pattern_parser.rb +92 -0
  35. data/lib/micro_micro/property.rb +116 -0
  36. data/lib/micro_micro/relation.rb +78 -0
  37. data/lib/micro_micro/version.rb +3 -0
  38. data/lib/micromicro.rb +39 -0
  39. data/micromicro.gemspec +28 -0
  40. metadata +128 -0
@@ -0,0 +1,78 @@
module MicroMicro
  module Parsers
    # Derives the implied "name" value for a node. The value comes from the
    # node's own mapped attribute, else from its only child, else from that
    # child's only child, and finally from the node's serialized text.
    #
    # NOTE(review): +node+ and +serialized_node+ are not defined in this class;
    # presumably they are inherited from BasePropertyParser - confirm.
    class ImpliedNamePropertyParser < BasePropertyParser
      # Element name => attribute that carries the implied name.
      #
      # @see microformats2 Parsing Specification section 1.3.5
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
      HTML_ELEMENTS_MAP = {
        'area' => 'alt',
        'img' => 'alt',
        'abbr' => 'title'
      }.freeze

      # The implied name with surrounding whitespace removed.
      #
      # @return [String]
      def value
        @value ||= unresolved_value.strip
      end

      private

      # Mapped attribute values found on the node itself.
      #
      # @return [Array<String>]
      def attribute_values
        @attribute_values ||= mapped_attribute_values_for(node)
      end

      # The node's only child element, if it has exactly one.
      #
      # @return [Nokogiri::XML::Element, nil]
      def child_node
        @child_node ||= node.at_css('> :only-child')
      end

      # Mapped attribute values found on the only child.
      #
      # @return [Array<String>]
      def child_node_attribute_values
        @child_node_attribute_values ||= mapped_attribute_values_for(child_node)
      end

      # The only child's own only child element, if any.
      #
      # @return [Nokogiri::XML::Element, nil]
      def grandchild_node
        @grandchild_node ||= child_node.at_css('> :only-child')
      end

      # Mapped attribute values found on the only grandchild.
      #
      # @return [Array<String>]
      def grandchild_node_attribute_values
        @grandchild_node_attribute_values ||= mapped_attribute_values_for(grandchild_node)
      end

      # Collects, in HTML_ELEMENTS_MAP order, the attribute value of every
      # element/attribute pair that the given element matches.
      #
      # @param element [Nokogiri::XML::Element]
      # @return [Array<String>]
      def mapped_attribute_values_for(element)
        matching_pairs = HTML_ELEMENTS_MAP.select do |tag_name, attribute|
          element.matches?("#{tag_name}[#{attribute}]")
        end

        matching_pairs.map { |_tag_name, attribute| element[attribute] }.compact
      end

      # Truthy when there is an only child and it is not itself an item.
      #
      # @return [Boolean]
      def parse_child_node?
        only_child = child_node

        only_child && !Item.item_node?(only_child)
      end

      # Truthy when the only child is usable and has an only child of its own
      # that is not itself an item.
      #
      # @return [Boolean]
      def parse_grandchild_node?
        parse_child_node? && grandchild_node && !Item.item_node?(grandchild_node)
      end

      # The raw (unstripped) implied name.
      #
      # @return [String]
      def unresolved_value
        if attribute_values.any?
          attribute_values.first
        elsif parse_child_node? && child_node_attribute_values.any?
          child_node_attribute_values.first
        elsif parse_grandchild_node? && grandchild_node_attribute_values.any?
          grandchild_node_attribute_values.first
        else
          # Fall back to text content, with each nested img contributing its
          # alt text in place of its (empty) content.
          serialized_node.css('img').each { |image| image.content = image['alt'] }

          serialized_node.text
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
module MicroMicro
  module Parsers
    # Derives the implied "photo" value for a node from an img[src] or
    # object[data] element: the node itself, an only-of-type child, or an
    # only-of-type child of the node's single child element.
    #
    # NOTE(review): +node+ is not defined in this class; presumably it is
    # inherited from BasePropertyParser - confirm.
    class ImpliedPhotoPropertyParser < BasePropertyParser
      # Element name => attribute that carries the photo URL.
      #
      # @see microformats2 Parsing Specification section 1.3.5
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
      HTML_ELEMENTS_MAP = {
        'img' => 'src',
        'object' => 'data'
      }.freeze

      # The resolved URL, or a Hash pairing it with the stripped alt text when
      # the value comes from an img element that has an alt attribute. Returns
      # nil when no suitable element is found.
      #
      # @return [String, Hash{Symbol => String}, nil]
      def value
        # Written as an expression (no early +return+) so the result is
        # actually stored in @value.
        @value ||=
          if resolved_value && value_node.matches?('img[alt]')
            {
              value: resolved_value,
              alt: value_node['alt'].strip
            }
          else
            resolved_value
          end
      end

      private

      # The node itself, once for every element/attribute pair it matches.
      #
      # @return [Array<Nokogiri::XML::Element>]
      def attribute_values
        @attribute_values ||= begin
          HTML_ELEMENTS_MAP.map do |element, attribute|
            node if node.matches?("#{element}[#{attribute}]")
          end.compact
        end
      end

      # The stripped attribute value resolved against the document URL.
      #
      # @return [String, nil]
      def resolved_value
        # +defined?+ caches a nil result too, which +||=+ cannot.
        return @resolved_value if defined?(@resolved_value)

        relative = unresolved_value

        @resolved_value = relative && Absolutely.to_abs(base: node.document.url, relative: relative.strip)
      end

      # The raw attribute value read from the value node.
      #
      # @return [String, nil]
      def unresolved_value
        return @unresolved_value if defined?(@unresolved_value)

        @unresolved_value = value_node && value_node[HTML_ELEMENTS_MAP[value_node.name]]
      end

      # The element the photo value is read from, looked up once per parser.
      #
      # @return [Nokogiri::XML::Element, nil]
      def value_node
        return @value_node if defined?(@value_node)

        @value_node = locate_value_node
      end

      # Searches the node, then its children, then its sole child's children.
      #
      # @return [Nokogiri::XML::Element, nil]
      def locate_value_node
        return attribute_values.first if attribute_values.any?

        direct_child = only_of_type_child_of(node)

        return direct_child if direct_child

        sole_child = node.first_element_child

        return unless node.element_children.one? && !Item.item_node?(sole_child)

        only_of_type_child_of(sole_child)
      end

      # First mapped, only-of-type, non-item child element of +parent+.
      #
      # @param parent [Nokogiri::XML::Element]
      # @return [Nokogiri::XML::Element, nil]
      def only_of_type_child_of(parent)
        HTML_ELEMENTS_MAP.each do |element, attribute|
          candidate = parent.at_css("> #{element}[#{attribute}]:only-of-type")

          return candidate if candidate && !Item.item_node?(candidate) && element == candidate.name && candidate[attribute]
        end

        nil
      end
    end
  end
end
@@ -0,0 +1,61 @@
module MicroMicro
  module Parsers
    # Derives the implied "url" value for a node from an a[href] or
    # area[href] element: the node itself, an only-of-type child, or an
    # only-of-type child of the node's single child element.
    #
    # NOTE(review): +node+ is not defined in this class; presumably it is
    # inherited from BasePropertyParser - confirm.
    class ImpliedUrlPropertyParser < BasePropertyParser
      # Element name => attribute that carries the URL.
      #
      # @see microformats2 Parsing Specification section 1.3.5
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
      HTML_ELEMENTS_MAP = {
        'a' => 'href',
        'area' => 'href'
      }.freeze

      # The resolved URL, or nil when no suitable element is found.
      #
      # @return [String, nil]
      def value
        @value ||= resolved_value
      end

      private

      # The node itself, once for every element/attribute pair it matches.
      #
      # @return [Array<Nokogiri::XML::Element>]
      def attribute_values
        @attribute_values ||= begin
          HTML_ELEMENTS_MAP.map do |element, attribute|
            node if node.matches?("#{element}[#{attribute}]")
          end.compact
        end
      end

      # The stripped attribute value resolved against the document URL.
      #
      # @return [String, nil]
      def resolved_value
        # +defined?+ caches a nil result too, which +||=+ cannot.
        return @resolved_value if defined?(@resolved_value)

        relative = unresolved_value

        @resolved_value = relative && Absolutely.to_abs(base: node.document.url, relative: relative.strip)
      end

      # The raw attribute value read from the value node.
      #
      # @return [String, nil]
      def unresolved_value
        return @unresolved_value if defined?(@unresolved_value)

        @unresolved_value = value_node && value_node[HTML_ELEMENTS_MAP[value_node.name]]
      end

      # The element the URL is read from, looked up once per parser.
      #
      # @return [Nokogiri::XML::Element, nil]
      def value_node
        return @value_node if defined?(@value_node)

        @value_node = locate_value_node
      end

      # Searches the node, then its children, then its sole child's children.
      #
      # @return [Nokogiri::XML::Element, nil]
      def locate_value_node
        return attribute_values.first if attribute_values.any?

        direct_child = only_of_type_child_of(node)

        return direct_child if direct_child

        sole_child = node.first_element_child

        return unless node.element_children.one? && !Item.item_node?(sole_child)

        only_of_type_child_of(sole_child)
      end

      # First mapped, only-of-type, non-item child element of +parent+.
      #
      # @param parent [Nokogiri::XML::Element]
      # @return [Nokogiri::XML::Element, nil]
      def only_of_type_child_of(parent)
        HTML_ELEMENTS_MAP.each do |element, attribute|
          candidate = parent.at_css("> #{element}[#{attribute}]:only-of-type")

          return candidate if candidate && !Item.item_node?(candidate) && element == candidate.name && candidate[attribute]
        end

        nil
      end
    end
  end
end
@@ -0,0 +1,39 @@
module MicroMicro
  module Parsers
    # Parses a "p-" (plain text) property. The value comes from the value
    # class pattern, else a mapped attribute on the node, else the superclass
    # implementation of +value+.
    #
    # NOTE(review): +node+ is not defined in this class; presumably it is
    # inherited from BasePropertyParser - confirm.
    class PlainTextPropertyParser < BasePropertyParser
      # Attribute name => element names whose value is read from it.
      #
      # @see microformats2 Parsing Specification section 1.3.1
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
      HTML_ATTRIBUTES_MAP = {
        'title' => %w[abbr link],
        'value' => %w[data input],
        'alt' => %w[area img]
      }.freeze

      # @return [String]
      def value
        # Written as an expression (no early +return+) so that whichever
        # branch wins is actually stored in @value.
        @value ||=
          if value_class_pattern_parser.value?
            value_class_pattern_parser.value
          elsif attribute_values.any?
            attribute_values.first
          else
            super
          end
      end

      private

      # Values of the mapped attributes present on the node, in
      # HTML_ATTRIBUTES_MAP order.
      #
      # @return [Array<String>]
      def attribute_values
        @attribute_values ||= begin
          HTML_ATTRIBUTES_MAP.map do |attribute, names|
            node[attribute] if names.include?(node.name) && node[attribute]
          end.compact
        end
      end

      # @return [MicroMicro::Parsers::ValueClassPatternParser]
      def value_class_pattern_parser
        @value_class_pattern_parser ||= ValueClassPatternParser.new(node)
      end
    end
  end
end
@@ -0,0 +1,75 @@
module MicroMicro
  module Parsers
    # Parses a "u-" (URL) property. The raw value comes from a URL-bearing
    # attribute, else the value class pattern, else an extended attribute,
    # else the node's serialized text; it is then resolved against the
    # document URL.
    #
    # NOTE(review): +node+ and +serialized_node+ are not defined in this class;
    # presumably they are inherited from BasePropertyParser - confirm.
    class UrlPropertyParser < BasePropertyParser
      # Attribute name => element names whose URL is read from it.
      #
      # @see microformats2 Parsing Specification section 1.3.2
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
      HTML_ATTRIBUTES_MAP = {
        'href' => %w[a area link],
        'src' => %w[audio iframe img source video],
        'poster' => %w[video],
        'data' => %w[object]
      }.freeze

      # Attributes consulted only after the value class pattern yields nothing.
      #
      # @see microformats2 Parsing Specification section 1.3.2
      # @see http://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
      EXTENDED_HTML_ATTRIBUTES_MAP = {
        'title' => %w[abbr],
        'value' => %w[data input]
      }.freeze

      # The resolved URL, or a Hash pairing it with the stripped alt text when
      # the node is an img element with an alt attribute.
      #
      # @see microformats2 Parsing Specification section 1.5
      # @see http://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
      #
      # @return [String, Hash{Symbol => String}]
      def value
        # Written as an expression (no early +return+) so the result is
        # actually stored in @value.
        @value ||=
          if node.matches?('img[alt]')
            {
              value: resolved_value,
              alt: node['alt'].strip
            }
          else
            resolved_value
          end
      end

      private

      # Values of the URL-bearing attributes present on the node.
      #
      # @return [Array<String>]
      def attribute_values
        @attribute_values ||= begin
          HTML_ATTRIBUTES_MAP.map do |attribute, names|
            node[attribute] if names.include?(node.name) && node[attribute]
          end.compact
        end
      end

      # Values of the extended attributes present on the node.
      #
      # @return [Array<String>]
      def extended_attribute_values
        @extended_attribute_values ||= begin
          EXTENDED_HTML_ATTRIBUTES_MAP.map do |attribute, names|
            node[attribute] if names.include?(node.name) && node[attribute]
          end.compact
        end
      end

      # The stripped raw value resolved against the document URL.
      #
      # @return [String]
      def resolved_value
        @resolved_value ||= Absolutely.to_abs(base: node.document.url, relative: unresolved_value.strip)
      end

      # The raw value from the first source that provides one.
      #
      # @return [String]
      def unresolved_value
        @unresolved_value ||=
          if attribute_values.any?
            attribute_values.first
          elsif value_class_pattern_parser.value?
            value_class_pattern_parser.value
          elsif extended_attribute_values.any?
            extended_attribute_values.first
          else
            serialized_node.text
          end
      end

      # @return [MicroMicro::Parsers::ValueClassPatternParser]
      def value_class_pattern_parser
        @value_class_pattern_parser ||= ValueClassPatternParser.new(node)
      end
    end
  end
end
@@ -0,0 +1,92 @@
module MicroMicro
  module Parsers
    # Collects the values of the "value" / "value-title" descendants of a node
    # and joins them into a single string.
    class ValueClassPatternParser
      # Attribute name => element names whose value is read from that
      # attribute instead of from their text.
      #
      # @see Value Class Pattern sections 3 and 4
      # @see http://microformats.org/wiki/value-class-pattern#Basic_Parsing
      # @see http://microformats.org/wiki/value-class-pattern#Date_and_time_values
      HTML_ATTRIBUTES_MAP = {
        'alt' => %w[area img],
        'value' => %w[data],
        'title' => %w[abbr],
        'datetime' => %w[del ins time]
      }.freeze

      # @param node [Nokogiri::XML::Element] root of the search
      # @param separator [String] placed between the collected values
      def initialize(node, separator = '')
        @separator = separator
        @node = node
      end

      # The collected values joined with the separator and stripped, or nil
      # when nothing was collected.
      #
      # @return [String, nil]
      def value
        return unless values?

        @value ||= values.join(separator).strip
      end

      # @return [Boolean]
      def value?
        value.present?
      end

      # Present values extracted from each value node, in node order.
      #
      # @return [Array<String>]
      def values
        @values ||= value_nodes.each_with_object([]) do |value_node, collected|
          extracted = self.class.value_from(value_node)

          collected << extracted if extracted.present?
        end
      end

      # @return [Boolean]
      def values?
        values.any?
      end

      class << self
        # Walks +context+ depth-first, gathering elements marked "value" or
        # "value-title". Descent stops at a gathered element and at any
        # element Document.ignore_node? rejects.
        #
        # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
        # @param node_set [Nokogiri::XML::NodeSet] accumulator, returned
        # @return [Nokogiri::XML::NodeSet]
        def nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
          case context
          when Nokogiri::XML::NodeSet
            context.each { |member| nodes_from(member, node_set) }
          when Nokogiri::XML::Element
            unless Document.ignore_node?(context)
              if value_class_node?(context) || value_title_node?(context)
                node_set << context
              else
                nodes_from(context.element_children, node_set)
              end
            end
          end

          node_set
        end

        # @param node [Nokogiri::XML::Element]
        # @return [Boolean] whether the node has the "value" class
        def value_class_node?(node)
          node.classes.include?('value')
        end

        # The title of a "value-title" node, else the first mapped attribute
        # present on the node, else the node's text.
        #
        # @param node [Nokogiri::XML::Element]
        # @return [String, nil]
        def value_from(node)
          return node['title'] if value_title_node?(node)

          mapped = HTML_ATTRIBUTES_MAP.find do |attribute, names|
            names.include?(node.name) && node[attribute]
          end

          mapped ? node[mapped.first] : node.text
        end

        # @param node [Nokogiri::XML::Element]
        # @return [Boolean] whether the node has the "value-title" class
        def value_title_node?(node)
          node.classes.include?('value-title')
        end
      end

      private

      attr_reader :node, :separator

      # @return [Nokogiri::XML::NodeSet]
      def value_nodes
        @value_nodes ||= self.class.nodes_from(node)
      end
    end
  end
end
@@ -0,0 +1,116 @@
module MicroMicro
  # A single prefixed property ("dt-", "e-", "p-" or "u-") found on a node.
  class Property
    # Property prefix => parser class used to compute the property's value.
    PROPERTY_PARSERS_MAP = {
      'dt' => Parsers::DateTimePropertyParser,
      'e' => Parsers::EmbeddedMarkupPropertyParser,
      'p' => Parsers::PlainTextPropertyParser,
      'u' => Parsers::UrlPropertyParser
    }.freeze

    attr_accessor :collection
    attr_reader :name, :node, :prefix

    # @param node [Nokogiri::XML::Element]
    # @param name [String]
    # @param prefix [String<dt, e, p, u>]
    def initialize(node, name:, prefix:)
      @node = node
      @name = name
      @prefix = prefix
    end

    # @return [Boolean] always false for this class
    def implied?
      false
    end

    # @return [String]
    def inspect
      # Only the object id goes through Kernel#format. Interpolating the
      # inspected values into the format string itself would make any "%" in
      # them (e.g. a percent-encoded URL) act as a format directive and raise.
      hex_id = format('%#0x', object_id)

      "#<#{self.class.name}:#{hex_id} name: #{name.inspect}, prefix: #{prefix.inspect}, value: #{value.inspect}>"
    end

    # Whether this property's node is itself an item.
    #
    # @return [Boolean]
    def item_node?
      # +defined?+ caches a false result too, which +||=+ cannot.
      return @item_node if defined?(@item_node)

      @item_node = Item.item_node?(node)
    end

    # The parsed value, or the nested item (with its own value assigned) when
    # the node is an item.
    #
    # @return [String, Hash, MicroMicro::Item]
    def value
      # Written as an expression (no early +return+) so the parser result is
      # actually stored in @value.
      @value ||=
        if item_node?
          item.value = item_value

          item
        else
          parser.value
        end
    end

    # @return [Boolean]
    def value?
      value.present?
    end

    # Walks +context+ depth-first, gathering property nodes. Descent stops
    # below item nodes and skips elements Document.ignore_node? rejects.
    #
    # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
    # @param node_set [Nokogiri::XML::NodeSet] accumulator, returned
    # @return [Nokogiri::XML::NodeSet]
    def self.nodes_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
      context.each { |node| nodes_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)

      if context.is_a?(Nokogiri::XML::Element) && !Document.ignore_node?(context)
        node_set << context if property_node?(context)

        nodes_from(context.element_children, node_set) unless Item.item_node?(context)
      end

      node_set
    end

    # Builds one Property per prefixed class token on every property node.
    #
    # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
    # @return [Array<MicroMicro::Property>]
    def self.properties_from(context)
      nodes_from(context).flat_map do |node|
        types_from(node).map { |prefix, name| new(node, name: name, prefix: prefix) }
      end
    end

    # @param node [Nokogiri::XML::Element]
    # @return [Boolean]
    def self.property_node?(node)
      types_from(node).any?
    end

    # Unique [prefix, name] pairs for the node's property class tokens.
    #
    # @param node [Nokogiri::XML::Element]
    # @return [Array<Array(String, String)>]
    #
    # @example
    #   node = Nokogiri::HTML('<a href="https://sixtwothree.org" class="p-name u-url">Jason Garber</a>').at_css('a')
    #   MicroMicro::Property.types_from(node) #=> [['p', 'name'], ['u', 'url']]
    def self.types_from(node)
      # \A and \z anchor the whole token rather than a single line.
      node.classes
          .select { |token| token.match?(/\A(?:dt|e|p|u)(?:-[0-9a-z]+)?(?:-[a-z]+)+\z/) }
          .map { |token| token.split('-', 2) }
          .uniq
    end

    private

    # @return [MicroMicro::Item, nil]
    def item
      @item ||= Item.new(node) if item_node?
    end

    # The value given to a nested item: for "e" the item's own value, for
    # "p"/"u" the value of its "name"/"url" property, else the parser's value.
    #
    # @return [String, nil]
    def item_value
      return unless item_node?

      obj_by_prefix = case prefix
                      when 'e' then item
                      when 'p' then item.properties.find { |property| property.name == 'name' }
                      when 'u' then item.properties.find { |property| property.name == 'url' }
                      end

      (obj_by_prefix || parser).value
    end

    # The parser instance for this property's prefix.
    def parser
      @parser ||= PROPERTY_PARSERS_MAP[prefix].new(self)
    end
  end
end