micromicro 1.1.0 → 2.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -9
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +7 -1
- data/lib/micro_micro/collections/items_collection.rb +3 -1
- data/lib/micro_micro/collections/properties_collection.rb +12 -0
- data/lib/micro_micro/collections/relationships_collection.rb +10 -9
- data/lib/micro_micro/document.rb +10 -98
- data/lib/micro_micro/helpers.rb +82 -0
- data/lib/micro_micro/implied_property.rb +2 -0
- data/lib/micro_micro/item.rb +53 -60
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +4 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +60 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +7 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +3 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +14 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +19 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +11 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +3 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +20 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +27 -42
- data/lib/micro_micro/property.rb +68 -56
- data/lib/micro_micro/relationship.rb +15 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +31 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
| @@ -0,0 +1,29 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module MicroMicro
         | 
| 4 | 
            +
              module Parsers
         | 
| 5 | 
            +
                class BaseImpliedPropertyParser < BasePropertyParser
         | 
| 6 | 
            +
                  private
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                  # @return [String, nil]
         | 
| 9 | 
            +
                  def attribute_value
         | 
| 10 | 
            +
                    candidate_node[self.class::HTML_ELEMENTS_MAP[candidate_node.name]] if candidate_node
         | 
| 11 | 
            +
                  end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                  # @return [Nokogiri::XML::Element, nil]
         | 
| 14 | 
            +
                  def candidate_node
         | 
| 15 | 
            +
                    @candidate_node ||=
         | 
| 16 | 
            +
                      candidate_nodes.find do |node|
         | 
| 17 | 
            +
                        self.class::HTML_ELEMENTS_MAP.filter_map do |name, attribute|
         | 
| 18 | 
            +
                          node if name == node.name && node[attribute]
         | 
| 19 | 
            +
                        end.any?
         | 
| 20 | 
            +
                      end
         | 
| 21 | 
            +
                  end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 24 | 
            +
                  def candidate_nodes
         | 
| 25 | 
            +
                    Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
         | 
| 26 | 
            +
                  end
         | 
| 27 | 
            +
                end
         | 
| 28 | 
            +
              end
         | 
| 29 | 
            +
            end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class BasePropertyParser
         | 
| @@ -12,20 +14,10 @@ module MicroMicro | |
| 12 14 | 
             
                  #
         | 
| 13 15 | 
             
                  # @return [String]
         | 
| 14 16 | 
             
                  def value
         | 
| 15 | 
            -
                    @value ||= | 
| 16 | 
            -
                       | 
| 17 | 
            +
                    @value ||=
         | 
| 18 | 
            +
                      Helpers.text_content_from(node) do |context|
         | 
| 17 19 | 
             
                        context.css('img').each { |img| img.content = " #{img['alt'] || img['src']} " }
         | 
| 18 20 | 
             
                      end
         | 
| 19 | 
            -
                    end
         | 
| 20 | 
            -
                  end
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                  # @param node [Nokogiri::XML::Element]
         | 
| 23 | 
            -
                  # @param attributes_map [Hash{String => Array}]
         | 
| 24 | 
            -
                  # @return [Array]
         | 
| 25 | 
            -
                  def self.attribute_value_from(node, attributes_map)
         | 
| 26 | 
            -
                    attributes_map.map do |attribute, names|
         | 
| 27 | 
            -
                      node[attribute] if names.include?(node.name) && node[attribute]
         | 
| 28 | 
            -
                    end.compact.first
         | 
| 29 21 | 
             
                  end
         | 
| 30 22 |  | 
| 31 23 | 
             
                  private
         | 
| @@ -1,78 +1,113 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class DateTimeParser
         | 
| 4 | 
            -
                  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
         | 
| 5 | 
            -
                  #
         | 
| 6 6 | 
             
                  # Regexp pattern matching YYYY-MM-DD and YYYY-DDD
         | 
| 7 | 
            -
                  DATE_REGEXP_PATTERN = '(?<year>\d{4})- | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
                  # Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
         | 
| 11 | 
            -
                  TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
         | 
| 7 | 
            +
                  DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
         | 
| 8 | 
            +
                                        '((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
         | 
| 9 | 
            +
                                        '(?<day>3[0-1]|[0-2]\d))'
         | 
| 12 10 |  | 
| 13 | 
            -
                   | 
| 11 | 
            +
                  # Regexp pattern matching HH:MM and HH:MM:SS
         | 
| 12 | 
            +
                  TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
         | 
| 13 | 
            +
                                        '(?::(?<minutes>[0-5]\d))?' \
         | 
| 14 | 
            +
                                        '(?::(?<seconds>[0-5]\d))?' \
         | 
| 15 | 
            +
                                        '(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
         | 
| 14 16 |  | 
| 15 | 
            -
                  #  | 
| 17 | 
            +
                  # Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
         | 
| 18 | 
            +
                  TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  # Regexp for extracting named captures from a datetime-esque String.
         | 
| 21 | 
            +
                  DATE_TIME_TIMEZONE_REGEXP = /
         | 
| 22 | 
            +
                    \A
         | 
| 23 | 
            +
                    (?=.)
         | 
| 24 | 
            +
                    (?:#{DATE_REGEXP_PATTERN})?
         | 
| 25 | 
            +
                    (?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
         | 
| 26 | 
            +
                    \z
         | 
| 27 | 
            +
                  /x.freeze
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  # Parse a string for date and/or time values according to the Microformats
         | 
| 30 | 
            +
                  # Value Class Pattern date and time parsing specification.
         | 
| 31 | 
            +
                  #
         | 
| 32 | 
            +
                  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
         | 
| 33 | 
            +
                  #
         | 
| 34 | 
            +
                  # @param string [String, #to_s]
         | 
| 16 35 | 
             
                  def initialize(string)
         | 
| 17 | 
            -
                    @string = string
         | 
| 36 | 
            +
                    @string = string.to_s
         | 
| 18 37 | 
             
                  end
         | 
| 19 38 |  | 
| 20 | 
            -
                   | 
| 39 | 
            +
                  # Define getter and predicate methods for all possible named captures
         | 
| 40 | 
            +
                  # returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
         | 
| 41 | 
            +
                  [
         | 
| 42 | 
            +
                    :year, :ordinal, :month, :day,
         | 
| 43 | 
            +
                    :hours, :minutes, :seconds,
         | 
| 44 | 
            +
                    :abbreviation, :zulu, :offset
         | 
| 45 | 
            +
                  ].each do |name|
         | 
| 21 46 | 
             
                    define_method(name) { values[name] }
         | 
| 22 47 | 
             
                    define_method("#{name}?") { public_send(name).present? }
         | 
| 23 48 | 
             
                  end
         | 
| 24 49 |  | 
| 50 | 
            +
                  # @return [String, nil]
         | 
| 25 51 | 
             
                  def normalized_calendar_date
         | 
| 26 52 | 
             
                    @normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
         | 
| 27 53 | 
             
                  end
         | 
| 28 54 |  | 
| 55 | 
            +
                  # @return [String, nil]
         | 
| 29 56 | 
             
                  def normalized_date
         | 
| 30 57 | 
             
                    @normalized_date ||= normalized_calendar_date || normalized_ordinal_date
         | 
| 31 58 | 
             
                  end
         | 
| 32 59 |  | 
| 60 | 
            +
                  # @return [String, nil]
         | 
| 33 61 | 
             
                  def normalized_hours
         | 
| 34 | 
            -
                    @normalized_hours ||= | 
| 35 | 
            -
                       | 
| 36 | 
            -
             | 
| 62 | 
            +
                    @normalized_hours ||=
         | 
| 63 | 
            +
                      if hours?
         | 
| 64 | 
            +
                        return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
         | 
| 37 65 |  | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 66 | 
            +
                        format('%<hours>02d', hours: hours)
         | 
| 67 | 
            +
                      end
         | 
| 40 68 | 
             
                  end
         | 
| 41 69 |  | 
| 70 | 
            +
                  # @return [String]
         | 
| 42 71 | 
             
                  def normalized_minutes
         | 
| 43 72 | 
             
                    @normalized_minutes ||= minutes || '00'
         | 
| 44 73 | 
             
                  end
         | 
| 45 74 |  | 
| 75 | 
            +
                  # @return [String, nil]
         | 
| 46 76 | 
             
                  def normalized_ordinal_date
         | 
| 47 77 | 
             
                    @normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
         | 
| 48 78 | 
             
                  end
         | 
| 49 79 |  | 
| 80 | 
            +
                  # @return [String, nil]
         | 
| 50 81 | 
             
                  def normalized_time
         | 
| 51 82 | 
             
                    @normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
         | 
| 52 83 | 
             
                  end
         | 
| 53 84 |  | 
| 85 | 
            +
                  # @return [String, nil]
         | 
| 54 86 | 
             
                  def normalized_timezone
         | 
| 55 87 | 
             
                    @normalized_timezone ||= zulu || offset&.tr(':', '')
         | 
| 56 88 | 
             
                  end
         | 
| 57 89 |  | 
| 58 | 
            -
                  # @return [String]
         | 
| 90 | 
            +
                  # @return [String, nil]
         | 
| 59 91 | 
             
                  def value
         | 
| 60 | 
            -
                    @value ||= | 
| 92 | 
            +
                    @value ||=
         | 
| 93 | 
            +
                      if normalized_date || normalized_time || normalized_timezone
         | 
| 94 | 
            +
                        "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
         | 
| 95 | 
            +
                      end
         | 
| 61 96 | 
             
                  end
         | 
| 62 97 |  | 
| 63 98 | 
             
                  # @return [Hash{Symbol => String, nil}]
         | 
| 64 99 | 
             
                  def values
         | 
| 65 | 
            -
                    @values ||= | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
                    string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
         | 
| 100 | 
            +
                    @values ||=
         | 
| 101 | 
            +
                      if string.match?(DATE_TIME_TIMEZONE_REGEXP)
         | 
| 102 | 
            +
                        string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
         | 
| 103 | 
            +
                      else
         | 
| 104 | 
            +
                        {}
         | 
| 105 | 
            +
                      end
         | 
| 72 106 | 
             
                  end
         | 
| 73 107 |  | 
| 74 108 | 
             
                  private
         | 
| 75 109 |  | 
| 110 | 
            +
                  # @return [String]
         | 
| 76 111 | 
             
                  attr_reader :string
         | 
| 77 112 | 
             
                end
         | 
| 78 113 | 
             
              end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class DateTimePropertyParser < BasePropertyParser
         | 
| @@ -20,16 +22,15 @@ module MicroMicro | |
| 20 22 | 
             
                  #
         | 
| 21 23 | 
             
                  # @return [MicroMicro::Parsers::DateTimeParser, nil]
         | 
| 22 24 | 
             
                  def adopted_date_time_parser
         | 
| 23 | 
            -
                    @adopted_date_time_parser ||= | 
| 24 | 
            -
                       | 
| 25 | 
            -
             | 
| 26 | 
            -
                       | 
| 27 | 
            -
                    end
         | 
| 25 | 
            +
                    @adopted_date_time_parser ||=
         | 
| 26 | 
            +
                      (property.prev_all.reverse + property.next_all).filter_map do |prop|
         | 
| 27 | 
            +
                        DateTimeParser.new(prop.value) if prop.date_time_property?
         | 
| 28 | 
            +
                      end.find(&:normalized_date)
         | 
| 28 29 | 
             
                  end
         | 
| 29 30 |  | 
| 30 31 | 
             
                  # @return [String, nil]
         | 
| 31 32 | 
             
                  def attribute_value
         | 
| 32 | 
            -
                     | 
| 33 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 33 34 | 
             
                  end
         | 
| 34 35 |  | 
| 35 36 | 
             
                  # @return [MicroMicro::Parsers::DateTimeParser]
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class EmbeddedMarkupPropertyParser < BasePropertyParser
         | 
| @@ -5,12 +7,11 @@ module MicroMicro | |
| 5 7 | 
             
                  #
         | 
| 6 8 | 
             
                  # @return [Hash{Symbol => String}]
         | 
| 7 9 | 
             
                  def value
         | 
| 8 | 
            -
                    @value ||= | 
| 10 | 
            +
                    @value ||=
         | 
| 9 11 | 
             
                      {
         | 
| 10 12 | 
             
                        html: node.inner_html.strip,
         | 
| 11 13 | 
             
                        value: super
         | 
| 12 14 | 
             
                      }
         | 
| 13 | 
            -
                    end
         | 
| 14 15 | 
             
                  end
         | 
| 15 16 | 
             
                end
         | 
| 16 17 | 
             
              end
         | 
| @@ -1,9 +1,12 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedNamePropertyParser <  | 
| 4 | 
            -
                   | 
| 5 | 
            -
                    ' | 
| 6 | 
            -
                    ' | 
| 5 | 
            +
                class ImpliedNamePropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  HTML_ELEMENTS_MAP = {
         | 
| 7 | 
            +
                    'img'  => 'alt',
         | 
| 8 | 
            +
                    'area' => 'alt',
         | 
| 9 | 
            +
                    'abbr' => 'title'
         | 
| 7 10 | 
             
                  }.freeze
         | 
| 8 11 |  | 
| 9 12 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
         | 
| @@ -15,24 +18,19 @@ module MicroMicro | |
| 15 18 |  | 
| 16 19 | 
             
                  private
         | 
| 17 20 |  | 
| 18 | 
            -
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 19 | 
            -
                  def candidate_nodes
         | 
| 20 | 
            -
                    @candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
         | 
| 21 | 
            -
                  end
         | 
| 22 | 
            -
             | 
| 23 21 | 
             
                  # @return [Array]
         | 
| 24 22 | 
             
                  def child_nodes
         | 
| 25 | 
            -
                    [ | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
                  def attribute_value
         | 
| 30 | 
            -
                    candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
         | 
| 23 | 
            +
                    [
         | 
| 24 | 
            +
                      node.at_css('> :only-child'),
         | 
| 25 | 
            +
                      node.at_css('> :only-child > :only-child')
         | 
| 26 | 
            +
                    ].compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 31 27 | 
             
                  end
         | 
| 32 28 |  | 
| 33 29 | 
             
                  # @return [String]
         | 
| 34 30 | 
             
                  def text_content
         | 
| 35 | 
            -
                     | 
| 31 | 
            +
                    Helpers.text_content_from(node) do |context|
         | 
| 32 | 
            +
                      context.css('img').each { |img| img.content = img['alt'] }
         | 
| 33 | 
            +
                    end
         | 
| 36 34 | 
             
                  end
         | 
| 37 35 | 
             
                end
         | 
| 38 36 | 
             
              end
         | 
| @@ -1,6 +1,10 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedPhotoPropertyParser <  | 
| 5 | 
            +
                class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
         | 
| 7 | 
            +
             | 
| 4 8 | 
             
                  HTML_ELEMENTS_MAP = {
         | 
| 5 9 | 
             
                    'img'    => 'src',
         | 
| 6 10 | 
             
                    'object' => 'data'
         | 
| @@ -11,54 +15,26 @@ module MicroMicro | |
| 11 15 | 
             
                  #
         | 
| 12 16 | 
             
                  # @return [String, Hash{Symbol => String}, nil]
         | 
| 13 17 | 
             
                  def value
         | 
| 14 | 
            -
                    @value ||= | 
| 15 | 
            -
                       | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 18 | 
            +
                    @value ||=
         | 
| 19 | 
            +
                      if attribute_value
         | 
| 20 | 
            +
                        return attribute_value unless candidate_node.matches?('img[alt]')
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                        {
         | 
| 23 | 
            +
                          value: attribute_value,
         | 
| 24 | 
            +
                          alt: candidate_node['alt'].strip
         | 
| 25 | 
            +
                        }
         | 
| 26 | 
            +
                      end
         | 
| 23 27 | 
             
                  end
         | 
| 24 28 |  | 
| 25 29 | 
             
                  private
         | 
| 26 30 |  | 
| 27 | 
            -
                  # @return [Array | 
| 28 | 
            -
                  def  | 
| 29 | 
            -
                     | 
| 30 | 
            -
                      HTML_ELEMENTS_MAP.map do |element, attribute|
         | 
| 31 | 
            -
                        node if node.matches?("#{element}[#{attribute}]")
         | 
| 32 | 
            -
                      end.compact
         | 
| 33 | 
            -
                    end
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                  # @return [String, nil]
         | 
| 37 | 
            -
                  def resolved_value
         | 
| 38 | 
            -
                    @resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
         | 
| 39 | 
            -
                  end
         | 
| 31 | 
            +
                  # @return [Array]
         | 
| 32 | 
            +
                  def child_nodes
         | 
| 33 | 
            +
                    nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
         | 
| 40 34 |  | 
| 41 | 
            -
             | 
| 42 | 
            -
                  def value_node
         | 
| 43 | 
            -
                    @value_node ||= begin
         | 
| 44 | 
            -
                      return attribute_values.first if attribute_values.any?
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                      HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 47 | 
            -
                        child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                        return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 50 | 
            -
                      end
         | 
| 51 | 
            -
             | 
| 52 | 
            -
                      if node.element_children.one? && !Item.item_node?(node.first_element_child)
         | 
| 53 | 
            -
                        HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 54 | 
            -
                          child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 55 | 
            -
             | 
| 56 | 
            -
                          return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 57 | 
            -
                        end
         | 
| 58 | 
            -
                      end
         | 
| 35 | 
            +
                    nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
         | 
| 59 36 |  | 
| 60 | 
            -
             | 
| 61 | 
            -
                    end
         | 
| 37 | 
            +
                    nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 62 38 | 
             
                  end
         | 
| 63 39 | 
             
                end
         | 
| 64 40 | 
             
              end
         | 
| @@ -1,6 +1,10 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedUrlPropertyParser <  | 
| 5 | 
            +
                class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
         | 
| 7 | 
            +
             | 
| 4 8 | 
             
                  HTML_ELEMENTS_MAP = {
         | 
| 5 9 | 
             
                    'a'    => 'href',
         | 
| 6 10 | 
             
                    'area' => 'href'
         | 
| @@ -10,41 +14,18 @@ module MicroMicro | |
| 10 14 | 
             
                  #
         | 
| 11 15 | 
             
                  # @return [String, nil]
         | 
| 12 16 | 
             
                  def value
         | 
| 13 | 
            -
                    @value ||=  | 
| 17 | 
            +
                    @value ||= attribute_value
         | 
| 14 18 | 
             
                  end
         | 
| 15 19 |  | 
| 16 20 | 
             
                  private
         | 
| 17 21 |  | 
| 18 | 
            -
                  # @return [Array | 
| 19 | 
            -
                  def  | 
| 20 | 
            -
                     | 
| 21 | 
            -
                      HTML_ELEMENTS_MAP.map do |element, attribute|
         | 
| 22 | 
            -
                        node if node.matches?("#{element}[#{attribute}]")
         | 
| 23 | 
            -
                      end.compact
         | 
| 24 | 
            -
                    end
         | 
| 25 | 
            -
                  end
         | 
| 26 | 
            -
             | 
| 27 | 
            -
                  # @return [Nokogiri::XML::Element, nil]
         | 
| 28 | 
            -
                  def value_node
         | 
| 29 | 
            -
                    @value_node ||= begin
         | 
| 30 | 
            -
                      return attribute_values.first if attribute_values.any?
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                      HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 33 | 
            -
                        child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 34 | 
            -
             | 
| 35 | 
            -
                        return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 36 | 
            -
                      end
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                      if node.element_children.one? && !Item.item_node?(node.first_element_child)
         | 
| 39 | 
            -
                        HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 40 | 
            -
                          child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 22 | 
            +
                  # @return [Array]
         | 
| 23 | 
            +
                  def child_nodes
         | 
| 24 | 
            +
                    nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
         | 
| 41 25 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
                        end
         | 
| 44 | 
            -
                      end
         | 
| 26 | 
            +
                    nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
         | 
| 45 27 |  | 
| 46 | 
            -
             | 
| 47 | 
            -
                    end
         | 
| 28 | 
            +
                    nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 48 29 | 
             
                  end
         | 
| 49 30 | 
             
                end
         | 
| 50 31 | 
             
              end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class PlainTextPropertyParser < BasePropertyParser
         | 
| @@ -18,7 +20,7 @@ module MicroMicro | |
| 18 20 |  | 
| 19 21 | 
             
                  # @return [String, nil]
         | 
| 20 22 | 
             
                  def attribute_value
         | 
| 21 | 
            -
                     | 
| 23 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 22 24 | 
             
                  end
         | 
| 23 25 |  | 
| 24 26 | 
             
                  # @return [String, nil]
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class UrlPropertyParser < BasePropertyParser
         | 
| @@ -18,36 +20,42 @@ module MicroMicro | |
| 18 20 | 
             
                  #
         | 
| 19 21 | 
             
                  # @return [String, Hash{Symbol => String}]
         | 
| 20 22 | 
             
                  def value
         | 
| 21 | 
            -
                    @value ||= | 
| 22 | 
            -
                       | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
                         | 
| 27 | 
            -
                       | 
| 28 | 
            -
             | 
| 23 | 
            +
                    @value ||=
         | 
| 24 | 
            +
                      if node.matches?('img[alt]')
         | 
| 25 | 
            +
                        {
         | 
| 26 | 
            +
                          value: resolved_value,
         | 
| 27 | 
            +
                          alt: node['alt'].strip
         | 
| 28 | 
            +
                        }
         | 
| 29 | 
            +
                      else
         | 
| 30 | 
            +
                        resolved_value
         | 
| 31 | 
            +
                      end
         | 
| 29 32 | 
             
                  end
         | 
| 30 33 |  | 
| 31 34 | 
             
                  private
         | 
| 32 35 |  | 
| 33 36 | 
             
                  # @return [String, nil]
         | 
| 34 37 | 
             
                  def attribute_value
         | 
| 35 | 
            -
                     | 
| 38 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 36 39 | 
             
                  end
         | 
| 37 40 |  | 
| 38 41 | 
             
                  # @return [String, nil]
         | 
| 39 42 | 
             
                  def extended_attribute_value
         | 
| 40 | 
            -
                     | 
| 43 | 
            +
                    Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
         | 
| 41 44 | 
             
                  end
         | 
| 42 45 |  | 
| 43 46 | 
             
                  # @return [String]
         | 
| 44 47 | 
             
                  def resolved_value
         | 
| 45 | 
            -
                    @resolved_value ||=  | 
| 48 | 
            +
                    @resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
         | 
| 49 | 
            +
                  end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                  # @return [String]
         | 
| 52 | 
            +
                  def text_content
         | 
| 53 | 
            +
                    Helpers.text_content_from(node)
         | 
| 46 54 | 
             
                  end
         | 
| 47 55 |  | 
| 48 56 | 
             
                  # @return [String]
         | 
| 49 57 | 
             
                  def unresolved_value
         | 
| 50 | 
            -
                    attribute_value || value_class_pattern_value || extended_attribute_value ||  | 
| 58 | 
            +
                    attribute_value || value_class_pattern_value || extended_attribute_value || text_content
         | 
| 51 59 | 
             
                  end
         | 
| 52 60 |  | 
| 53 61 | 
             
                  # @return [String, nil]
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class ValueClassPatternParser
         | 
| @@ -10,72 +12,55 @@ module MicroMicro | |
| 10 12 | 
             
                    'datetime' => %w[del ins time]
         | 
| 11 13 | 
             
                  }.freeze
         | 
| 12 14 |  | 
| 13 | 
            -
                  # @param context [Nokogiri::XML::Element]
         | 
| 14 | 
            -
                  # @param separator [String]
         | 
| 15 | 
            -
                  def initialize(node, separator = '')
         | 
| 16 | 
            -
                    @node = node
         | 
| 17 | 
            -
                    @separator = separator
         | 
| 18 | 
            -
                  end
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                  # @return [String, nil]
         | 
| 21 | 
            -
                  def value
         | 
| 22 | 
            -
                    @value ||= values.join(separator).strip if values.any?
         | 
| 23 | 
            -
                  end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                  # @return [Array<String>]
         | 
| 26 | 
            -
                  def values
         | 
| 27 | 
            -
                    @values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
         | 
| 28 | 
            -
                  end
         | 
| 29 | 
            -
             | 
| 30 15 | 
             
                  # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
         | 
| 31 16 | 
             
                  # @param node_set [Nokogiri::XML::NodeSet]
         | 
| 32 17 | 
             
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 33 | 
            -
                  def self. | 
| 34 | 
            -
                    context.each { |node|  | 
| 18 | 
            +
                  def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
         | 
| 19 | 
            +
                    context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
         | 
| 35 20 |  | 
| 36 | 
            -
                    if context.is_a?(Nokogiri::XML::Element) && ! | 
| 37 | 
            -
                      if value_class_node?(context) || value_title_node?(context)
         | 
| 21 | 
            +
                    if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
         | 
| 22 | 
            +
                      if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
         | 
| 38 23 | 
             
                        node_set << context
         | 
| 39 24 | 
             
                      else
         | 
| 40 | 
            -
                         | 
| 25 | 
            +
                        node_set_from(context.element_children, node_set)
         | 
| 41 26 | 
             
                      end
         | 
| 42 27 | 
             
                    end
         | 
| 43 28 |  | 
| 44 29 | 
             
                    node_set
         | 
| 45 30 | 
             
                  end
         | 
| 46 31 |  | 
| 47 | 
            -
                  # @param node [Nokogiri::XML::Element]
         | 
| 48 | 
            -
                  # @return [Boolean]
         | 
| 49 | 
            -
                  def self.value_class_node?(node)
         | 
| 50 | 
            -
                    node.classes.include?('value')
         | 
| 51 | 
            -
                  end
         | 
| 52 | 
            -
             | 
| 53 32 | 
             
                  # @param node [Nokogiri::XML::Element]
         | 
| 54 33 | 
             
                  # @return [String, nil]
         | 
| 55 34 | 
             
                  def self.value_from(node)
         | 
| 56 | 
            -
                    return node['title'] if value_title_node?(node)
         | 
| 35 | 
            +
                    return node['title'] if Helpers.value_title_node?(node)
         | 
| 57 36 |  | 
| 58 | 
            -
                     | 
| 59 | 
            -
             | 
| 60 | 
            -
                    end
         | 
| 37 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
         | 
| 38 | 
            +
                  end
         | 
| 61 39 |  | 
| 62 | 
            -
             | 
| 40 | 
            +
                  # @param context [Nokogiri::XML::Element]
         | 
| 41 | 
            +
                  # @param separator [String]
         | 
| 42 | 
            +
                  def initialize(node, separator = '')
         | 
| 43 | 
            +
                    @node = node
         | 
| 44 | 
            +
                    @separator = separator
         | 
| 63 45 | 
             
                  end
         | 
| 64 46 |  | 
| 65 | 
            -
                  # @ | 
| 66 | 
            -
                   | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 47 | 
            +
                  # @return [String, nil]
         | 
| 48 | 
            +
                  def value
         | 
| 49 | 
            +
                    @value ||= values.join(separator).strip if values.any?
         | 
| 50 | 
            +
                  end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                  # @return [Array<String>]
         | 
| 53 | 
            +
                  def values
         | 
| 54 | 
            +
                    @values ||=
         | 
| 55 | 
            +
                      self.class
         | 
| 56 | 
            +
                          .node_set_from(node)
         | 
| 57 | 
            +
                          .map { |value_node| self.class.value_from(value_node) }
         | 
| 58 | 
            +
                          .select(&:present?)
         | 
| 69 59 | 
             
                  end
         | 
| 70 60 |  | 
| 71 61 | 
             
                  private
         | 
| 72 62 |  | 
| 73 63 | 
             
                  attr_reader :node, :separator
         | 
| 74 | 
            -
             | 
| 75 | 
            -
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 76 | 
            -
                  def value_nodes
         | 
| 77 | 
            -
                    @value_nodes ||= self.class.nodes_from(node)
         | 
| 78 | 
            -
                  end
         | 
| 79 64 | 
             
                end
         | 
| 80 65 | 
             
              end
         | 
| 81 66 | 
             
            end
         |