micromicro 1.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -1
- data/CONTRIBUTING.md +3 -3
- data/README.md +9 -102
- data/lib/micro_micro/collectible.rb +2 -0
- data/lib/micro_micro/collections/base_collection.rb +8 -1
- data/lib/micro_micro/collections/items_collection.rb +84 -1
- data/lib/micro_micro/collections/properties_collection.rb +111 -0
- data/lib/micro_micro/collections/relationships_collection.rb +85 -6
- data/lib/micro_micro/document.rb +21 -103
- data/lib/micro_micro/helpers.rb +94 -0
- data/lib/micro_micro/implied_property.rb +15 -0
- data/lib/micro_micro/item.rb +93 -79
- data/lib/micro_micro/parsers/base_implied_property_parser.rb +29 -0
- data/lib/micro_micro/parsers/base_property_parser.rb +6 -12
- data/lib/micro_micro/parsers/date_time_parser.rb +61 -25
- data/lib/micro_micro/parsers/date_time_property_parser.rb +10 -6
- data/lib/micro_micro/parsers/embedded_markup_property_parser.rb +4 -2
- data/lib/micro_micro/parsers/implied_name_property_parser.rb +15 -16
- data/lib/micro_micro/parsers/implied_photo_property_parser.rb +21 -43
- data/lib/micro_micro/parsers/implied_url_property_parser.rb +12 -30
- data/lib/micro_micro/parsers/plain_text_property_parser.rb +4 -1
- data/lib/micro_micro/parsers/url_property_parser.rb +22 -12
- data/lib/micro_micro/parsers/value_class_pattern_parser.rb +29 -42
- data/lib/micro_micro/property.rb +126 -56
- data/lib/micro_micro/relationship.rb +38 -13
- data/lib/micro_micro/version.rb +3 -1
- data/lib/micromicro.rb +32 -26
- data/micromicro.gemspec +11 -6
- metadata +22 -19
| @@ -1,78 +1,114 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class DateTimeParser
         | 
| 4 | 
            -
                  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
         | 
| 5 | 
            -
                  #
         | 
| 6 6 | 
             
                  # Regexp pattern matching YYYY-MM-DD and YYYY-DDD
         | 
| 7 | 
            -
                  DATE_REGEXP_PATTERN = '(?<year>\d{4})- | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
                  # Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
         | 
| 11 | 
            -
                  TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'.freeze
         | 
| 7 | 
            +
                  DATE_REGEXP_PATTERN = '(?<year>\d{4})-' \
         | 
| 8 | 
            +
                                        '((?<ordinal>3[0-6]{2}|[0-2]\d{2})|(?<month>0\d|1[0-2])-' \
         | 
| 9 | 
            +
                                        '(?<day>3[0-1]|[0-2]\d))'
         | 
| 12 10 |  | 
| 13 | 
            -
                   | 
| 11 | 
            +
                  # Regexp pattern matching HH:MM and HH:MM:SS
         | 
| 12 | 
            +
                  TIME_REGEXP_PATTERN = '(?<hours>2[0-3]|[0-1]?\d)' \
         | 
| 13 | 
            +
                                        '(?::(?<minutes>[0-5]\d))?' \
         | 
| 14 | 
            +
                                        '(?::(?<seconds>[0-5]\d))?' \
         | 
| 15 | 
            +
                                        '(?:\s*?(?<abbreviation>[apPP]\.?[mM]\.?))?'
         | 
| 14 16 |  | 
| 15 | 
            -
                  #  | 
| 17 | 
            +
                  # Regexp pattern matching +/-(XX:YY|XXYY|XX) or the literal string Z
         | 
| 18 | 
            +
                  TIMEZONE_REGEXP_PATTERN = '(?<zulu>Z)|(?<offset>(?:\+|-)(?:1[0-2]|0?\d)(?::?[0-5]\d)?)'
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  # Regexp for extracting named captures from a datetime-esque String.
         | 
| 21 | 
            +
                  DATE_TIME_TIMEZONE_REGEXP = /
         | 
| 22 | 
            +
                    \A
         | 
| 23 | 
            +
                    (?=.)
         | 
| 24 | 
            +
                    (?:#{DATE_REGEXP_PATTERN})?
         | 
| 25 | 
            +
                    (?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?
         | 
| 26 | 
            +
                    \z
         | 
| 27 | 
            +
                  /x.freeze
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  # Parse a string for date and/or time values according to the Microformats
         | 
| 30 | 
            +
                  # Value Class Pattern date and time parsing specification.
         | 
| 31 | 
            +
                  #
         | 
| 32 | 
            +
                  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
         | 
| 33 | 
            +
                  #   microformats.org: Value Class Pattern § Date and time parsing
         | 
| 34 | 
            +
                  #
         | 
| 35 | 
            +
                  # @param string [String, #to_s]
         | 
| 16 36 | 
             
                  def initialize(string)
         | 
| 17 | 
            -
                    @string = string
         | 
| 37 | 
            +
                    @string = string.to_s
         | 
| 18 38 | 
             
                  end
         | 
| 19 39 |  | 
| 20 | 
            -
                   | 
| 40 | 
            +
                  # Define getter and predicate methods for all possible named captures
         | 
| 41 | 
            +
                  # returned by the DATE_TIME_TIMEZONE_REGEXP regular expression.
         | 
| 42 | 
            +
                  [
         | 
| 43 | 
            +
                    :year, :ordinal, :month, :day,
         | 
| 44 | 
            +
                    :hours, :minutes, :seconds,
         | 
| 45 | 
            +
                    :abbreviation, :zulu, :offset
         | 
| 46 | 
            +
                  ].each do |name|
         | 
| 21 47 | 
             
                    define_method(name) { values[name] }
         | 
| 22 48 | 
             
                    define_method("#{name}?") { public_send(name).present? }
         | 
| 23 49 | 
             
                  end
         | 
| 24 50 |  | 
| 51 | 
            +
                  # @return [String, nil]
         | 
| 25 52 | 
             
                  def normalized_calendar_date
         | 
| 26 53 | 
             
                    @normalized_calendar_date ||= "#{year}-#{month}-#{day}" if year? && month? && day?
         | 
| 27 54 | 
             
                  end
         | 
| 28 55 |  | 
| 56 | 
            +
                  # @return [String, nil]
         | 
| 29 57 | 
             
                  def normalized_date
         | 
| 30 58 | 
             
                    @normalized_date ||= normalized_calendar_date || normalized_ordinal_date
         | 
| 31 59 | 
             
                  end
         | 
| 32 60 |  | 
| 61 | 
            +
                  # @return [String, nil]
         | 
| 33 62 | 
             
                  def normalized_hours
         | 
| 34 | 
            -
                    @normalized_hours ||= | 
| 35 | 
            -
                       | 
| 36 | 
            -
             | 
| 63 | 
            +
                    @normalized_hours ||=
         | 
| 64 | 
            +
                      if hours?
         | 
| 65 | 
            +
                        return (hours.to_i + 12).to_s if abbreviation&.tr('.', '')&.downcase == 'pm'
         | 
| 37 66 |  | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 67 | 
            +
                        format('%<hours>02d', hours: hours)
         | 
| 68 | 
            +
                      end
         | 
| 40 69 | 
             
                  end
         | 
| 41 70 |  | 
| 71 | 
            +
                  # @return [String]
         | 
| 42 72 | 
             
                  def normalized_minutes
         | 
| 43 73 | 
             
                    @normalized_minutes ||= minutes || '00'
         | 
| 44 74 | 
             
                  end
         | 
| 45 75 |  | 
| 76 | 
            +
                  # @return [String, nil]
         | 
| 46 77 | 
             
                  def normalized_ordinal_date
         | 
| 47 78 | 
             
                    @normalized_ordinal_date ||= "#{year}-#{ordinal}" if year? && ordinal?
         | 
| 48 79 | 
             
                  end
         | 
| 49 80 |  | 
| 81 | 
            +
                  # @return [String, nil]
         | 
| 50 82 | 
             
                  def normalized_time
         | 
| 51 83 | 
             
                    @normalized_time ||= [normalized_hours, normalized_minutes, seconds].compact.join(':') if normalized_hours
         | 
| 52 84 | 
             
                  end
         | 
| 53 85 |  | 
| 86 | 
            +
                  # @return [String, nil]
         | 
| 54 87 | 
             
                  def normalized_timezone
         | 
| 55 88 | 
             
                    @normalized_timezone ||= zulu || offset&.tr(':', '')
         | 
| 56 89 | 
             
                  end
         | 
| 57 90 |  | 
| 58 | 
            -
                  # @return [String]
         | 
| 91 | 
            +
                  # @return [String, nil]
         | 
| 59 92 | 
             
                  def value
         | 
| 60 | 
            -
                    @value ||= | 
| 93 | 
            +
                    @value ||=
         | 
| 94 | 
            +
                      if normalized_date || normalized_time || normalized_timezone
         | 
| 95 | 
            +
                        "#{normalized_date} #{normalized_time}#{normalized_timezone}".strip
         | 
| 96 | 
            +
                      end
         | 
| 61 97 | 
             
                  end
         | 
| 62 98 |  | 
| 63 99 | 
             
                  # @return [Hash{Symbol => String, nil}]
         | 
| 64 100 | 
             
                  def values
         | 
| 65 | 
            -
                    @values ||= | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
                    string&.match(/^(?:#{DATE_REGEXP_PATTERN})?(?:\s?#{TIME_REGEXP_PATTERN}(?:#{TIMEZONE_REGEXP_PATTERN})?)?$/)&.named_captures.to_h.symbolize_keys
         | 
| 101 | 
            +
                    @values ||=
         | 
| 102 | 
            +
                      if string.match?(DATE_TIME_TIMEZONE_REGEXP)
         | 
| 103 | 
            +
                        string.match(DATE_TIME_TIMEZONE_REGEXP).named_captures.symbolize_keys
         | 
| 104 | 
            +
                      else
         | 
| 105 | 
            +
                        {}
         | 
| 106 | 
            +
                      end
         | 
| 72 107 | 
             
                  end
         | 
| 73 108 |  | 
| 74 109 | 
             
                  private
         | 
| 75 110 |  | 
| 111 | 
            +
                  # @return [String]
         | 
| 76 112 | 
             
                  attr_reader :string
         | 
| 77 113 | 
             
                end
         | 
| 78 114 | 
             
              end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class DateTimePropertyParser < BasePropertyParser
         | 
| @@ -8,6 +10,7 @@ module MicroMicro | |
| 8 10 | 
             
                  }.freeze
         | 
| 9 11 |  | 
| 10 12 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_dt-_property
         | 
| 13 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing a +dt-+ property
         | 
| 11 14 | 
             
                  #
         | 
| 12 15 | 
             
                  # @return [String]
         | 
| 13 16 | 
             
                  def value
         | 
| @@ -17,19 +20,19 @@ module MicroMicro | |
| 17 20 | 
             
                  private
         | 
| 18 21 |  | 
| 19 22 | 
             
                  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
         | 
| 23 | 
            +
                  #   microformats.org: Value Class Pattern § microformats2 parsers implied date
         | 
| 20 24 | 
             
                  #
         | 
| 21 25 | 
             
                  # @return [MicroMicro::Parsers::DateTimeParser, nil]
         | 
| 22 26 | 
             
                  def adopted_date_time_parser
         | 
| 23 | 
            -
                    @adopted_date_time_parser ||= | 
| 24 | 
            -
                       | 
| 25 | 
            -
             | 
| 26 | 
            -
                       | 
| 27 | 
            -
                    end
         | 
| 27 | 
            +
                    @adopted_date_time_parser ||=
         | 
| 28 | 
            +
                      (property.prev_all.reverse + property.next_all).filter_map do |prop|
         | 
| 29 | 
            +
                        DateTimeParser.new(prop.value) if prop.date_time_property?
         | 
| 30 | 
            +
                      end.find(&:normalized_date)
         | 
| 28 31 | 
             
                  end
         | 
| 29 32 |  | 
| 30 33 | 
             
                  # @return [String, nil]
         | 
| 31 34 | 
             
                  def attribute_value
         | 
| 32 | 
            -
                     | 
| 35 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 33 36 | 
             
                  end
         | 
| 34 37 |  | 
| 35 38 | 
             
                  # @return [MicroMicro::Parsers::DateTimeParser]
         | 
| @@ -38,6 +41,7 @@ module MicroMicro | |
| 38 41 | 
             
                  end
         | 
| 39 42 |  | 
| 40 43 | 
             
                  # @see https://microformats.org/wiki/value-class-pattern#microformats2_parsers_implied_date
         | 
| 44 | 
            +
                  #   microformats.org: Value Class Pattern § microformats2 parsers implied date
         | 
| 41 45 | 
             
                  #
         | 
| 42 46 | 
             
                  # @return [Boolean]
         | 
| 43 47 | 
             
                  def imply_date?
         | 
| @@ -1,16 +1,18 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class EmbeddedMarkupPropertyParser < BasePropertyParser
         | 
| 4 6 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
         | 
| 7 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing an +e-+ property
         | 
| 5 8 | 
             
                  #
         | 
| 6 9 | 
             
                  # @return [Hash{Symbol => String}]
         | 
| 7 10 | 
             
                  def value
         | 
| 8 | 
            -
                    @value ||= | 
| 11 | 
            +
                    @value ||=
         | 
| 9 12 | 
             
                      {
         | 
| 10 13 | 
             
                        html: node.inner_html.strip,
         | 
| 11 14 | 
             
                        value: super
         | 
| 12 15 | 
             
                      }
         | 
| 13 | 
            -
                    end
         | 
| 14 16 | 
             
                  end
         | 
| 15 17 | 
             
                end
         | 
| 16 18 | 
             
              end
         | 
| @@ -1,12 +1,16 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedNamePropertyParser <  | 
| 4 | 
            -
                   | 
| 5 | 
            -
                    ' | 
| 6 | 
            -
                    ' | 
| 5 | 
            +
                class ImpliedNamePropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  HTML_ELEMENTS_MAP = {
         | 
| 7 | 
            +
                    'img'  => 'alt',
         | 
| 8 | 
            +
                    'area' => 'alt',
         | 
| 9 | 
            +
                    'abbr' => 'title'
         | 
| 7 10 | 
             
                  }.freeze
         | 
| 8 11 |  | 
| 9 12 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
         | 
| 13 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing for implied properties
         | 
| 10 14 | 
             
                  #
         | 
| 11 15 | 
             
                  # @return [String]
         | 
| 12 16 | 
             
                  def value
         | 
| @@ -15,24 +19,19 @@ module MicroMicro | |
| 15 19 |  | 
| 16 20 | 
             
                  private
         | 
| 17 21 |  | 
| 18 | 
            -
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 19 | 
            -
                  def candidate_nodes
         | 
| 20 | 
            -
                    @candidate_nodes ||= Nokogiri::XML::NodeSet.new(node.document, child_nodes.unshift(node))
         | 
| 21 | 
            -
                  end
         | 
| 22 | 
            -
             | 
| 23 22 | 
             
                  # @return [Array]
         | 
| 24 23 | 
             
                  def child_nodes
         | 
| 25 | 
            -
                    [ | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
                  def attribute_value
         | 
| 30 | 
            -
                    candidate_nodes.map { |node| self.class.attribute_value_from(node, HTML_ATTRIBUTES_MAP) }.compact.first
         | 
| 24 | 
            +
                    [
         | 
| 25 | 
            +
                      node.at_css('> :only-child'),
         | 
| 26 | 
            +
                      node.at_css('> :only-child > :only-child')
         | 
| 27 | 
            +
                    ].compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 31 28 | 
             
                  end
         | 
| 32 29 |  | 
| 33 30 | 
             
                  # @return [String]
         | 
| 34 31 | 
             
                  def text_content
         | 
| 35 | 
            -
                     | 
| 32 | 
            +
                    Helpers.text_content_from(node) do |context|
         | 
| 33 | 
            +
                      context.css('img').each { |img| img.content = img['alt'] }
         | 
| 34 | 
            +
                    end
         | 
| 36 35 | 
             
                  end
         | 
| 37 36 | 
             
                end
         | 
| 38 37 | 
             
              end
         | 
| @@ -1,64 +1,42 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedPhotoPropertyParser <  | 
| 5 | 
            +
                class ImpliedPhotoPropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  CSS_SELECTORS_ARRAY = ['> img[src]:only-of-type', '> object[data]:only-of-type'].freeze
         | 
| 7 | 
            +
             | 
| 4 8 | 
             
                  HTML_ELEMENTS_MAP = {
         | 
| 5 9 | 
             
                    'img'    => 'src',
         | 
| 6 10 | 
             
                    'object' => 'data'
         | 
| 7 11 | 
             
                  }.freeze
         | 
| 8 12 |  | 
| 9 13 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
         | 
| 14 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing for implied properties
         | 
| 10 15 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
         | 
| 16 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parse an img element for src and alt
         | 
| 11 17 | 
             
                  #
         | 
| 12 18 | 
             
                  # @return [String, Hash{Symbol => String}, nil]
         | 
| 13 19 | 
             
                  def value
         | 
| 14 | 
            -
                    @value ||= | 
| 15 | 
            -
                       | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 20 | 
            +
                    @value ||=
         | 
| 21 | 
            +
                      if attribute_value
         | 
| 22 | 
            +
                        return attribute_value unless candidate_node.matches?('img[alt]')
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                        {
         | 
| 25 | 
            +
                          value: attribute_value,
         | 
| 26 | 
            +
                          alt: candidate_node['alt'].strip
         | 
| 27 | 
            +
                        }
         | 
| 28 | 
            +
                      end
         | 
| 23 29 | 
             
                  end
         | 
| 24 30 |  | 
| 25 31 | 
             
                  private
         | 
| 26 32 |  | 
| 27 | 
            -
                  # @return [Array | 
| 28 | 
            -
                  def  | 
| 29 | 
            -
                     | 
| 30 | 
            -
                      HTML_ELEMENTS_MAP.map do |element, attribute|
         | 
| 31 | 
            -
                        node if node.matches?("#{element}[#{attribute}]")
         | 
| 32 | 
            -
                      end.compact
         | 
| 33 | 
            -
                    end
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                  # @return [String, nil]
         | 
| 37 | 
            -
                  def resolved_value
         | 
| 38 | 
            -
                    @resolved_value ||= value_node[HTML_ELEMENTS_MAP[value_node.name]] if value_node
         | 
| 39 | 
            -
                  end
         | 
| 33 | 
            +
                  # @return [Array]
         | 
| 34 | 
            +
                  def child_nodes
         | 
| 35 | 
            +
                    nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
         | 
| 40 36 |  | 
| 41 | 
            -
             | 
| 42 | 
            -
                  def value_node
         | 
| 43 | 
            -
                    @value_node ||= begin
         | 
| 44 | 
            -
                      return attribute_values.first if attribute_values.any?
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                      HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 47 | 
            -
                        child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                        return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 50 | 
            -
                      end
         | 
| 51 | 
            -
             | 
| 52 | 
            -
                      if node.element_children.one? && !Item.item_node?(node.first_element_child)
         | 
| 53 | 
            -
                        HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 54 | 
            -
                          child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 55 | 
            -
             | 
| 56 | 
            -
                          return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 57 | 
            -
                        end
         | 
| 58 | 
            -
                      end
         | 
| 37 | 
            +
                    nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
         | 
| 59 38 |  | 
| 60 | 
            -
             | 
| 61 | 
            -
                    end
         | 
| 39 | 
            +
                    nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 62 40 | 
             
                  end
         | 
| 63 41 | 
             
                end
         | 
| 64 42 | 
             
              end
         | 
| @@ -1,50 +1,32 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 | 
            -
                class ImpliedUrlPropertyParser <  | 
| 5 | 
            +
                class ImpliedUrlPropertyParser < BaseImpliedPropertyParser
         | 
| 6 | 
            +
                  CSS_SELECTORS_ARRAY = ['> a[href]:only-of-type', '> area[href]:only-of-type'].freeze
         | 
| 7 | 
            +
             | 
| 4 8 | 
             
                  HTML_ELEMENTS_MAP = {
         | 
| 5 9 | 
             
                    'a'    => 'href',
         | 
| 6 10 | 
             
                    'area' => 'href'
         | 
| 7 11 | 
             
                  }.freeze
         | 
| 8 12 |  | 
| 9 13 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
         | 
| 14 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing for implied properties
         | 
| 10 15 | 
             
                  #
         | 
| 11 16 | 
             
                  # @return [String, nil]
         | 
| 12 17 | 
             
                  def value
         | 
| 13 | 
            -
                    @value ||=  | 
| 18 | 
            +
                    @value ||= attribute_value
         | 
| 14 19 | 
             
                  end
         | 
| 15 20 |  | 
| 16 21 | 
             
                  private
         | 
| 17 22 |  | 
| 18 | 
            -
                  # @return [Array | 
| 19 | 
            -
                  def  | 
| 20 | 
            -
                     | 
| 21 | 
            -
                      HTML_ELEMENTS_MAP.map do |element, attribute|
         | 
| 22 | 
            -
                        node if node.matches?("#{element}[#{attribute}]")
         | 
| 23 | 
            -
                      end.compact
         | 
| 24 | 
            -
                    end
         | 
| 25 | 
            -
                  end
         | 
| 26 | 
            -
             | 
| 27 | 
            -
                  # @return [Nokogiri::XML::Element, nil]
         | 
| 28 | 
            -
                  def value_node
         | 
| 29 | 
            -
                    @value_node ||= begin
         | 
| 30 | 
            -
                      return attribute_values.first if attribute_values.any?
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                      HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 33 | 
            -
                        child_node = node.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 34 | 
            -
             | 
| 35 | 
            -
                        return child_node if child_node && !Item.item_node?(child_node) && element == child_node.name && child_node[attribute]
         | 
| 36 | 
            -
                      end
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                      if node.element_children.one? && !Item.item_node?(node.first_element_child)
         | 
| 39 | 
            -
                        HTML_ELEMENTS_MAP.each do |element, attribute|
         | 
| 40 | 
            -
                          child_node = node.first_element_child.at_css("> #{element}[#{attribute}]:only-of-type")
         | 
| 23 | 
            +
                  # @return [Array]
         | 
| 24 | 
            +
                  def child_nodes
         | 
| 25 | 
            +
                    nodes = [node.at_css(*CSS_SELECTORS_ARRAY)]
         | 
| 41 26 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
                        end
         | 
| 44 | 
            -
                      end
         | 
| 27 | 
            +
                    nodes << node.first_element_child.at_css(*CSS_SELECTORS_ARRAY) if node.element_children.one?
         | 
| 45 28 |  | 
| 46 | 
            -
             | 
| 47 | 
            -
                    end
         | 
| 29 | 
            +
                    nodes.compact.reject { |child_node| Helpers.item_node?(child_node) }
         | 
| 48 30 | 
             
                  end
         | 
| 49 31 | 
             
                end
         | 
| 50 32 | 
             
              end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class PlainTextPropertyParser < BasePropertyParser
         | 
| @@ -8,6 +10,7 @@ module MicroMicro | |
| 8 10 | 
             
                  }.freeze
         | 
| 9 11 |  | 
| 10 12 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_p-_property
         | 
| 13 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing a +p-+ property
         | 
| 11 14 | 
             
                  #
         | 
| 12 15 | 
             
                  # @return [String]
         | 
| 13 16 | 
             
                  def value
         | 
| @@ -18,7 +21,7 @@ module MicroMicro | |
| 18 21 |  | 
| 19 22 | 
             
                  # @return [String, nil]
         | 
| 20 23 | 
             
                  def attribute_value
         | 
| 21 | 
            -
                     | 
| 24 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 22 25 | 
             
                  end
         | 
| 23 26 |  | 
| 24 27 | 
             
                  # @return [String, nil]
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class UrlPropertyParser < BasePropertyParser
         | 
| @@ -14,40 +16,48 @@ module MicroMicro | |
| 14 16 | 
             
                  }.freeze
         | 
| 15 17 |  | 
| 16 18 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parsing_a_u-_property
         | 
| 19 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parsing a +u-+ property
         | 
| 17 20 | 
             
                  # @see https://microformats.org/wiki/microformats2-parsing#parse_an_img_element_for_src_and_alt
         | 
| 21 | 
            +
                  #   microformats.org: microformats2 parsing specification § Parse an img element for src and alt
         | 
| 18 22 | 
             
                  #
         | 
| 19 23 | 
             
                  # @return [String, Hash{Symbol => String}]
         | 
| 20 24 | 
             
                  def value
         | 
| 21 | 
            -
                    @value ||= | 
| 22 | 
            -
                       | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
                         | 
| 27 | 
            -
                       | 
| 28 | 
            -
             | 
| 25 | 
            +
                    @value ||=
         | 
| 26 | 
            +
                      if node.matches?('img[alt]')
         | 
| 27 | 
            +
                        {
         | 
| 28 | 
            +
                          value: resolved_value,
         | 
| 29 | 
            +
                          alt: node['alt'].strip
         | 
| 30 | 
            +
                        }
         | 
| 31 | 
            +
                      else
         | 
| 32 | 
            +
                        resolved_value
         | 
| 33 | 
            +
                      end
         | 
| 29 34 | 
             
                  end
         | 
| 30 35 |  | 
| 31 36 | 
             
                  private
         | 
| 32 37 |  | 
| 33 38 | 
             
                  # @return [String, nil]
         | 
| 34 39 | 
             
                  def attribute_value
         | 
| 35 | 
            -
                     | 
| 40 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP)
         | 
| 36 41 | 
             
                  end
         | 
| 37 42 |  | 
| 38 43 | 
             
                  # @return [String, nil]
         | 
| 39 44 | 
             
                  def extended_attribute_value
         | 
| 40 | 
            -
                     | 
| 45 | 
            +
                    Helpers.attribute_value_from(node, EXTENDED_HTML_ATTRIBUTES_MAP)
         | 
| 41 46 | 
             
                  end
         | 
| 42 47 |  | 
| 43 48 | 
             
                  # @return [String]
         | 
| 44 49 | 
             
                  def resolved_value
         | 
| 45 | 
            -
                    @resolved_value ||=  | 
| 50 | 
            +
                    @resolved_value ||= node.document.resolve_relative_url(unresolved_value.strip)
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  # @return [String]
         | 
| 54 | 
            +
                  def text_content
         | 
| 55 | 
            +
                    Helpers.text_content_from(node)
         | 
| 46 56 | 
             
                  end
         | 
| 47 57 |  | 
| 48 58 | 
             
                  # @return [String]
         | 
| 49 59 | 
             
                  def unresolved_value
         | 
| 50 | 
            -
                    attribute_value || value_class_pattern_value || extended_attribute_value ||  | 
| 60 | 
            +
                    attribute_value || value_class_pattern_value || extended_attribute_value || text_content
         | 
| 51 61 | 
             
                  end
         | 
| 52 62 |  | 
| 53 63 | 
             
                  # @return [String, nil]
         | 
| @@ -1,8 +1,12 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module MicroMicro
         | 
| 2 4 | 
             
              module Parsers
         | 
| 3 5 | 
             
                class ValueClassPatternParser
         | 
| 4 6 | 
             
                  # @see https://microformats.org/wiki/value-class-pattern#Basic_Parsing
         | 
| 7 | 
            +
                  #   microformats.org: Value Class Pattern § Basic Parsing
         | 
| 5 8 | 
             
                  # @see https://microformats.org/wiki/value-class-pattern#Date_and_time_values
         | 
| 9 | 
            +
                  #   microformats.org: Value Class Pattern § Date and time values
         | 
| 6 10 | 
             
                  HTML_ATTRIBUTES_MAP = {
         | 
| 7 11 | 
             
                    'alt'      => %w[area img],
         | 
| 8 12 | 
             
                    'value'    => %w[data],
         | 
| @@ -10,72 +14,55 @@ module MicroMicro | |
| 10 14 | 
             
                    'datetime' => %w[del ins time]
         | 
| 11 15 | 
             
                  }.freeze
         | 
| 12 16 |  | 
| 13 | 
            -
                  # @param context [Nokogiri::XML::Element]
         | 
| 14 | 
            -
                  # @param separator [String]
         | 
| 15 | 
            -
                  def initialize(node, separator = '')
         | 
| 16 | 
            -
                    @node = node
         | 
| 17 | 
            -
                    @separator = separator
         | 
| 18 | 
            -
                  end
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                  # @return [String, nil]
         | 
| 21 | 
            -
                  def value
         | 
| 22 | 
            -
                    @value ||= values.join(separator).strip if values.any?
         | 
| 23 | 
            -
                  end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                  # @return [Array<String>]
         | 
| 26 | 
            -
                  def values
         | 
| 27 | 
            -
                    @values ||= value_nodes.map { |value_node| self.class.value_from(value_node) }.select(&:present?)
         | 
| 28 | 
            -
                  end
         | 
| 29 | 
            -
             | 
| 30 17 | 
             
                  # @param context [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
         | 
| 31 18 | 
             
                  # @param node_set [Nokogiri::XML::NodeSet]
         | 
| 32 19 | 
             
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 33 | 
            -
                  def self. | 
| 34 | 
            -
                    context.each { |node|  | 
| 20 | 
            +
                  def self.node_set_from(context, node_set = Nokogiri::XML::NodeSet.new(context.document, []))
         | 
| 21 | 
            +
                    context.each { |node| node_set_from(node, node_set) } if context.is_a?(Nokogiri::XML::NodeSet)
         | 
| 35 22 |  | 
| 36 | 
            -
                    if context.is_a?(Nokogiri::XML::Element) && ! | 
| 37 | 
            -
                      if value_class_node?(context) || value_title_node?(context)
         | 
| 23 | 
            +
                    if context.is_a?(Nokogiri::XML::Element) && !Helpers.ignore_node?(context)
         | 
| 24 | 
            +
                      if Helpers.value_class_node?(context) || Helpers.value_title_node?(context)
         | 
| 38 25 | 
             
                        node_set << context
         | 
| 39 26 | 
             
                      else
         | 
| 40 | 
            -
                         | 
| 27 | 
            +
                        node_set_from(context.element_children, node_set)
         | 
| 41 28 | 
             
                      end
         | 
| 42 29 | 
             
                    end
         | 
| 43 30 |  | 
| 44 31 | 
             
                    node_set
         | 
| 45 32 | 
             
                  end
         | 
| 46 33 |  | 
| 47 | 
            -
                  # @param node [Nokogiri::XML::Element]
         | 
| 48 | 
            -
                  # @return [Boolean]
         | 
| 49 | 
            -
                  def self.value_class_node?(node)
         | 
| 50 | 
            -
                    node.classes.include?('value')
         | 
| 51 | 
            -
                  end
         | 
| 52 | 
            -
             | 
| 53 34 | 
             
                  # @param node [Nokogiri::XML::Element]
         | 
| 54 35 | 
             
                  # @return [String, nil]
         | 
| 55 36 | 
             
                  def self.value_from(node)
         | 
| 56 | 
            -
                    return node['title'] if value_title_node?(node)
         | 
| 37 | 
            +
                    return node['title'] if Helpers.value_title_node?(node)
         | 
| 57 38 |  | 
| 58 | 
            -
                     | 
| 59 | 
            -
                      return node[attribute] if names.include?(node.name) && node[attribute]
         | 
| 60 | 
            -
                    end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
                    node.text
         | 
| 39 | 
            +
                    Helpers.attribute_value_from(node, HTML_ATTRIBUTES_MAP) || node.text
         | 
| 63 40 | 
             
                  end
         | 
| 64 41 |  | 
| 65 42 | 
             
                  # @param node [Nokogiri::XML::Element]
         | 
| 66 | 
            -
                  # @ | 
| 67 | 
            -
                  def  | 
| 68 | 
            -
                    node | 
| 43 | 
            +
                  # @param separator [String]
         | 
| 44 | 
            +
                  def initialize(node, separator = '')
         | 
| 45 | 
            +
                    @node = node
         | 
| 46 | 
            +
                    @separator = separator
         | 
| 47 | 
            +
                  end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                  # @return [String, nil]
         | 
| 50 | 
            +
                  def value
         | 
| 51 | 
            +
                    @value ||= values.join(separator).strip if values.any?
         | 
| 52 | 
            +
                  end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                  # @return [Array<String>]
         | 
| 55 | 
            +
                  def values
         | 
| 56 | 
            +
                    @values ||=
         | 
| 57 | 
            +
                      self.class
         | 
| 58 | 
            +
                          .node_set_from(node)
         | 
| 59 | 
            +
                          .map { |value_node| self.class.value_from(value_node) }
         | 
| 60 | 
            +
                          .select(&:present?)
         | 
| 69 61 | 
             
                  end
         | 
| 70 62 |  | 
| 71 63 | 
             
                  private
         | 
| 72 64 |  | 
| 73 65 | 
             
                  attr_reader :node, :separator
         | 
| 74 | 
            -
             | 
| 75 | 
            -
                  # @return [Nokogiri::XML::NodeSet]
         | 
| 76 | 
            -
                  def value_nodes
         | 
| 77 | 
            -
                    @value_nodes ||= self.class.nodes_from(node)
         | 
| 78 | 
            -
                  end
         | 
| 79 66 | 
             
                end
         | 
| 80 67 | 
             
              end
         | 
| 81 68 | 
             
            end
         |