RubyGems - reading - Versions diffs - 0.6.1 → 0.8.0 - Mend

reading 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

checksums.yaml +4 -4
data/bin/reading +5 -5
data/bin/readingfile +31 -0
data/lib/reading/config.rb +96 -108
data/lib/reading/errors.rb +10 -66
data/lib/reading/filter.rb +95 -0
data/lib/reading/item/time_length.rb +140 -0
data/lib/reading/item/view.rb +121 -0
data/lib/reading/item.rb +117 -0
data/lib/reading/parsing/attributes/attribute.rb +26 -0
data/lib/reading/parsing/attributes/author.rb +15 -0
data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
data/lib/reading/parsing/attributes/experiences.rb +27 -0
data/lib/reading/parsing/attributes/genres.rb +16 -0
data/lib/reading/parsing/attributes/notes.rb +22 -0
data/lib/reading/parsing/attributes/rating.rb +17 -0
data/lib/reading/parsing/attributes/shared.rb +62 -0
data/lib/reading/parsing/attributes/title.rb +21 -0
data/lib/reading/parsing/attributes/variants.rb +77 -0
data/lib/reading/parsing/csv.rb +112 -0
data/lib/reading/parsing/parser.rb +292 -0
data/lib/reading/parsing/rows/column.rb +131 -0
data/lib/reading/parsing/rows/comment.rb +26 -0
data/lib/reading/parsing/rows/compact_planned.rb +30 -0
data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
data/lib/reading/parsing/rows/regular.rb +33 -0
data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
data/lib/reading/parsing/transformer.rb +70 -0
data/lib/reading/util/hash_compact_by_template.rb +1 -0
data/lib/reading/util/hash_deep_merge.rb +1 -1
data/lib/reading/util/hash_to_data.rb +30 -0
data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
data/lib/reading/util/string_truncate.rb +13 -4
data/lib/reading/version.rb +1 -1
data/lib/reading.rb +49 -0
metadata +76 -42
data/lib/reading/attribute/all_attributes.rb +0 -83
data/lib/reading/attribute/attribute.rb +0 -25
data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
data/lib/reading/csv.rb +0 -76
data/lib/reading/line.rb +0 -23
data/lib/reading/row/blank_row.rb +0 -23
data/lib/reading/row/compact_planned_row.rb +0 -130
data/lib/reading/row/regular_row.rb +0 -99
data/lib/reading/row/row.rb +0 -88
data/lib/reading/util/hash_to_struct.rb +0 -29

data/lib/reading/parsing/rows/column.rb ADDED Viewed

@@ -0,0 +1,131 @@
+module Reading
+  module Parsing
+    module Rows
+      # The base class for all the columns in parsing/rows/compact_planned_columns
+      # and parsing/rows/regular_columns.
+      class Column
+        # The class name changed into a string, e.g. StartDates => "Start Dates"
+        # @return [String]
+        def self.column_name
+          class_name = name.split("::").last
+          class_name.gsub(/(.)([A-Z])/,'\1 \2')
+        end
+        # The class name changed into a symbol, e.g. StartDates => :start_dates
+        # @return [Symbol]
+        def self.to_sym
+          class_name = name.split("::").last
+          class_name
+            .gsub(/(.)([A-Z])/,'\1_\2')
+            .downcase
+            .to_sym
+        end
+        # Whether the column can contain "chunks" each set off by a format emoji.
+        # For example, the Head column of a compact planned row typically
+        # contains a list of multiple items. (The two others are the Sources
+        # column, for multiple variants of an item; and the regular Head column,
+        # for multiple items.)
+        # @return [Boolean]
+        def self.split_by_format?
+          false
+        end
+        # Whether the column can contain multiple segments, e.g. "Cosmos -- 2013 paperback"
+        # @return [Boolean]
+        def self.split_by_segment?
+          !!segment_separator
+        end
+        # The regular expression used to split segments (e.g. /\s*--\s*/),
+        # or nil if the column should not be split by segment.
+        # @return [Regexp, nil]
+        def self.segment_separator
+          nil
+        end
+        # Whether the column can contain multiple segment groups, e.g.
+        # "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
+        # @return [Boolean]
+        def self.split_by_segment_group?
+          !!segment_group_separator
+        end
+        # The regular expression used to split segment groups (e.g. /\s*----\s*/),
+        # or nil if the column should not be split by segment group.
+        # @return [Regexp, nil]
+        def self.segment_group_separator
+          nil
+        end
+        # Adjustments that are made to captured values at the end of parsing
+        # the column. For example, if ::regexes includes a capture group named
+        # "sources" and it needs to be split by commas:
+        # { sources: -> { _1.split(/\s*,\s*/) } }
+        # @return [Hash{Symbol => Proc}]
+        def self.tweaks
+          {}
+        end
+        # Keys in the parsed output hash that should be converted to an array, even
+        # if only one value was in the input, as in { ... extra_info: ["ed. Jane Doe"] }
+        # @return [Array<Symbol>]
+        def self.flatten_into_arrays
+          []
+        end
+        # The regular expressions used to parse the column (except the part of
+        # the column before the first format emoji, which is in
+        # ::regexes_before_formats below). An array because sometimes it's
+        # simpler to try several smaller regular expressions in series, and
+        # because a regular expression might be applicable only for segments in
+        # a certain position. See parsing/rows/regular_columns/head.rb for an example.
+        # @param segment_index [Integer] the position of the current segment.
+        # @return [Array<Regexp>]
+        def self.regexes(segment_index)
+          []
+        end
+        # The regular expressions used to parse the part of the column before
+        # the first format emoji.
+        # @return [Array<Regexp>]
+        def self.regexes_before_formats
+          []
+        end
+        # Regular expressions that are shared across more than one column,
+        # placed here just to be DRY.
+        SHARED_REGEXES = {
+          progress: %r{
+            (DNF\s+)?(?<progress_percent>\d\d?)%
+            |
+            (DNF\s+)?p?(?<progress_pages>\d+)p?
+            |
+            (DNF\s+)?(?<progress_time>\d+:\d\d)
+            |
+            # just DNF
+            (?<progress_dnf>DNF)
+          }x,
+          series_and_extra_info: [
+            # just series
+            %r{\A
+              in\s(?<series_names>.+)
+              # empty volume so that names and volumes have equal sizes when turned into arrays
+              (?<series_volumes>)
+            \z}x,
+            # series and volume
+            %r{\A
+              (?<series_names>.+?)
+              ,?\s*
+              \#(?<series_volumes>\d+)
+            \z}x,
+            # extra info
+            %r{\A
+              (?<extra_info>.+)
+            \z}x,
+          ],
+        }.freeze
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/comment.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Reading
+  module Parsing
+    module Rows
+      # A row that is a comment.
+      module Comment
+        using Util::HashArrayDeepFetch
+        # No columns; comments are parsed as if the row were blank.
+        # @return [Array]
+        def self.column_classes
+          []
+        end
+        # Starts with a comment character and does not include any format emojis.
+        # (Commented rows that DO include format emojis are matched as compact
+        # planned rows.)
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          row_string.lstrip.start_with?(config.fetch(:comment_character))
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/compact_planned.rb ADDED Viewed

@@ -0,0 +1,30 @@
+require_relative "column"
+require_relative "compact_planned_columns/head"
+require_relative "regular_columns/sources"
+module Reading
+  module Parsing
+    module Rows
+      # A row that contains compact planned items.
+      module CompactPlanned
+        using Util::HashArrayDeepFetch
+        # The columns that are possible in this type of row.
+        # @return [Array<Class>]
+        def self.column_classes
+          [CompactPlanned::Head, Regular::Sources]
+        end
+        # Starts with a comment character and includes one or more format emojis.
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          row_string.lstrip.start_with?(config.fetch(:comment_character)) &&
+            row_string.match?(config.deep_fetch(:regex, :formats)) &&
+            row_string.count(config.fetch(:column_separator)) <= column_classes.count - 1
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/compact_planned_columns/head.rb ADDED Viewed

@@ -0,0 +1,60 @@
+module Reading
+  module Parsing
+    module Rows
+      module CompactPlanned
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#compact-planned-items
+        # and the sections following.
+        class Head < Column
+          def self.split_by_format?
+            true
+          end
+          def self.regexes_before_formats
+            [
+              %r{\A
+                \\ # comment character
+                \s*
+                (
+                  (?<genres>[^a-z]+)?
+                  \s*
+                  (?<sources>@.+)?
+                  \s*:
+                )?
+              \z}x,
+            ]
+          end
+          def self.segment_separator
+            /\s*--\s*/
+          end
+          def self.flatten_into_arrays
+            %i[extra_info series_names series_volumes]
+          end
+          def self.tweaks
+            {
+              genres: -> { _1.downcase.split(/\s*,\s*/) },
+              sources: -> { _1.split(/\s*@/).map(&:presence).compact }
+            }
+          end
+          def self.regexes(segment_index)
+            [
+              # author, title, sources
+              (%r{\A
+                (
+                  (?<author>[^@]+?)
+                  \s+-\s+
+                )?
+                (?<title>[^@]+)
+                (?<sources>@.+)?
+              \z}x if  segment_index.zero?),
+              *Column::SHARED_REGEXES[:series_and_extra_info],
+            ].compact
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular.rb ADDED Viewed

@@ -0,0 +1,33 @@
+require_relative "column"
+require_relative "regular_columns/rating"
+require_relative "regular_columns/head"
+require_relative "regular_columns/sources"
+require_relative "regular_columns/start_dates"
+require_relative "regular_columns/end_dates"
+require_relative "regular_columns/genres"
+require_relative "regular_columns/length"
+require_relative "regular_columns/notes"
+require_relative "regular_columns/history"
+module Reading
+  module Parsing
+    module Rows
+      # A normal row of (usually) one item.
+      module Regular
+        # The columns that are possible in this type of row.
+        # @return [Array<Class>]
+        def self.column_classes
+          [Rating, Head, Sources, StartDates, EndDates, Genres, Length, Notes, History]
+        end
+        # Does not start with a comment character.
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          !row_string.lstrip.start_with?(config.fetch(:comment_character))
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/end_dates.rb ADDED Viewed

@@ -0,0 +1,20 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
+        class EndDates < Column
+          def self.segment_separator
+            /,\s*/
+          end
+          def self.regexes(segment_index)
+            [%r{\A
+              (?<date>\d{4}/\d\d?/\d\d?)
+            \z}x]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/genres.rb ADDED Viewed

@@ -0,0 +1,20 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#genres-column
+        class Genres < Column
+          def self.segment_separator
+            /,\s*/
+          end
+          def self.regexes(segment_index)
+            [%r{\A
+              (?<genre>.+)
+            \z}x]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/head.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-title
+        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-dnf
+        # and the sections following.
+        class Head < Column
+          def self.split_by_format?
+            true
+          end
+          def self.regexes_before_formats
+            [
+              /\A#{Column::SHARED_REGEXES[:progress]}\z/,
+              /.+/,
+            ]
+          end
+          def self.segment_separator
+            /\s*--\s*/
+          end
+          def self.flatten_into_arrays
+            %i[extra_info series_names series_volumes]
+          end
+          def self.regexes(segment_index)
+            [
+              # author and title
+              (%r{\A
+                (
+                  (?<author>.+?)
+                  \s+-\s+
+                )?
+                (?<title>.+)
+              \z}x if segment_index.zero?),
+              *Column::SHARED_REGEXES[:series_and_extra_info],
+            ].compact
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/history.rb ADDED Viewed

@@ -0,0 +1,143 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # Parsing rules for the History column. See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#history-column
+        class History < Column
+          def self.segment_separator # @return [Regexp] separates segments: "--" with optional surrounding whitespace
+            /\s*--\s*/
+          end
+          def self.segment_group_separator # @return [Regexp] separates segment groups: "----" with optional surrounding whitespace
+            /\s*----\s*/
+          end
+          def self.tweaks # @return [Hash{Symbol => Proc}] adjustments to captured values, keyed by capture group name
+            {
+              except_dates: ->(dates_list) { # splits the list on commas and parses each entry with START_END_DATES_REGEX; nil results are dropped
+                dates_list
+                  .split(/\s*,\s*/)
+                  .map { |date|
+                    date.match(
+                      %r{\A
+                        #{START_END_DATES_REGEX}
+                      \z}xo
+                    )
+                    &.named_captures
+                    &.compact
+                    &.transform_keys(&:to_sym)
+                    &.presence
+                  }
+                  .compact
+              },
+            }
+          end
+          def self.regexes(segment_index) # @param segment_index [Integer] not used by this column; @return [Array<Regexp>] tried in the order listed
+            [
+              # entry of exception dates ("but not on these dates")
+              %r{\A
+                not
+                \s+
+                (?<except_dates>.+)
+              \z}x,
+              # normal entry
+              %r{\A
+                \(?\s*
+                # variant, group before first start date
+                (
+                  (
+                    v(?<variant>\d)
+                    (\s+|\z)
+                  )?
+                  (
+                    🤝🏼(?<group>.+?)
+                  )?
+                  (?=(\d{4}/)?\d\d?/\d\d?)
+                )?
+                # planned or dates
+                (
+                  (
+                    (?<planned>\?\?)
+                    |
+                    (#{START_END_DATES_REGEX})
+                  )
+                  (\s*\)?\s*\z|\s+)
+                )?
+                # progress
+                (
+                  # requires the at symbol, unlike the shared progress regex in Column
+                  # and also adds the done option
+                  (
+                    (DNF\s+)?@?(?<progress_percent>\d\d?)%
+                    |
+                    (DNF\s+)?@p?(?<progress_pages>\d+)p?
+                    |
+                    (DNF\s+)?@(?<progress_time>\d+:\d\d)
+                    |
+                    # just DNF
+                    (?<progress_dnf>DNF)
+                    |
+                    # done
+                    (?<progress_done>done)
+                  )
+                  (\s*\)?\s*\z|\s+)
+                )?
+                # amount, repetitions, frequency
+                (
+                  (
+                    p?(?<amount_pages>\d+)p?
+                    |
+                    (?<amount_time>\d+:\d\d)
+                  )?
+                  (
+                    \s*
+                    x(?<repetitions>\d+)
+                  )?
+                  (
+                    /(?<frequency>day|week|month)
+                  )?
+                  (\s*\)?\s*\z|\s+)
+                )?
+                # favorite, name
+                (
+                  (?<favorite>⭐)?
+                  \s*
+                  (?<name>[^\d].*)
+                )?
+              \z}xo,
+            ]
+          end
+          private # NOTE(review): a bare `private` does not apply to constants, so START_END_DATES_REGEX below stays publicly accessible
+          START_END_DATES_REGEX = # an optional start date, an optional ".." range marker, and (only right after "..") an optional end date; every part is optional
+            %r{
+              (
+                (?<start_year>\d{4})
+                /
+              )?
+              (
+                (?<start_month>\d\d?)
+                /
+              )?
+              (?<start_day>\d\d?)?
+              (?<range>\.\.)?
+              (
+                (?<=\.\.)
+                (
+                  (?<end_year>\d{4})
+                  /
+                )?
+                (
+                  (?<end_month>\d\d?)
+                  /
+                )?
+                (?<end_day>\d\d?)?
+              )?
+            }x
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/length.rb ADDED Viewed

@@ -0,0 +1,35 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See  https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#length-column
+        class Length < Column
+          def self.regexes(segment_index)
+            [%r{\A
+              # length
+              (
+                (
+                  (?<length_pages>\d+)p?
+                  |
+                  (?<length_time>\d+:\d\d)
+                )
+                (\s+|\z)
+              )
+              # each or repetitions, used in conjunction with the History column
+              (
+                # each
+                (?<each>each)
+                |
+                # repetitions
+                (
+                  x
+                  (?<repetitions>\d+)
+                )
+              )?
+            \z}x]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/notes.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column
+        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column-special-notes
+        class Notes < Column
+          def self.segment_separator
+            /\s*--\s*/
+          end
+          def self.regexes(segment_index)
+            [
+              # blurb note
+              %r{\A
+                💬\s*(?<note_blurb>.+)
+              \z}x,
+              # private note
+              %r{\A
+                🔒\s*(?<note_private>.+)
+              \z}x,
+              # regular note
+              %r{\A
+                (?<note_regular>.+)
+              \z}x,
+            ]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/rating.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#rating-column
+        class Rating < Column
+          def self.regexes(segment_index)
+            # integer or float
+            [/\A(?<number>\d+\.?\d*)?\z/]
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reading/parsing/rows/regular_columns/sources.rb ADDED Viewed

@@ -0,0 +1,94 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column
+        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column-variants
+        class Sources < Column
+          SOURCES_PARSING_ERRORS = {
+            "Missing comma before URL(s) in the Sources column" =>
+              ->(source) {
+                source.match?(/\shttps?:\/\//) || source.scan(/https?:\/\//).count > 1
+              },
+            "The ISBN/ASIN must be placed after sources in the Sources column" =>
+              ->(source) {
+                source.match?(/\A#{ISBN_REGEX}/o) || source.match(/\A#{ASIN_REGEX}/o)
+              },
+          }
+          def self.split_by_format?
+            true
+          end
+          def self.segment_separator
+            /\s*--\s*/
+          end
+          def self.flatten_into_arrays
+            %i[extra_info series_names series_volumes]
+          end
+          def self.tweaks
+            {
+              sources: -> {
+                sources = _1.split(/\s*,\s*/)
+                SOURCES_PARSING_ERRORS.each do |message, check|
+                  if sources.any? { |source| check.call(source) }
+                    raise ParsingError, message
+                  end
+                end
+                sources
+              },
+            }
+          end
+          def self.regexes(segment_index)
+            [
+              # ISBN/ASIN and length (without sources)
+              (%r{\A
+                (
+                  (?<isbn>(\d{3}[-\s]?)?[A-Z\d]{10})
+                  ,?(\s+|\z)
+                )?
+                (
+                  (?<length_pages>\d+)p?
+                  |
+                  (?<length_time>\d+:\d\d)
+                )?
+              \z}x if  segment_index.zero?),
+              # sources, ISBN/ASIN, length
+              (%r{\A
+                (
+                  (?<sources>.+?)
+                  ,?(\s+|\z)
+                )?
+                (
+                  (
+                    (?<isbn>#{ISBN_REGEX})
+                    |
+                    (?<asin>#{ASIN_REGEX})
+                  )
+                  ,?(\s+|\z)
+                )?
+                (
+                  (?<length_pages>\d+)p?
+                  |
+                  (?<length_time>\d+:\d\d)
+                )?
+              \z}xo if  segment_index.zero?),
+              *Column::SHARED_REGEXES[:series_and_extra_info],
+            ].compact
+          end
+          private
+          ISBN_REGEX = /(\d{3}[-\s]?)?\d{10}/
+          ASIN_REGEX = /B0[A-Z\d]{8}/
+        end
+      end
+    end
+  end
+end