reading 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/bin/reading +8 -8
  3. data/bin/readingfile +31 -0
  4. data/lib/reading/config.rb +115 -148
  5. data/lib/reading/errors.rb +11 -64
  6. data/lib/reading/item/time_length.rb +138 -0
  7. data/lib/reading/parsing/attributes/attribute.rb +26 -0
  8. data/lib/reading/parsing/attributes/author.rb +15 -0
  9. data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
  10. data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
  11. data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
  12. data/lib/reading/parsing/attributes/experiences.rb +27 -0
  13. data/lib/reading/parsing/attributes/genres.rb +16 -0
  14. data/lib/reading/parsing/attributes/notes.rb +22 -0
  15. data/lib/reading/parsing/attributes/rating.rb +17 -0
  16. data/lib/reading/parsing/attributes/shared.rb +62 -0
  17. data/lib/reading/parsing/attributes/title.rb +21 -0
  18. data/lib/reading/parsing/attributes/variants.rb +77 -0
  19. data/lib/reading/parsing/csv.rb +101 -0
  20. data/lib/reading/parsing/parser.rb +292 -0
  21. data/lib/reading/parsing/rows/column.rb +131 -0
  22. data/lib/reading/parsing/rows/comment.rb +26 -0
  23. data/lib/reading/parsing/rows/compact_planned.rb +30 -0
  24. data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
  25. data/lib/reading/parsing/rows/regular.rb +33 -0
  26. data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
  27. data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
  28. data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
  29. data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
  30. data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
  31. data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
  32. data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
  33. data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
  34. data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
  35. data/lib/reading/parsing/transformer.rb +70 -0
  36. data/lib/reading/util/hash_compact_by_template.rb +1 -0
  37. data/lib/reading/util/hash_deep_merge.rb +1 -1
  38. data/lib/reading/util/hash_to_struct.rb +1 -0
  39. data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
  40. data/lib/reading/util/string_truncate.rb +13 -4
  41. data/lib/reading/version.rb +1 -1
  42. data/lib/reading.rb +18 -0
  43. metadata +58 -41
  44. data/lib/reading/attribute/all_attributes.rb +0 -83
  45. data/lib/reading/attribute/attribute.rb +0 -25
  46. data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
  47. data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
  48. data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
  49. data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
  50. data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
  51. data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
  52. data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
  53. data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
  54. data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
  55. data/lib/reading/csv.rb +0 -67
  56. data/lib/reading/line.rb +0 -23
  57. data/lib/reading/row/blank_row.rb +0 -23
  58. data/lib/reading/row/compact_planned_row.rb +0 -130
  59. data/lib/reading/row/regular_row.rb +0 -94
  60. data/lib/reading/row/row.rb +0 -88
@@ -0,0 +1,143 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the History column of a regular row: the
        # separators between entries, the regexes that an entry may match, and
        # a post-processing tweak for the "not <dates>" entry.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#history-column
        class History < Column
          # Matches an optional start date and an optional ".."-range end date,
          # each of the form [[yyyy/]m/]d. The end-date part can only match
          # directly after "..", because of the lookbehind.
          # It is interpolated into the regexes below, so it is defined first.
          START_END_DATES_REGEX =
            %r{
              (
                (?<start_year>\d{4})
                /
              )?
              (
                (?<start_month>\d\d?)
                /
              )?
              (?<start_day>\d\d?)?
              (?<range>\.\.)?
              (
                (?<=\.\.)
                (
                  (?<end_year>\d{4})
                  /
                )?
                (
                  (?<end_month>\d\d?)
                  /
                )?
                (?<end_day>\d\d?)?
              )?
            }x

          # @return [Regexp] matches "--" along with any surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # @return [Regexp] matches "----" along with any surrounding whitespace.
          def self.segment_group_separator
            /\s*----\s*/
          end

          # Post-processing for captured values, keyed by capture group name.
          # The :except_dates tweak splits a comma-separated list of dates and
          # turns each date into a Hash of its non-nil named captures (with
          # Symbol keys). Dates that do not match, or that yield no captures,
          # are dropped.
          # @return [Hash{Symbol => Proc}]
          def self.tweaks
            parse_except_dates = lambda do |dates_list|
              dates_list.split(/\s*,\s*/).filter_map do |date|
                date_match = date.match(
                  %r{\A
                    #{START_END_DATES_REGEX}
                  \z}xo
                )

                date_match
                  &.named_captures
                  &.compact
                  &.transform_keys(&:to_sym)
                  &.presence
              end
            end

            { except_dates: parse_except_dates }
          end

          # The regexes that a History entry may match, in the order listed:
          # first the exception-dates entry, then the normal entry.
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            # entry of exception dates ("but not on these dates")
            exception_dates_entry =
              %r{\A
                not
                \s+
                (?<except_dates>.+)
              \z}x

            # normal entry
            normal_entry =
              %r{\A
                \(?\s*
                # variant, group before first start date
                (
                  (
                    v(?<variant>\d)
                    (\s+|\z)
                  )?
                  (
                    🤝🏼(?<group>.+?)
                  )?
                  (?=(\d{4}/)?\d\d?/\d\d?)
                )?
                # planned or dates
                (
                  (
                    (?<planned>\?\?)
                    |
                    (#{START_END_DATES_REGEX})
                  )
                  (\s*\)?\s*\z|\s+)
                )?
                # progress
                (
                  # requires the at symbol, unlike the shared progress regex in Column
                  # and also adds the done option
                  (
                    (DNF\s+)?@?(?<progress_percent>\d\d?)%
                    |
                    (DNF\s+)?@p?(?<progress_pages>\d+)p?
                    |
                    (DNF\s+)?@(?<progress_time>\d+:\d\d)
                    |
                    # just DNF
                    (?<progress_dnf>DNF)
                    |
                    # done
                    (?<progress_done>done)
                  )
                  (\s*\)?\s*\z|\s+)
                )?
                # amount, repetitions, frequency
                (
                  (
                    p?(?<amount_pages>\d+)p?
                    |
                    (?<amount_time>\d+:\d\d)
                  )?
                  (
                    \s*
                    x(?<repetitions>\d+)
                  )?
                  (
                    /(?<frequency>day|week|month)
                  )?
                  (\s*\)?\s*\z|\s+)
                )?
                # favorite, name
                (
                  (?<favorite>⭐)?
                  \s*
                  (?<name>[^\d].*)
                )?
              \z}xo

            [exception_dates_entry, normal_entry]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the Length column of a regular row.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#length-column
        class Length < Column
          # The single regex that the Length column may match: a length in
          # pages (digits with an optional "p") or in time ("h:mm"), optionally
          # followed by "each" or by repetitions ("x" and digits).
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            length_regex =
              %r{\A
                # length
                (
                  (
                    (?<length_pages>\d+)p?
                    |
                    (?<length_time>\d+:\d\d)
                  )
                  (\s+|\z)
                )
                # each or repetitions, used in conjunction with the History column
                (
                  # each
                  (?<each>each)
                  |
                  # repetitions
                  (
                    x
                    (?<repetitions>\d+)
                  )
                )?
              \z}x

            [length_regex]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the Notes column of a regular row.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column
        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column-special-notes
        class Notes < Column
          # @return [Regexp] matches "--" along with any surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # The regexes that a note may match, most specific first: a blurb
          # note (prefixed with 💬), a private note (prefixed with 🔒), and
          # finally any other non-empty text as a regular note.
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            blurb_note = %r{\A 💬\s*(?<note_blurb>.+) \z}x
            private_note = %r{\A 🔒\s*(?<note_private>.+) \z}x
            regular_note = %r{\A (?<note_regular>.+) \z}x

            [blurb_note, private_note, regular_note]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the Rating column of a regular row.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#rating-column
        class Rating < Column
          # The single regex that the Rating column may match: an empty
          # string, or an integer or float captured as "number".
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            integer_or_float = %r{\A(?<number>\d+\.?\d*)?\z}

            [integer_or_float]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the Sources column of a regular row.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column
        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column-variants
        class Sources < Column
          # An optional 3-digit prefix (with an optional hyphen or space)
          # followed by 10 digits.
          ISBN_REGEX = /(\d{3}[-\s]?)?\d{10}/
          # "B0" followed by 8 uppercase letters or digits.
          ASIN_REGEX = /B0[A-Z\d]{8}/

          # Error messages mapped to checks. Each check takes one source string
          # and returns true if that source is malformed in the way described
          # by the message.
          SOURCES_PARSING_ERRORS = {
            "Missing comma before URL(s) in the Sources column" =>
              ->(source) {
                source.match?(/\shttps?:\/\//) || source.scan(/https?:\/\//).count > 1
              },
            "The ISBN/ASIN must be placed after sources in the Sources column" =>
              ->(source) {
                source.match?(/\A#{ISBN_REGEX}/o) || source.match?(/\A#{ASIN_REGEX}/o)
              },
          }.freeze

          # @return [Boolean] always true for this column.
          def self.split_by_format?
            true
          end

          # @return [Regexp] matches "--" along with any surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # @return [Array<Symbol>] the keys named :extra_info, :series_names,
          #   and :series_volumes.
          def self.flatten_into_arrays
            %i[extra_info series_names series_volumes]
          end

          # Post-processing for captured values, keyed by capture group name.
          # The :sources tweak splits the captured string on commas into an
          # array of sources, raising if any source fails a check in
          # SOURCES_PARSING_ERRORS.
          # @return [Hash{Symbol => Proc}]
          # @raise [ParsingError] (when the :sources lambda is called) with the
          #   message of the first failed check.
          def self.tweaks
            {
              sources: ->(sources_string) {
                sources = sources_string.split(/\s*,\s*/)

                SOURCES_PARSING_ERRORS.each do |message, check|
                  raise ParsingError, message if sources.any? { |source| check.call(source) }
                end

                sources
              },
            }
          end

          # The regexes that a segment may match. The two regexes for
          # ISBN/ASIN, sources, and length apply only to the first segment
          # (index zero); the shared series-and-extra-info regexes apply to
          # every segment.
          # @param segment_index [Integer]
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            first_segment_regexes =
              if segment_index.zero?
                [
                  # ISBN/ASIN and length (without sources)
                  %r{\A
                    (
                      (?<isbn>(\d{3}[-\s]?)?[A-Z\d]{10})
                      ,?(\s+|\z)
                    )?
                    (
                      (?<length_pages>\d+)p?
                      |
                      (?<length_time>\d+:\d\d)
                    )?
                  \z}x,
                  # sources, ISBN/ASIN, length
                  %r{\A
                    (
                      (?<sources>.+?)
                      ,?(\s+|\z)
                    )?
                    (
                      (
                        (?<isbn>#{ISBN_REGEX})
                        |
                        (?<asin>#{ASIN_REGEX})
                      )
                      ,?(\s+|\z)
                    )?
                    (
                      (?<length_pages>\d+)p?
                      |
                      (?<length_time>\d+:\d\d)
                    )?
                  \z}xo,
                ]
              else
                []
              end

            [
              *first_segment_regexes,
              *Column::SHARED_REGEXES[:series_and_extra_info],
            ].compact
          end
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Reading
  module Parsing
    module Rows
      module Regular
        # Parsing rules for the Start Dates column of a regular row.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
        class StartDates < Column
          # @return [Regexp] matches a comma along with any following whitespace.
          def self.segment_separator
            /,\s*/
          end

          # The single regex that a start date entry may match. Every part is
          # optional, in this order: dnf/progress (the shared progress regex
          # from Column), date (yyyy/m/d), variant number ("v" and one digit),
          # and group (text after 🤝🏼).
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            start_date_regex =
              %r{\A
                (
                  #{Column::SHARED_REGEXES[:progress]}
                  (\s+|\z)
                )?
                (
                  (?<date>\d{4}/\d\d?/\d\d?)
                  (\s+|\z)
                )?
                (
                  v(?<variant>\d)
                  (\s+|\z)
                )?
                (
                  🤝🏼(?<group>.+)
                )?
              \z}x

            [start_date_regex]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require_relative "attributes/shared"
2
+ require_relative "attributes/attribute"
3
+ require_relative "attributes/rating"
4
+ require_relative "attributes/author"
5
+ require_relative "attributes/title"
6
+ require_relative "attributes/genres"
7
+ require_relative "attributes/variants"
8
+ require_relative "attributes/experiences"
9
+ require_relative "attributes/notes"
10
+
11
module Reading
  module Parsing
    #
    # Turns the intermediate hash parsed from a CSV row into item data. The
    # intermediate hash follows the structure of the row, whereas the output
    # here is organized by item attribute. The attributes are the keys of
    # Config#default_config[:item_template], each handled by a class in
    # parsing/attributes.
    #
    class Transformer
      using Util::HashArrayDeepFetch
      using Util::HashCompactByTemplate

      attr_reader :config
      private attr_reader :attributes

      # @param config [Hash] an entire config.
      def initialize(config)
        @config = config

        set_attributes
      end

      # Builds one item hash per entry in the row's :head, by asking every
      # attribute in the item template for its transformed value and falling
      # back to the template's default when that value is nil or false.
      # @param parsed_row [Hash{Symbol => Hash, Array}] output from
      #   Parsing::Parser#parse_row_to_intermediate_hash.
      # @return [Array<Hash>] an array of Hashes like the template in
      #   Config#default_config[:item_template].
      # @raise [InvalidHeadError] if the row's :head is blank.
      def transform_intermediate_hash_to_item_hashes(parsed_row)
        heads = parsed_row[:head]
        raise InvalidHeadError, "Blank or missing Head column" if heads.blank?

        template = config.fetch(:item_template)

        heads.each_with_index.map do |_head, head_index|
          item_hash = template.to_h do |attribute_name, default_value|
            transformer = attributes.fetch(attribute_name)
            transformed_value = transformer.transform_from_parsed(parsed_row, head_index)

            [attribute_name, transformed_value || default_value]
          end

          item_hash.compact_by(template:)
        end
      end

      private

      # Instantiates (once) the attribute class for each key in the item
      # template. The class is looked up in Attributes by the camel-cased key,
      # e.g. :some_name => Attributes::SomeName. See parsing/attributes/*.
      def set_attributes
        @attributes ||= config.fetch(:item_template).to_h do |attribute_name, _default|
          class_name = attribute_name.to_s.split("_").map(&:capitalize).join

          [attribute_name, Attributes.const_get(class_name).new(config)]
        end
      end
    end
  end
end
@@ -9,6 +9,7 @@ module Reading
9
9
  # If no parsed data has been added to the template values for these, they
10
10
  # are considered blank, and are replaced with an empty array so that their
11
11
  # emptiness is more apparent, e.g. item[:experiences].empty? will return true.
12
+ # @return [Hash]
12
13
  def compact_by(template:)
13
14
  map { |key, val|
14
15
  if is_array_of_hashes?(val)
@@ -32,7 +32,7 @@ module Reading
32
32
  end
33
33
  }
34
34
  elsif block_given?
35
- block.call(key, this_val, other_val)
35
+ yield key, this_val, other_val
36
36
  else
37
37
  other_val
38
38
  end
@@ -3,6 +3,7 @@ module Reading
3
3
  # Converts a Hash to a Struct. Converts inner hashes (and inner arrays of hashes) as well.
4
4
  module HashToStruct
5
5
  refine Hash do
6
+ # @return [Struct]
6
7
  def to_struct
7
8
  MEMOIZED_STRUCTS[keys] ||= Struct.new(*keys)
8
9
  struct_class = MEMOIZED_STRUCTS[keys]
@@ -0,0 +1,12 @@
1
module Reading
  module Util
    # Same as #to_i but only applies if the result is equal to the original number.
    module NumericToIIfWhole
      refine Numeric do
        # Converts to an Integer when that loses no information.
        # @return [Integer, Numeric] the Integer equivalent if the number is
        #   whole, otherwise the number itself unchanged (including
        #   non-finite values such as Infinity and NaN).
        def to_i_if_whole
          # Infinity and NaN have no Integer form, and Float#to_i raises
          # FloatDomainError for them, so they are returned as they are.
          return self unless finite?

          truncated = to_i
          truncated == self ? truncated : self
        end
      end
    end
  end
end
@@ -1,13 +1,22 @@
1
1
  module Reading
2
2
  module Util
3
+ # Shortens the String to a given length.
3
4
  module StringTruncate
4
5
  refine String do
5
- def truncate(max, padding: 0, min: 30)
6
- end_index = max - padding
7
- end_index = min if end_index < min
8
- self.length + padding > max ? "#{self[0...end_index]}..." : self
6
+ # @param length [Integer]
7
+ # @return [String]
8
+ def truncate(length)
9
+ if length < self.length - ELLIPSIS.length
10
+ "#{self[0...length]}#{ELLIPSIS}"
11
+ else
12
+ self
13
+ end
9
14
  end
10
15
  end
16
+
17
+ private
18
+
19
+ ELLIPSIS = "...".freeze
11
20
  end
12
21
  end
13
22
  end
@@ -1,3 +1,3 @@
1
1
  module Reading
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/reading.rb ADDED
@@ -0,0 +1,18 @@
1
+ require_relative "reading/parsing/csv"
2
+ require_relative "reading/item/time_length.rb"
3
+
4
+ # The gem's public API. See https://github.com/fpsvogel/reading#usage
5
+
6
module Reading
  # Parses a CSV file or string. All arguments are forwarded to
  # Parsing::CSV.new, and the result of calling #parse on that object is
  # returned. See Parsing::CSV#initialize and #parse for details.
  def self.parse(...)
    Parsing::CSV.new(...).parse
  end

  # Builds a time length by delegating to Item::TimeLength.parse.
  # @param string [String] a time duration in "h:mm" format.
  # @return [Reading::Item::TimeLength]
  def self.time(string)
    Item::TimeLength.parse(string)
  end
end