reading 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/bin/reading +8 -8
  3. data/bin/readingfile +31 -0
  4. data/lib/reading/config.rb +115 -148
  5. data/lib/reading/errors.rb +11 -64
  6. data/lib/reading/item/time_length.rb +138 -0
  7. data/lib/reading/parsing/attributes/attribute.rb +26 -0
  8. data/lib/reading/parsing/attributes/author.rb +15 -0
  9. data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
  10. data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
  11. data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
  12. data/lib/reading/parsing/attributes/experiences.rb +27 -0
  13. data/lib/reading/parsing/attributes/genres.rb +16 -0
  14. data/lib/reading/parsing/attributes/notes.rb +22 -0
  15. data/lib/reading/parsing/attributes/rating.rb +17 -0
  16. data/lib/reading/parsing/attributes/shared.rb +62 -0
  17. data/lib/reading/parsing/attributes/title.rb +21 -0
  18. data/lib/reading/parsing/attributes/variants.rb +77 -0
  19. data/lib/reading/parsing/csv.rb +101 -0
  20. data/lib/reading/parsing/parser.rb +292 -0
  21. data/lib/reading/parsing/rows/column.rb +131 -0
  22. data/lib/reading/parsing/rows/comment.rb +26 -0
  23. data/lib/reading/parsing/rows/compact_planned.rb +30 -0
  24. data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
  25. data/lib/reading/parsing/rows/regular.rb +33 -0
  26. data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
  27. data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
  28. data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
  29. data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
  30. data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
  31. data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
  32. data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
  33. data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
  34. data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
  35. data/lib/reading/parsing/transformer.rb +70 -0
  36. data/lib/reading/util/hash_compact_by_template.rb +1 -0
  37. data/lib/reading/util/hash_deep_merge.rb +1 -1
  38. data/lib/reading/util/hash_to_struct.rb +1 -0
  39. data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
  40. data/lib/reading/util/string_truncate.rb +13 -4
  41. data/lib/reading/version.rb +1 -1
  42. data/lib/reading.rb +18 -0
  43. metadata +58 -41
  44. data/lib/reading/attribute/all_attributes.rb +0 -83
  45. data/lib/reading/attribute/attribute.rb +0 -25
  46. data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
  47. data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
  48. data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
  49. data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
  50. data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
  51. data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
  52. data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
  53. data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
  54. data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
  55. data/lib/reading/csv.rb +0 -67
  56. data/lib/reading/line.rb +0 -23
  57. data/lib/reading/row/blank_row.rb +0 -23
  58. data/lib/reading/row/compact_planned_row.rb +0 -130
  59. data/lib/reading/row/regular_row.rb +0 -94
  60. data/lib/reading/row/row.rb +0 -88
@@ -0,0 +1,292 @@
1
+ require_relative "rows/regular"
2
+ require_relative "rows/compact_planned"
3
+ require_relative "rows/comment"
4
+
5
+ module Reading
6
+ module Parsing
7
+ #
8
+ # Parses a string containing a row of a CSV reading log, into a hash
9
+ # mirroring the structure of the row. This hash is an intermediate form and
10
+ # not the final item data. It's the raw material for Parsing::Transformer to
11
+ # generate the final item data.
12
+ #
13
+ # Below is an example intermediate hash parsed from this row, which has a Rating
14
+ # column, then a Head column containing an author, title, series, and extra info:
15
+ #
16
+ # 3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
17
+ #
18
+ # {
19
+ # rating: { number: "3" },
20
+ # head: [{
21
+ # author: "Thomas More",
22
+ # title: "Utopia",
23
+ # series_names: ["Cambridge History of Political Thought"],
24
+ # series_volumes: [nil],
25
+ # extra_info: ["trans. Robert Adams", "ed. George Logan"],
26
+ # format: :print,
27
+ # }]
28
+ # }
29
+ #
30
+ # The hash's top-level keys are column names. The nested keys come from
31
+ # regex capture group names in each column (for this example, see ::regexes
32
+ # in rating.rb and head.rb in parsing/rows/regular_columns).
33
+ #
34
+ # All the rest is just details of how the parts of a column are joined:
35
+ #
36
+ # - The :head value is an array because Head.split_by_format? is
37
+ # true (because a Head column can potentially contain multiple items).
38
+ # That's also where { format: :print } comes from.
39
+ #
40
+ # - The :series_names and :series_volumes values are arrays because these
41
+ # keys are in Head.flatten_into_arrays, which causes the column's segments
42
+ # (separated by " -- ") to be merged into one hash.
43
+ #
44
+ class Parser
45
+ using Util::HashArrayDeepFetch
46
+ using Util::StringRemove
47
+
48
+ attr_reader :config
49
+
50
+ # @param config [Hash] an entire config.
51
+ def initialize(config)
52
+ @config = config
53
+ end
54
+
55
+ # Parses a row string into a hash that mirrors the structure of the row.
56
+ # @param string [String] a string containing a row of a CSV reading log.
57
+ # @return [Hash]
58
+ def parse_row_to_intermediate_hash(string)
59
+ columns = extract_columns(string)
60
+
61
+ if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
62
+ return {}
63
+ end
64
+
65
+ columns.map { |column, column_string|
66
+ parse_column(column, column_string)
67
+ }.to_h
68
+ end
69
+
70
+ private
71
+
72
+ # Splits the row string by column and pairs them in a hash with column
73
+ # classes, which contain the information necessary to parse each column.
74
+ # @param string [String] a string containing a row of a CSV reading log.
75
+ # @return [Hash{Class => String}] a hash whose keys are classes inheriting
76
+ # Parsing::Rows::Column.
77
+ def extract_columns(string)
78
+ clean_string = string.dup.force_encoding(Encoding::UTF_8)
79
+ column_strings = clean_string.split(config.fetch(:column_separator))
80
+
81
+ row_types = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
82
+ column_classes = row_types
83
+ .find { |row_type| row_type.match?(string, config) }
84
+ .column_classes
85
+ .filter { |column_class|
86
+ config.fetch(:enabled_columns).include?(column_class.to_sym)
87
+ }
88
+
89
+ if !column_classes.count.zero? && column_strings.count > column_classes.count
90
+ raise TooManyColumnsError, "Too many columns"
91
+ end
92
+
93
+ column_classes
94
+ .zip(column_strings)
95
+ .reject { |_class, string| string.nil? }
96
+ .to_h
97
+ end
98
+
99
+ # Parses a column into an array of two elements (a key for the column name
100
+ # and a value of its contents).
101
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
102
+ # @param column_string [String] a string containing a column from a row.
103
+ # @return [Array(Symbol, Hash), Array(Symbol, Array)]
104
+ def parse_column(column_class, column_string)
105
+ # Multiple format emojis are possible in some columns:
106
+ # - Head column, for multiple items.
107
+ # - Sources column, for multiple variants of an item.
108
+ # - Compact planned head column, for multiple items.
109
+ # This is the default case below the two guard clauses. It's more complex
110
+ # because there's possibly a string before the first format, and there's
111
+ # an extra level of nesting in the returned array.
112
+
113
+ # Simplest case: if the column is never split by format, return the
114
+ # column name and the parsed segment(s), which is either a Hash (if the
115
+ # column can't have multiple segments or if its segments are flattened)
116
+ # or an Array (if there are multiple segments and they're not flattened).
117
+ if !column_class.split_by_format?
118
+ parsed_column = parse_segments(column_class, column_string)
119
+ return [column_class.to_sym, parsed_column]
120
+ end
121
+
122
+ # Also simple: if the column *can* be split by format but in this row
123
+ # it doesn't contain any format emojis, return the same as above but
124
+ # with an extra level of nesting (except when the parsed result is blank, in which case the array is empty).
125
+ if column_class.split_by_format? &&
126
+ !column_string.match?(config.deep_fetch(:regex, :formats))
127
+
128
+ parsed_column = parse_segments(column_class, column_string)
129
+ # Wrap a non-empty value in an array so that e.g. a head without
130
+ # emojis is still an array. This way the extra level of nesting can
131
+ # be consistently expected for columns that *can* be split by format.
132
+ parsed_column_nonempty_nested = [parsed_column.presence].compact
133
+ return [column_class.to_sym, parsed_column_nonempty_nested]
134
+ end
135
+
136
+ # The rest is the complex case: if the column *can and is* split by format.
137
+
138
+ # Each format plus the string after it.
139
+ format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))
140
+
141
+ # If there's a string before the first format, e.g. "DNF" in Head column.
142
+ unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
143
+ before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
144
+ end
145
+
146
+ # Parse each format-plus-string into an array of segments.
147
+ heads = format_strings.map { |string|
148
+ format_emoji = string[config.deep_fetch(:regex, :formats)]
149
+ string.remove!(format_emoji)
150
+ format = config.fetch(:formats).key(format_emoji)
151
+
152
+ parse_segments(column_class, string)
153
+ .merge(format: format)
154
+ }
155
+
156
+ # Combine values of conflicting keys so that in a compact planned
157
+ # Head column, sources from before_formats are not ignored.
158
+ if before_formats
159
+ heads.each do |head|
160
+ head.merge!(before_formats) do |k, old_v, new_v|
161
+ (new_v + old_v).uniq
162
+ end
163
+ end
164
+ end
165
+
166
+ [column_class.to_sym, heads]
167
+ end
168
+
169
+ # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
170
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
171
+ # @param string [String] a string containing segments, which is either an
172
+ # entire column or (for columns that are split by format emoji) a string
173
+ # following a format emoji.
174
+ # @return [Array<Hash>, Hash] either an array of parsed segments (hashes),
175
+ # or a single hash if the column can't be split by segment or if the
176
+ # segments are flattened into one hash.
177
+ def parse_segments(column_class, string)
178
+ return {} if string.blank?
179
+
180
+ # If the column can't be split by segment, parse as a single segment.
181
+ if !column_class.split_by_segment?
182
+ return parse_segment(column_class, string)
183
+ end
184
+
185
+ # Add an extra level of nesting if the column can have segment groups,
186
+ # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
187
+ if column_class.split_by_segment_group?
188
+ segments = string
189
+ .split(column_class.segment_group_separator)
190
+ .map { |segment_group|
191
+ segment_group
192
+ .split(column_class.segment_separator)
193
+ .map.with_index { |segment, i|
194
+ parse_segment(column_class, segment, i)
195
+ }
196
+ }
197
+ else
198
+ segments = string
199
+ .split(column_class.segment_separator)
200
+ .map.with_index { |segment, i|
201
+ parse_segment(column_class, segment, i)
202
+ }
203
+ end
204
+
205
+ if column_class.flatten_into_arrays.any?
206
+ segments = segments.reduce { |merged, segment|
207
+ merged.merge!(segment) { |_k, old_v, new_v|
208
+ # old_v is already an array by this point, since its key should be
209
+ # in Column.flatten_into_arrays
210
+ old_v + new_v
211
+ }
212
+ }
213
+ end
214
+
215
+ segments
216
+ end
217
+
218
+ # Parses a segment using a regular expression from the column class.
219
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
220
+ # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
221
+ # @param segment_index [Integer] the position of the segment when it's
222
+ # part of a series of segments; this can change which regular expressions
223
+ # are applicable to it.
224
+ # @param before_formats [Boolean] whether to use the before-formats regexes.
225
+ # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
226
+ # unless changed via column_class.tweaks or column_class.flatten_into_arrays.
227
+ # Example: { author: "Bram Stoker", title: "Dracula"}
228
+ def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
229
+ if before_formats
230
+ regexes = column_class.regexes_before_formats
231
+ else
232
+ regexes = column_class.regexes(segment_index)
233
+ end
234
+
235
+ parsed_segment = nil
236
+ regexes.each do |regex|
237
+ parsed_segment = parse_segment_with_regex(segment, regex)
238
+ break if parsed_segment
239
+ end
240
+
241
+ if parsed_segment.nil?
242
+ raise ParsingError, "Could not parse \"#{segment}\" in " \
243
+ "the #{column_class.column_name} column"
244
+ end
245
+
246
+ tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
247
+ end
248
+
249
+ # Parses a segment using the given regular expression.
250
+ # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
251
+ # @param regex [Regexp] the regular expression with which to parse the segment.
252
+ # @return [Hash{Symbol => String}, nil] e.g. { author: "Bram Stoker", title: "Dracula"}, or nil if the regex doesn't match the segment.
253
+ def parse_segment_with_regex(segment, regex)
254
+ segment
255
+ .tr(config.fetch(:ignored_characters), "")
256
+ .strip
257
+ .match(regex)
258
+ &.named_captures
259
+ &.compact
260
+ &.transform_keys(&:to_sym)
261
+ &.transform_values(&:strip)
262
+ &.transform_values(&:presence)
263
+ end
264
+
265
+ # Modify the values of the parsed segment according to column_class.tweaks,
266
+ # and wrap them in an array according to column_class.flatten_into_arrays.
267
+ # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula"}
268
+ # @return [Hash{Symbol => Object}]
269
+ def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
270
+ column_class.tweaks.each do |key, tweak|
271
+ if parsed_segment.has_key?(key)
272
+ parsed_segment[key] = tweak.call(parsed_segment[key])
273
+ end
274
+ end
275
+
276
+ # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
277
+ column_class.flatten_into_arrays.each do |key|
278
+ if parsed_segment.has_key?(key)
279
+ val = parsed_segment[key]
280
+ # Not using Array(val) because that results in an empty array when
281
+ # val is nil, and the nil must be preserved for series name and
282
+ # volume arrays to line up with an equal number of elements (because
283
+ # the volume may be nil).
284
+ parsed_segment[key] = [val] if !val.is_a?(Array)
285
+ end
286
+ end
287
+
288
+ parsed_segment
289
+ end
290
+ end
291
+ end
292
+ end
@@ -0,0 +1,131 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ # The base class for all the columns in parsing/rows/compact_planned_columns
5
+ # and parsing/rows/regular_columns.
6
+ class Column
7
+ # The class name changed into a string, e.g. StartDates => "Start Dates"
8
+ # @return [String]
9
+ def self.column_name
10
+ class_name = name.split("::").last
11
+ class_name.gsub(/(.)([A-Z])/,'\1 \2')
12
+ end
13
+
14
+ # The class name changed into a symbol, e.g. StartDates => :start_dates
15
+ # @return [Symbol]
16
+ def self.to_sym
17
+ class_name = name.split("::").last
18
+ class_name
19
+ .gsub(/(.)([A-Z])/,'\1_\2')
20
+ .downcase
21
+ .to_sym
22
+ end
23
+
24
+ # Whether the column can contain "chunks" each set off by a format emoji.
25
+ # For example, the Head column of a compact planned row typically
26
+ # contains a list of multiple items. (The two others are the Sources
27
+ # column, for multiple variants of an item; and the regular Head column,
28
+ # for multiple items.)
29
+ # @return [Boolean]
30
+ def self.split_by_format?
31
+ false
32
+ end
33
+
34
+ # Whether the column can contain multiple segments, e.g. "Cosmos -- 2013 paperback"
35
+ # @return [Boolean]
36
+ def self.split_by_segment?
37
+ !!segment_separator
38
+ end
39
+
40
+ # The regular expression used to split segments (e.g. /\s*--\s*/),
41
+ # or nil if the column should not be split by segment.
42
+ # @return [Regexp, nil]
43
+ def self.segment_separator
44
+ nil
45
+ end
46
+
47
+ # Whether the column can contain multiple segment groups, e.g.
48
+ # "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
49
+ # @return [Boolean]
50
+ def self.split_by_segment_group?
51
+ !!segment_group_separator
52
+ end
53
+
54
+ # The regular expression used to split segment groups (e.g. /\s*----\s*/),
55
+ # or nil if the column should not be split by segment group.
56
+ # @return [Regexp, nil]
57
+ def self.segment_group_separator
58
+ nil
59
+ end
60
+
61
+ # Adjustments that are made to captured values at the end of parsing
62
+ # the column. For example, if ::regexes includes a capture group named
63
+ # "sources" and it needs to be split by commas:
64
+ # { sources: -> { _1.split(/\s*,\s*/) } }
65
+ # @return [Hash{Symbol => Proc}]
66
+ def self.tweaks
67
+ {}
68
+ end
69
+
70
+ # Keys in the parsed output hash that should be converted to an array, even
71
+ # if only one value was in the input, as in { ... extra_info: ["ed. Jane Doe"] }
72
+ # @return [Array<Symbol>]
73
+ def self.flatten_into_arrays
74
+ []
75
+ end
76
+
77
+ # The regular expressions used to parse the column (except the part of
78
+ # the column before the first format emoji, which is in
79
+ # ::regexes_before_formats below). An array because sometimes it's
80
+ # simpler to try several smaller regular expressions in series, and
81
+ # because a regular expression might be applicable only for segments in
82
+ # a certain position. See parsing/rows/regular_columns/head.rb for an example.
83
+ # @param segment_index [Integer] the position of the current segment.
84
+ # @return [Array<Regexp>]
85
+ def self.regexes(segment_index)
86
+ []
87
+ end
88
+
89
+ # The regular expressions used to parse the part of the column before
90
+ # the first format emoji.
91
+ # @return [Array<Regexp>]
92
+ def self.regexes_before_formats
93
+ []
94
+ end
95
+
96
+ # Regular expressions that are shared across more than one column,
97
+ # placed here just to be DRY.
98
+ SHARED_REGEXES = {
99
+ progress: %r{
100
+ (DNF\s+)?(?<progress_percent>\d\d?)%
101
+ |
102
+ (DNF\s+)?p?(?<progress_pages>\d+)p?
103
+ |
104
+ (DNF\s+)?(?<progress_time>\d+:\d\d)
105
+ |
106
+ # just DNF
107
+ (?<progress_dnf>DNF)
108
+ }x,
109
+ series_and_extra_info: [
110
+ # just series
111
+ %r{\A
112
+ in\s(?<series_names>.+)
113
+ # empty volume so that names and volumes have equal sizes when turned into arrays
114
+ (?<series_volumes>)
115
+ \z}x,
116
+ # series and volume
117
+ %r{\A
118
+ (?<series_names>.+?)
119
+ ,?\s*
120
+ \#(?<series_volumes>\d+)
121
+ \z}x,
122
+ # extra info
123
+ %r{\A
124
+ (?<extra_info>.+)
125
+ \z}x,
126
+ ],
127
+ }.freeze
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,26 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ # A row that is a comment.
5
+ module Comment
6
+ using Util::HashArrayDeepFetch
7
+
8
+ # No columns; comments are parsed as if the row were blank.
9
+ # @return [Array]
10
+ def self.column_classes
11
+ []
12
+ end
13
+
14
+ # Starts with a comment character. Commented rows that include format
15
+ # emojis are matched as compact planned rows instead, because the parser
16
+ # checks Rows::CompactPlanned before Rows::Comment.
17
+ # @param row_string [String]
18
+ # @param config [Hash]
19
+ # @return [Boolean]
20
+ def self.match?(row_string, config)
21
+ row_string.lstrip.start_with?(config.fetch(:comment_character))
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,30 @@
1
+ require_relative "column"
2
+ require_relative "compact_planned_columns/head"
3
+ require_relative "regular_columns/sources"
4
+
5
+ module Reading
6
+ module Parsing
7
+ module Rows
8
+ # A row that contains compact planned items.
9
+ module CompactPlanned
10
+ using Util::HashArrayDeepFetch
11
+
12
+ # The columns that are possible in this type of row.
13
+ # @return [Array<Class>]
14
+ def self.column_classes
15
+ [CompactPlanned::Head, Regular::Sources]
16
+ end
17
+
18
+ # Starts with a comment character, includes one or more format emojis, and has no more column separators than ::column_classes allows.
19
+ # @param row_string [String]
20
+ # @param config [Hash]
21
+ # @return [Boolean]
22
+ def self.match?(row_string, config)
23
+ row_string.lstrip.start_with?(config.fetch(:comment_character)) &&
24
+ row_string.match?(config.deep_fetch(:regex, :formats)) &&
25
+ row_string.count(config.fetch(:column_separator)) <= column_classes.count - 1
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,60 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module CompactPlanned
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#compact-planned-items
6
+ # and the sections following.
7
+ class Head < Column
8
+ def self.split_by_format?
9
+ true
10
+ end
11
+
12
+ def self.regexes_before_formats
13
+ [
14
+ %r{\A
15
+ \\ # comment character
16
+ \s*
17
+ (
18
+ (?<genres>[^a-z]+)?
19
+ \s*
20
+ (?<sources>@.+)?
21
+ \s*:
22
+ )?
23
+ \z}x,
24
+ ]
25
+ end
26
+
27
+ def self.segment_separator
28
+ /\s*--\s*/
29
+ end
30
+
31
+ def self.flatten_into_arrays
32
+ %i[extra_info series_names series_volumes]
33
+ end
34
+
35
+ def self.tweaks
36
+ {
37
+ genres: -> { _1.downcase.split(/\s*,\s*/) },
38
+ sources: -> { _1.split(/\s*@/).map(&:presence).compact }
39
+ }
40
+ end
41
+
42
+ def self.regexes(segment_index)
43
+ [
44
+ # author, title, sources
45
+ (%r{\A
46
+ (
47
+ (?<author>[^@]+?)
48
+ \s+-\s+
49
+ )?
50
+ (?<title>[^@]+)
51
+ (?<sources>@.+)?
52
+ \z}x if segment_index.zero?),
53
+ *Column::SHARED_REGEXES[:series_and_extra_info],
54
+ ].compact
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,33 @@
1
+ require_relative "column"
2
+ require_relative "regular_columns/rating"
3
+ require_relative "regular_columns/head"
4
+ require_relative "regular_columns/sources"
5
+ require_relative "regular_columns/start_dates"
6
+ require_relative "regular_columns/end_dates"
7
+ require_relative "regular_columns/genres"
8
+ require_relative "regular_columns/length"
9
+ require_relative "regular_columns/notes"
10
+ require_relative "regular_columns/history"
11
+
12
+ module Reading
13
+ module Parsing
14
+ module Rows
15
+ # A normal row of (usually) one item.
16
+ module Regular
17
+ # The columns that are possible in this type of row.
18
+ # @return [Array<Class>]
19
+ def self.column_classes
20
+ [Rating, Head, Sources, StartDates, EndDates, Genres, Length, Notes, History]
21
+ end
22
+
23
+ # Does not start with a comment character.
24
+ # @param row_string [String]
25
+ # @param config [Hash]
26
+ # @return [Boolean]
27
+ def self.match?(row_string, config)
28
+ !row_string.lstrip.start_with?(config.fetch(:comment_character))
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,20 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
6
+ class EndDates < Column
7
+ def self.segment_separator
8
+ /,\s*/
9
+ end
10
+
11
+ def self.regexes(segment_index)
12
+ [%r{\A
13
+ (?<date>\d{4}/\d\d?/\d\d?)
14
+ \z}x]
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,20 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#genres-column
6
+ class Genres < Column
7
+ def self.segment_separator
8
+ /,\s*/
9
+ end
10
+
11
+ def self.regexes(segment_index)
12
+ [%r{\A
13
+ (?<genre>.+)
14
+ \z}x]
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,45 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-title
6
+ # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-dnf
7
+ # and the sections following.
8
+ class Head < Column
9
+ def self.split_by_format?
10
+ true
11
+ end
12
+
13
+ def self.regexes_before_formats
14
+ [
15
+ /\A#{Column::SHARED_REGEXES[:progress]}\z/,
16
+ /.+/,
17
+ ]
18
+ end
19
+
20
+ def self.segment_separator
21
+ /\s*--\s*/
22
+ end
23
+
24
+ def self.flatten_into_arrays
25
+ %i[extra_info series_names series_volumes]
26
+ end
27
+
28
+ def self.regexes(segment_index)
29
+ [
30
+ # author and title
31
+ (%r{\A
32
+ (
33
+ (?<author>.+?)
34
+ \s+-\s+
35
+ )?
36
+ (?<title>.+)
37
+ \z}x if segment_index.zero?),
38
+ *Column::SHARED_REGEXES[:series_and_extra_info],
39
+ ].compact
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end