reading 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/bin/reading +5 -5
  3. data/bin/readingfile +31 -0
  4. data/lib/reading/config.rb +115 -149
  5. data/lib/reading/errors.rb +10 -66
  6. data/lib/reading/item/time_length.rb +138 -0
  7. data/lib/reading/parsing/attributes/attribute.rb +26 -0
  8. data/lib/reading/parsing/attributes/author.rb +15 -0
  9. data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
  10. data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
  11. data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
  12. data/lib/reading/parsing/attributes/experiences.rb +27 -0
  13. data/lib/reading/parsing/attributes/genres.rb +16 -0
  14. data/lib/reading/parsing/attributes/notes.rb +22 -0
  15. data/lib/reading/parsing/attributes/rating.rb +17 -0
  16. data/lib/reading/parsing/attributes/shared.rb +62 -0
  17. data/lib/reading/parsing/attributes/title.rb +21 -0
  18. data/lib/reading/parsing/attributes/variants.rb +77 -0
  19. data/lib/reading/parsing/csv.rb +101 -0
  20. data/lib/reading/parsing/parser.rb +292 -0
  21. data/lib/reading/parsing/rows/column.rb +131 -0
  22. data/lib/reading/parsing/rows/comment.rb +26 -0
  23. data/lib/reading/parsing/rows/compact_planned.rb +30 -0
  24. data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
  25. data/lib/reading/parsing/rows/regular.rb +33 -0
  26. data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
  27. data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
  28. data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
  29. data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
  30. data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
  31. data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
  32. data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
  33. data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
  34. data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
  35. data/lib/reading/parsing/transformer.rb +70 -0
  36. data/lib/reading/util/hash_compact_by_template.rb +1 -0
  37. data/lib/reading/util/hash_deep_merge.rb +1 -1
  38. data/lib/reading/util/hash_to_struct.rb +1 -0
  39. data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
  40. data/lib/reading/util/string_truncate.rb +13 -4
  41. data/lib/reading/version.rb +1 -1
  42. data/lib/reading.rb +18 -0
  43. metadata +58 -41
  44. data/lib/reading/attribute/all_attributes.rb +0 -83
  45. data/lib/reading/attribute/attribute.rb +0 -25
  46. data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
  47. data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
  48. data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
  49. data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
  50. data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
  51. data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
  52. data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
  53. data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
  54. data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
  55. data/lib/reading/csv.rb +0 -76
  56. data/lib/reading/line.rb +0 -23
  57. data/lib/reading/row/blank_row.rb +0 -23
  58. data/lib/reading/row/compact_planned_row.rb +0 -130
  59. data/lib/reading/row/regular_row.rb +0 -99
  60. data/lib/reading/row/row.rb +0 -88
@@ -0,0 +1,292 @@
1
+ require_relative "rows/regular"
2
+ require_relative "rows/compact_planned"
3
+ require_relative "rows/comment"
4
+
5
+ module Reading
6
+ module Parsing
7
+ #
8
+ # Parses a string containing a row of a CSV reading log, into a hash
9
+ # mirroring the structure of the row. This hash is an intermediate form and
10
+ # not the final item data. It's the raw material for Parsing::Transformer to
11
+ # generate the final item data.
12
+ #
13
+ # Below is an example intermediate hash parsed from this row, which has a Rating
14
+ # column, then a Head column containing an author, title, series, and extra info:
15
+ #
16
+ # 3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
17
+ #
18
+ # {
19
+ # rating: { number: "3" },
20
+ # head: [{
21
+ # author: "Thomas More",
22
+ # title: "Utopia",
23
+ # series_names: ["Cambridge History of Political Thought"],
24
+ # series_volumes: [nil],
25
+ # extra_info: ["trans. Robert Adams", "ed. George Logan"],
26
+ # format: :print,
27
+ # }]
28
+ # }
29
+ #
30
+ # The hash's top-level keys are column names. The nested keys come from
31
+ # regex capture group names in each column (for this example, see ::regexes
32
+ # in rating.rb and head.rb in parsing/rows/regular_columns).
33
+ #
34
+ # All the rest is just details of how the parts of a column are joined:
35
+ #
36
+ # - The :head value is an array because Head.split_by_format? is
37
+ # true (because a Head column can potentially contain multiple items).
38
+ # That's also where { format: :print } comes from.
39
+ #
40
+ # - The :series_names, :series_volumes, and :extra_info values are arrays
41
+ # because these keys are in Head.flatten_into_arrays, which causes the
42
+ # column's segments (separated by " -- ") to be merged into one hash.
43
+ #
44
+ class Parser
45
+ using Util::HashArrayDeepFetch
46
+ using Util::StringRemove
47
+
48
+ attr_reader :config
49
+
50
+ # @param config [Hash] an entire config.
51
+ def initialize(config)
52
+ @config = config
53
+ end
54
+
55
+ # Parses a row string into a hash that mirrors the structure of the row.
56
+ # @param string [String] a string containing a row of a CSV reading log.
57
+ # @return [Hash]
58
+ def parse_row_to_intermediate_hash(string)
59
+ columns = extract_columns(string)
60
+
61
+ if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
62
+ return {}
63
+ end
64
+
65
+ columns.map { |column, column_string|
66
+ parse_column(column, column_string)
67
+ }.to_h
68
+ end
69
+
70
+ private
71
+
72
+ # Splits the row string by column and pairs them in a hash with column
73
+ # classes, which contain the information necessary to parse each column.
74
+ # @param string [String] a string containing a row of a CSV reading log.
75
+ # @return [Hash{Class => String}] a hash whose keys are classes inheriting
76
+ # Parsing::Rows::Column.
77
+ def extract_columns(string)
78
+ clean_string = string.dup.force_encoding(Encoding::UTF_8)
79
+ column_strings = clean_string.split(config.fetch(:column_separator))
80
+
81
+ row_types = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
82
+ column_classes = row_types
83
+ .find { |row_type| row_type.match?(string, config) }
84
+ .column_classes
85
+ .filter { |column_class|
86
+ config.fetch(:enabled_columns).include?(column_class.to_sym)
87
+ }
88
+
89
+ if !column_classes.count.zero? && column_strings.count > column_classes.count
90
+ raise TooManyColumnsError, "Too many columns"
91
+ end
92
+
93
+ column_classes
94
+ .zip(column_strings)
95
+ .reject { |_class, string| string.nil? }
96
+ .to_h
97
+ end
98
+
99
+ # Parses a column into an array of two elements (a key for the column name
100
+ # and a value of its contents).
101
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
102
+ # @param column_string [String] a string containing a column from a row.
103
+ # @return [Array(Symbol, Hash), Array(Symbol, Array)]
104
+ def parse_column(column_class, column_string)
105
+ # Multiple format emojis are possible in some columns:
106
+ # - Head column, for multiple items.
107
+ # - Sources column, for multiple variants of an item.
108
+ # - Compact planned head column, for multiple items.
109
+ # This is the default case below the two guard clauses. It's more complex
110
+ # because there's possibly a string before the first format, and there's
111
+ # an extra level of nesting in the returned array.
112
+
113
+ # Simplest case: if the column is never split by format, return the
114
+ # column name and the parsed segment(s), which is either a Hash (if the
115
+ # column can't have multiple segments or if its segments are flattened)
116
+ # or an Array (if there are multiple segments and they're not flattened).
117
+ if !column_class.split_by_format?
118
+ parsed_column = parse_segments(column_class, column_string)
119
+ return [column_class.to_sym, parsed_column]
120
+ end
121
+
122
+ # Also simple: if the column *can* be split by format but in this row
123
+ # it doesn't contain any format emojis, return the same as above but
124
+ # with an extra level of nesting (except when the parsed result is empty).
125
+ if column_class.split_by_format? &&
126
+ !column_string.match?(config.deep_fetch(:regex, :formats))
127
+
128
+ parsed_column = parse_segments(column_class, column_string)
129
+ # Wrap a non-empty value in an array so that e.g. a head without
130
+ # emojis is still an array. This way the extra level of nesting can
131
+ # be consistently expected for columns that *can* be split by format.
132
+ parsed_column_nonempty_nested = [parsed_column.presence].compact
133
+ return [column_class.to_sym, parsed_column_nonempty_nested]
134
+ end
135
+
136
+ # The rest is the complex case: if the column *can be and is* split by format.
137
+
138
+ # Each format plus the string after it.
139
+ format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))
140
+
141
+ # If there's a string before the first format, e.g. "DNF" in Head column.
142
+ unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
143
+ before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
144
+ end
145
+
146
+ # Parse each format-plus-string into an array of segments.
147
+ heads = format_strings.map { |string|
148
+ format_emoji = string[config.deep_fetch(:regex, :formats)]
149
+ string.remove!(format_emoji)
150
+ format = config.fetch(:formats).key(format_emoji)
151
+
152
+ parse_segments(column_class, string)
153
+ .merge(format: format)
154
+ }
155
+
156
+ # Combine values of conflicting keys so that in a compact planned
157
+ # Head column, sources from before_formats are not ignored.
158
+ if before_formats
159
+ heads.each do |head|
160
+ head.merge!(before_formats) do |k, old_v, new_v|
161
+ (new_v + old_v).uniq
162
+ end
163
+ end
164
+ end
165
+
166
+ [column_class.to_sym, heads]
167
+ end
168
+
169
+ # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
170
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
171
+ # @param string [String] a string containing segments, which is either an
172
+ # entire column or (for columns that are split by format emoji) a string
173
+ # following a format emoji.
174
+ # @return [Array<Hash>, Array<Array<Hash>>, Hash] an array of parsed segments
175
+ # (hashes; nested in one array per segment group if the column has segment groups),
176
+ # or a single hash if the column can't be split by segment or its segments are flattened.
177
+ def parse_segments(column_class, string)
178
+ return {} if string.blank?
179
+
180
+ # If the column can't be split by segment, parse as a single segment.
181
+ if !column_class.split_by_segment?
182
+ return parse_segment(column_class, string)
183
+ end
184
+
185
+ # Add an extra level of nesting if the column can have segment groups,
186
+ # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
187
+ if column_class.split_by_segment_group?
188
+ segments = string
189
+ .split(column_class.segment_group_separator)
190
+ .map { |segment_group|
191
+ segment_group
192
+ .split(column_class.segment_separator)
193
+ .map.with_index { |segment, i|
194
+ parse_segment(column_class, segment, i)
195
+ }
196
+ }
197
+ else
198
+ segments = string
199
+ .split(column_class.segment_separator)
200
+ .map.with_index { |segment, i|
201
+ parse_segment(column_class, segment, i)
202
+ }
203
+ end
204
+
205
+ if column_class.flatten_into_arrays.any?
206
+ segments = segments.reduce { |merged, segment|
207
+ merged.merge!(segment) { |_k, old_v, new_v|
208
+ # old_v is already an array by this point, since its key should be
209
+ # in Column.flatten_into_arrays
210
+ old_v + new_v
211
+ }
212
+ }
213
+ end
214
+
215
+ segments
216
+ end
217
+
218
+ # Parses a segment using a regular expression from the column class.
219
+ # @param column_class [Class] a class inheriting Parsing::Rows::Column.
220
+ # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
221
+ # @param segment_index [Integer] the position of the segment when it's
222
+ # part of a series of segments; this can change which regular expressions
223
+ # are applicable to it.
224
+ # @param before_formats [Boolean] whether to use the before-formats regexes.
225
+ # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
226
+ # unless changed via column_class.tweaks or column_class.flatten_into_arrays.
227
+ # Example: { author: "Bram Stoker", title: "Dracula"}
228
+ def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
229
+ if before_formats
230
+ regexes = column_class.regexes_before_formats
231
+ else
232
+ regexes = column_class.regexes(segment_index)
233
+ end
234
+
235
+ parsed_segment = nil
236
+ regexes.each do |regex|
237
+ parsed_segment = parse_segment_with_regex(segment, regex)
238
+ break if parsed_segment
239
+ end
240
+
241
+ if parsed_segment.nil?
242
+ raise ParsingError, "Could not parse \"#{segment}\" in " \
243
+ "the #{column_class.column_name} column"
244
+ end
245
+
246
+ tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
247
+ end
248
+
249
+ # Parses a segment using the given regular expression.
250
+ # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
251
+ # @param regex [Regexp] the regular expression with which to parse the segment.
252
+ # @return [Hash{Symbol => String, nil}, nil] e.g. { author: "Bram Stoker", title: "Dracula"}, or nil if the regex does not match.
253
+ def parse_segment_with_regex(segment, regex)
254
+ segment
255
+ .tr(config.fetch(:ignored_characters), "")
256
+ .strip
257
+ .match(regex)
258
+ &.named_captures
259
+ &.compact
260
+ &.transform_keys(&:to_sym)
261
+ &.transform_values(&:strip)
262
+ &.transform_values(&:presence)
263
+ end
264
+
265
+ # Modify the values of the parsed segment according to column_class.tweaks,
266
+ # and wrap them in an array according to column_class.flatten_into_arrays.
267
+ # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula"}
268
+ # @return [Hash{Symbol => Object}]
269
+ def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
270
+ column_class.tweaks.each do |key, tweak|
271
+ if parsed_segment.has_key?(key)
272
+ parsed_segment[key] = tweak.call(parsed_segment[key])
273
+ end
274
+ end
275
+
276
+ # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
277
+ column_class.flatten_into_arrays.each do |key|
278
+ if parsed_segment.has_key?(key)
279
+ val = parsed_segment[key]
280
+ # Not using Array(val) because that results in an empty array when
281
+ # val is nil, and the nil must be preserved for series name and
282
+ # volume arrays to line up with an equal number of elements (because
283
+ # the volume may be nil).
284
+ parsed_segment[key] = [val] if !val.is_a?(Array)
285
+ end
286
+ end
287
+
288
+ parsed_segment
289
+ end
290
+ end
291
+ end
292
+ end
@@ -0,0 +1,131 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ # The base class for all the columns in parsing/rows/compact_planned_columns
5
+ # and parsing/rows/regular_columns.
6
+ class Column
7
+ # The class name changed into a string, e.g. StartDates => "Start Dates"
8
+ # @return [String]
9
+ def self.column_name
10
+ class_name = name.split("::").last
11
+ class_name.gsub(/(.)([A-Z])/,'\1 \2')
12
+ end
13
+
14
+ # The class name changed into a symbol, e.g. StartDates => :start_dates
15
+ # @return [Symbol]
16
+ def self.to_sym
17
+ class_name = name.split("::").last
18
+ class_name
19
+ .gsub(/(.)([A-Z])/,'\1_\2')
20
+ .downcase
21
+ .to_sym
22
+ end
23
+
24
+ # Whether the column can contain "chunks" each set off by a format emoji.
25
+ # For example, the Head column of a compact planned row typically
26
+ # contains a list of multiple items. (The two others are the Sources
27
+ # column, for multiple variants of an item; and the regular Head column,
28
+ # for multiple items.)
29
+ # @return [Boolean]
30
+ def self.split_by_format?
31
+ false
32
+ end
33
+
34
+ # Whether the column can contain multiple segments, e.g. "Cosmos -- 2013 paperback"
35
+ # @return [Boolean]
36
+ def self.split_by_segment?
37
+ !!segment_separator
38
+ end
39
+
40
+ # The regular expression used to split segments (e.g. /\s*--\s*/),
41
+ # or nil if the column should not be split by segment.
42
+ # @return [Regexp, nil]
43
+ def self.segment_separator
44
+ nil
45
+ end
46
+
47
+ # Whether the column can contain multiple segment groups, e.g.
48
+ # "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
49
+ # @return [Boolean]
50
+ def self.split_by_segment_group?
51
+ !!segment_group_separator
52
+ end
53
+
54
+ # The regular expression used to split segment groups (e.g. /\s*----\s*/),
55
+ # or nil if the column should not be split by segment group.
56
+ # @return [Regexp, nil]
57
+ def self.segment_group_separator
58
+ nil
59
+ end
60
+
61
+ # Adjustments that are made to captured values at the end of parsing
62
+ # the column. For example, if ::regexes includes a capture group named
63
+ # "sources" and it needs to be split by commas:
64
+ # { sources: -> { _1.split(/\s*,\s*/) } }
65
+ # @return [Hash{Symbol => Proc}]
66
+ def self.tweaks
67
+ {}
68
+ end
69
+
70
+ # Keys in the parsed output hash that should be converted to an array, even
71
+ # if only one value was in the input, as in { ... extra_info: ["ed. Jane Doe"] }
72
+ # @return [Array<Symbol>]
73
+ def self.flatten_into_arrays
74
+ []
75
+ end
76
+
77
+ # The regular expressions used to parse the column (except the part of
78
+ # the column before the first format emoji, which is in
79
+ # ::regexes_before_formats below). An array because sometimes it's
80
+ # simpler to try several smaller regular expressions in series, and
81
+ # because a regular expression might be applicable only for segments in
82
+ # a certain position. See parsing/rows/regular_columns/head.rb for an example.
83
+ # @param segment_index [Integer] the position of the current segment.
84
+ # @return [Array<Regexp>]
85
+ def self.regexes(segment_index)
86
+ []
87
+ end
88
+
89
+ # The regular expressions used to parse the part of the column before
90
+ # the first format emoji.
91
+ # @return [Array<Regexp>]
92
+ def self.regexes_before_formats
93
+ []
94
+ end
95
+
96
+ # Regular expressions that are shared across more than one column,
97
+ # placed here just to be DRY.
98
+ SHARED_REGEXES = {
99
+ progress: %r{
100
+ (DNF\s+)?(?<progress_percent>\d\d?)%
101
+ |
102
+ (DNF\s+)?p?(?<progress_pages>\d+)p?
103
+ |
104
+ (DNF\s+)?(?<progress_time>\d+:\d\d)
105
+ |
106
+ # just DNF
107
+ (?<progress_dnf>DNF)
108
+ }x,
109
+ series_and_extra_info: [
110
+ # just series
111
+ %r{\A
112
+ in\s(?<series_names>.+)
113
+ # empty volume so that names and volumes have equal sizes when turned into arrays
114
+ (?<series_volumes>)
115
+ \z}x,
116
+ # series and volume
117
+ %r{\A
118
+ (?<series_names>.+?)
119
+ ,?\s*
120
+ \#(?<series_volumes>\d+)
121
+ \z}x,
122
+ # extra info
123
+ %r{\A
124
+ (?<extra_info>.+)
125
+ \z}x,
126
+ ],
127
+ }.freeze
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,26 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ # A row that is a comment.
5
+ module Comment
6
+ using Util::HashArrayDeepFetch
7
+
8
+ # No columns; comments are parsed as if the row were blank.
9
+ # @return [Array]
10
+ def self.column_classes
11
+ []
12
+ end
13
+
14
+ # Starts with a comment character. Format emojis are not checked here:
15
+ # commented rows that DO include format emojis are expected to be matched
16
+ # as compact planned rows first (see the row_types order in Parser#extract_columns).
17
+ # @param row_string [String]
18
+ # @param config [Hash]
19
+ # @return [Boolean]
20
+ def self.match?(row_string, config)
21
+ row_string.lstrip.start_with?(config.fetch(:comment_character))
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,30 @@
1
+ require_relative "column"
2
+ require_relative "compact_planned_columns/head"
3
+ require_relative "regular_columns/sources"
4
+
5
+ module Reading
6
+ module Parsing
7
+ module Rows
8
+ # A row that contains compact planned items.
9
+ module CompactPlanned
10
+ using Util::HashArrayDeepFetch
11
+
12
+ # The columns that are possible in this type of row.
13
+ # @return [Array<Class>]
14
+ def self.column_classes
15
+ [CompactPlanned::Head, Regular::Sources]
16
+ end
17
+
18
+ # Starts with a comment character, includes one or more format emojis, and has no more column separators than this row type allows.
19
+ # @param row_string [String]
20
+ # @param config [Hash]
21
+ # @return [Boolean]
22
+ def self.match?(row_string, config)
23
+ row_string.lstrip.start_with?(config.fetch(:comment_character)) &&
24
+ row_string.match?(config.deep_fetch(:regex, :formats)) &&
25
+ row_string.count(config.fetch(:column_separator)) <= column_classes.count - 1
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,60 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module CompactPlanned
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#compact-planned-items
6
+ # and the sections following.
7
+ class Head < Column
8
+ def self.split_by_format?
9
+ true
10
+ end
11
+
12
+ def self.regexes_before_formats
13
+ [
14
+ %r{\A
15
+ \\ # comment character
16
+ \s*
17
+ (
18
+ (?<genres>[^a-z]+)?
19
+ \s*
20
+ (?<sources>@.+)?
21
+ \s*:
22
+ )?
23
+ \z}x,
24
+ ]
25
+ end
26
+
27
+ def self.segment_separator
28
+ /\s*--\s*/
29
+ end
30
+
31
+ def self.flatten_into_arrays
32
+ %i[extra_info series_names series_volumes]
33
+ end
34
+
35
+ def self.tweaks
36
+ {
37
+ genres: -> { _1.downcase.split(/\s*,\s*/) },
38
+ sources: -> { _1.split(/\s*@/).map(&:presence).compact }
39
+ }
40
+ end
41
+
42
+ def self.regexes(segment_index)
43
+ [
44
+ # author, title, sources
45
+ (%r{\A
46
+ (
47
+ (?<author>[^@]+?)
48
+ \s+-\s+
49
+ )?
50
+ (?<title>[^@]+)
51
+ (?<sources>@.+)?
52
+ \z}x if segment_index.zero?),
53
+ *Column::SHARED_REGEXES[:series_and_extra_info],
54
+ ].compact
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,33 @@
1
+ require_relative "column"
2
+ require_relative "regular_columns/rating"
3
+ require_relative "regular_columns/head"
4
+ require_relative "regular_columns/sources"
5
+ require_relative "regular_columns/start_dates"
6
+ require_relative "regular_columns/end_dates"
7
+ require_relative "regular_columns/genres"
8
+ require_relative "regular_columns/length"
9
+ require_relative "regular_columns/notes"
10
+ require_relative "regular_columns/history"
11
+
12
+ module Reading
13
+ module Parsing
14
+ module Rows
15
+ # A normal row of (usually) one item.
16
+ module Regular
17
+ # The columns that are possible in this type of row.
18
+ # @return [Array<Class>]
19
+ def self.column_classes
20
+ [Rating, Head, Sources, StartDates, EndDates, Genres, Length, Notes, History]
21
+ end
22
+
23
+ # Does not start with a comment character.
24
+ # @param row_string [String]
25
+ # @param config [Hash]
26
+ # @return [Boolean]
27
+ def self.match?(row_string, config)
28
+ !row_string.lstrip.start_with?(config.fetch(:comment_character))
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,20 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
6
+ class EndDates < Column
7
+ def self.segment_separator
8
+ /,\s*/
9
+ end
10
+
11
+ def self.regexes(segment_index)
12
+ [%r{\A
13
+ (?<date>\d{4}/\d\d?/\d\d?)
14
+ \z}x]
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,20 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#genres-column
6
+ class Genres < Column
7
+ def self.segment_separator
8
+ /,\s*/
9
+ end
10
+
11
+ def self.regexes(segment_index)
12
+ [%r{\A
13
+ (?<genre>.+)
14
+ \z}x]
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,45 @@
1
+ module Reading
2
+ module Parsing
3
+ module Rows
4
+ module Regular
5
+ # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-title
6
+ # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-dnf
7
+ # and the sections following.
8
+ class Head < Column
9
+ def self.split_by_format?
10
+ true
11
+ end
12
+
13
+ def self.regexes_before_formats
14
+ [
15
+ /\A#{Column::SHARED_REGEXES[:progress]}\z/,
16
+ /.+/,
17
+ ]
18
+ end
19
+
20
+ def self.segment_separator
21
+ /\s*--\s*/
22
+ end
23
+
24
+ def self.flatten_into_arrays
25
+ %i[extra_info series_names series_volumes]
26
+ end
27
+
28
+ def self.regexes(segment_index)
29
+ [
30
+ # author and title
31
+ (%r{\A
32
+ (
33
+ (?<author>.+?)
34
+ \s+-\s+
35
+ )?
36
+ (?<title>.+)
37
+ \z}x if segment_index.zero?),
38
+ *Column::SHARED_REGEXES[:series_and_extra_info],
39
+ ].compact
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end