reading 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. checksums.yaml +4 -4
  2. data/bin/reading +5 -5
  3. data/bin/readingfile +31 -0
  4. data/lib/reading/config.rb +96 -108
  5. data/lib/reading/errors.rb +10 -66
  6. data/lib/reading/filter.rb +95 -0
  7. data/lib/reading/item/time_length.rb +140 -0
  8. data/lib/reading/item/view.rb +121 -0
  9. data/lib/reading/item.rb +117 -0
  10. data/lib/reading/parsing/attributes/attribute.rb +26 -0
  11. data/lib/reading/parsing/attributes/author.rb +15 -0
  12. data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
  13. data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
  14. data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
  15. data/lib/reading/parsing/attributes/experiences.rb +27 -0
  16. data/lib/reading/parsing/attributes/genres.rb +16 -0
  17. data/lib/reading/parsing/attributes/notes.rb +22 -0
  18. data/lib/reading/parsing/attributes/rating.rb +17 -0
  19. data/lib/reading/parsing/attributes/shared.rb +62 -0
  20. data/lib/reading/parsing/attributes/title.rb +21 -0
  21. data/lib/reading/parsing/attributes/variants.rb +77 -0
  22. data/lib/reading/parsing/csv.rb +112 -0
  23. data/lib/reading/parsing/parser.rb +292 -0
  24. data/lib/reading/parsing/rows/column.rb +131 -0
  25. data/lib/reading/parsing/rows/comment.rb +26 -0
  26. data/lib/reading/parsing/rows/compact_planned.rb +30 -0
  27. data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
  28. data/lib/reading/parsing/rows/regular.rb +33 -0
  29. data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
  30. data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
  31. data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
  32. data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
  33. data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
  34. data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
  35. data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
  36. data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
  37. data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
  38. data/lib/reading/parsing/transformer.rb +70 -0
  39. data/lib/reading/util/hash_compact_by_template.rb +1 -0
  40. data/lib/reading/util/hash_deep_merge.rb +1 -1
  41. data/lib/reading/util/hash_to_data.rb +30 -0
  42. data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
  43. data/lib/reading/util/string_truncate.rb +13 -4
  44. data/lib/reading/version.rb +1 -1
  45. data/lib/reading.rb +49 -0
  46. metadata +76 -42
  47. data/lib/reading/attribute/all_attributes.rb +0 -83
  48. data/lib/reading/attribute/attribute.rb +0 -25
  49. data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
  50. data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
  51. data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
  52. data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
  53. data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
  54. data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
  55. data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
  56. data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
  57. data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
  58. data/lib/reading/csv.rb +0 -76
  59. data/lib/reading/line.rb +0 -23
  60. data/lib/reading/row/blank_row.rb +0 -23
  61. data/lib/reading/row/compact_planned_row.rb +0 -130
  62. data/lib/reading/row/regular_row.rb +0 -99
  63. data/lib/reading/row/row.rb +0 -88
  64. data/lib/reading/util/hash_to_struct.rb +0 -29
@@ -0,0 +1,62 @@
1
+ module Reading
2
+ module Parsing
3
+ module Attributes
4
+ # Shared
5
+ module Shared
6
+ # Extracts the :progress sub-attribute (percent, pages, or time) from
7
+ # the given hash.
8
+ # @param hash [Hash] any parsed hash that contains progress.
9
+ # @return [Float, Integer, Item::TimeLength]
10
+ def self.progress(hash)
11
+ hash[:progress_percent]&.to_f&./(100) ||
12
+ hash[:progress_pages]&.to_i ||
13
+ hash[:progress_time]&.then { Item::TimeLength.parse _1 } ||
14
+ (0 if hash[:progress_dnf]) ||
15
+ (1.0 if hash[:progress_done]) ||
16
+ nil
17
+ end
18
+
19
+ # Extracts the :length sub-attribute (pages or time) from the given hash.
20
+ # @param hash [Hash] any parsed hash that contains length.
21
+ # @param key_name [Symbol] the first part of the keys to be checked.
22
+ # @param episodic [Boolean] whether to look for episodic (not total) length.
23
+ # If false, returns nil if hash contains :each. If true, returns a
24
+ # length only if hash contains :each or if it has repetitions, in
25
+ # which case repetitions are ignored. Examples of episodic lengths
26
+ # (before parsing) are "0:30 each" and "1:00 x14" (where the episodic
27
+ # length is 1:00). Examples of non-episodic lengths are "0:30" and "14:00".
28
+ # @param ignore_repetitions [Boolean] if true, ignores repetitions so
29
+ # that e.g. "1:00 x14" gives a length of 1 hour instead of 14 hours.
30
+ # This is useful for the History column, where that 1 hour can be used
31
+ # as the default amount.
32
+ # @return [Float, Integer, Item::TimeLength]
33
+ def self.length(hash, key_name: :length, episodic: false, ignore_repetitions: false)
34
+ return nil unless hash
35
+
36
+ length = hash[:"#{key_name}_pages"]&.to_i ||
37
+ hash[:"#{key_name}_time"]&.then { Item::TimeLength.parse _1 }
38
+
39
+ return nil unless length
40
+
41
+ if hash[:each]
42
+ # Length is calculated based on History column in this case.
43
+ if episodic
44
+ return length
45
+ else
46
+ return nil
47
+ end
48
+ end
49
+
50
+ if hash[:repetitions]
51
+ return length if episodic
52
+ length *= hash[:repetitions].to_i unless ignore_repetitions
53
+ else
54
+ return nil if episodic && !hash[:each]
55
+ end
56
+
57
+ length
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,21 @@
1
module Reading
  module Parsing
    module Attributes
      # Transformer for the :title item attribute.
      class Title < Attribute
        # @param parsed_row [Hash] a parsed row (the intermediate hash).
        # @param head_index [Integer] current item's position in the Head column.
        # @return [String]
        # @raise [InvalidHeadError] if the title is absent or cut off.
        def transform_from_parsed(parsed_row, head_index)
          head = parsed_row[:head][head_index]
          title = head[:title]
          invalid = title.nil? || title.end_with?(" -")

          raise InvalidHeadError, "Missing title in the head #{head}" if invalid

          title
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module Reading
  module Parsing
    module Attributes
      # Transformer for the :variant item attribute.
      class Variants < Attribute
        using Util::HashArrayDeepFetch

        # @param parsed_row [Hash] a parsed row (the intermediate hash).
        # @param head_index [Integer] current item's position in the Head column.
        # @return [Array<Hash>] an array of variants; see
        #   Config#default_config[:item][:template][:variants]
        def transform_from_parsed(parsed_row, head_index)
          head = parsed_row[:head][head_index]

          # Fall back to one empty variant in case there is no Sources column.
          source_hashes = parsed_row[:sources].presence || [{}]

          source_hashes&.map { |variant|
            raw = {
              format: variant[:format] || head[:format],
              series: (series(head) + series(variant)).presence,
              sources: sources(variant) || sources(head),
              isbn: variant[:isbn] || variant[:asin],
              length: Attributes::Shared.length(variant) ||
                Attributes::Shared.length(parsed_row[:length]),
              extra_info: Array(head[:extra_info]) + Array(variant[:extra_info]),
            }

            # Fill in any missing values from the variant template.
            raw.map { |key, value| [key, value || template.fetch(key)] }.to_h
          }&.compact&.presence
        end

        # A shortcut to the variant template.
        # @return [Hash]
        def template
          config.deep_fetch(:item, :template, :variants).first
        end

        # The :series sub-attribute for the given parsed hash.
        # @param hash [Hash] any parsed hash that contains :series_names and :series_volumes.
        # @return [Array<Hash>] pairs of series name and (possibly nil) volume.
        def series(hash)
          names = hash[:series_names] || []
          volumes = hash[:series_volumes] || []

          names.zip(volumes).map { |name, volume|
            { name:, volume: Integer(volume, exception: false) }
          }
        end

        # The :sources sub-attribute for the given parsed hash.
        # @param hash [Hash] any parsed hash that contains :sources.
        # @return [Array<Hash>] each with a name and (for URLs) the URL itself.
        def sources(hash)
          hash[:sources]&.map { |source|
            if source.match?(/\Ahttps?:\/\//)
              { name: url_name(source), url: source }
            else
              { name: source, url: nil }
            end
          }
        end

        # The name for the given URL string, according to
        # config[:source_names_from_urls], or nil.
        # @param url [String] a URL.
        # @return [String, nil]
        def url_name(url)
          match = config
            .fetch(:source_names_from_urls)
            .find { |url_part, _name| url.include?(url_part) }

          match&.last
        end
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
# Used throughout, in other files.
require_relative "../util/blank"
require_relative "../util/string_remove"
require_relative "../util/string_truncate"
require_relative "../util/numeric_to_i_if_whole"
require_relative "../util/hash_deep_merge"
require_relative "../util/hash_array_deep_fetch"
require_relative "../util/hash_compact_by_template"
require_relative "../errors"

# Used just here.
require_relative "../config"
require_relative "../item"
require_relative "parser"
require_relative "transformer"

module Reading
  module Parsing
    #
    # Validates a path or stream (string, file, etc.) of a CSV reading log, then
    # parses it into an array of Items.
    #
    # Parsing happens in two steps:
    # (1) Parse a row string into an intermediate hash representing the columns.
    #     - See parsing/parser.rb, which uses parsing/rows/*
    # (2) Transform the intermediate hash into an array of hashes structured
    #     around item attributes rather than CSV columns.
    #     - See parsing/transformer.rb, which uses parsing/attributes/*
    #
    # Keeping these steps separate makes the code easier to understand. It was
    # inspired by the Parslet gem: https://kschiess.github.io/parslet/transform.html
    #
    class CSV
      private attr_reader :parser, :transformer, :hash_output, :item_view

      # Validates a path or stream (string, file, etc.) of a CSV reading log,
      # builds the config, and initializes the parser and transformer.
      # @param path [String] path to the CSV file; used if no stream is given.
      # @param stream [Object] an object responding to #each_line with CSV row(s);
      #   if nil, path is used instead.
      # @param config [Hash] a custom config which overrides the defaults,
      #   e.g. { errors: { styling: :html } }
      # @param hash_output [Boolean] whether an array of raw Hashes should be
      #   returned, without Items being created from them.
      # @param item_view [Class, nil, Boolean] the class that will be used to build
      #   each Item's view object, or nil/false if no view object should be built.
      #   If you use a custom view class, the only requirement is that its
      #   #initialize take an Item and a full config as arguments.
      def initialize(path = nil, stream: nil, config: {}, hash_output: false, item_view: Item::View)
        validate_path_or_stream(path, stream)

        full_config = Config.new(config).hash

        @path = path
        @stream = stream
        @hash_output = hash_output
        @item_view = item_view
        @parser = Parser.new(full_config)
        @transformer = Transformer.new(full_config)
      end

      # Parses and transforms the reading log into item data.
      # @return [Array<Item>] an array of Items like the template in
      #   Config#default_config[:item][:template]. The Items are identical in
      #   structure to that Hash (with every inner Hash replaced by a Data for
      #   dot access).
      def parse
        source = @stream || File.open(@path)
        item_hashes = []

        source.each_line do |row|
          intermediate = parser.parse_row_to_intermediate_hash(row)
          next if intermediate.empty? # When the row is blank or a comment.

          item_hashes.concat(
            transformer.transform_intermediate_hash_to_item_hashes(intermediate),
          )
        rescue Reading::Error => e
          # Add the offending row to the error message for context.
          raise e.class, "#{e.message} in the row \"#{row}\""
        end

        return item_hashes if hash_output

        item_hashes.map { |item_hash| Item.new(item_hash, view: item_view) }
      ensure
        source&.close if source.respond_to?(:close)
      end

      private

      # Checks on the given stream and path (arguments to #initialize).
      # @raise [FileError] if the given path is invalid.
      # @raise [ArgumentError] if both stream and path are nil.
      def validate_path_or_stream(path, stream)
        return true if stream && stream.respond_to?(:each_line)

        if path.nil?
          raise ArgumentError,
            "Either a file path or a stream (string, file, etc.) must be provided."
        end

        raise FileError, "File not found! #{path}" if !File.exist?(path)

        if File.directory?(path)
          raise FileError, "A file is expected, but the path given is a directory: #{path}"
        end
      end
    end
  end
end
@@ -0,0 +1,292 @@
1
require_relative "rows/regular"
require_relative "rows/compact_planned"
require_relative "rows/comment"

module Reading
  module Parsing
    #
    # Parses a string containing a row of a CSV reading log, into a hash
    # mirroring the structure of the row. This hash is an intermediate form and
    # not the final item data. It's the raw material for Parsing::Transformer to
    # generate the final item data.
    #
    # Below is an example intermediate hash parsed from this row, which has a Rating
    # column, then a Head column containing an author, title, series, and extra info:
    #
    # 3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
    #
    # {
    #   rating: { number: "3" },
    #   head: [{
    #     author: "Thomas More",
    #     title: "Utopia",
    #     series_names: ["Cambridge History of Political Thought"],
    #     series_volumes: [nil],
    #     extra_info: ["trans. Robert Adams", "ed. George Logan"],
    #     format: :print,
    #   }]
    # }
    #
    # The hash's top-level keys are column names. The nested keys come from
    # regex capture group names in each column (for this example, see ::regexes
    # in rating.rb and head.rb in parsing/rows/regular_columns).
    #
    # All the rest is just details of how the parts of a column are joined:
    #
    # - The :head value is an array because Head.split_by_format? is
    #   true (because a Head column can potentially contain multiple items).
    #   That's also where { format: :print } comes from.
    #
    # - The :series_names and :series_volumes values are arrays because these
    #   keys are in Head.flatten_into_arrays, which causes the column's segments
    #   (separated by " -- ") to be merged into one hash.
    #
    class Parser
      using Util::HashArrayDeepFetch
      using Util::StringRemove

      attr_reader :config

      # @param config [Hash] an entire config.
      def initialize(config)
        @config = config
      end

      # Parses a row string into a hash that mirrors the structure of the row.
      # @param string [String] a string containing a row of a CSV reading log.
      # @return [Hash]
      def parse_row_to_intermediate_hash(string)
        columns = extract_columns(string)

        if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
          return {}
        end

        columns
          .map { |column_class, column_string| parse_column(column_class, column_string) }
          .to_h
      end

      private

      # Splits the row string by column and pairs them in a hash with column
      # classes, which contain the information necessary to parse each column.
      # @param string [String] a string containing a row of a CSV reading log.
      # @return [Hash{Class => String}] a hash whose keys are classes inheriting
      #   Parsing::Rows::Column.
      def extract_columns(string)
        row_string = string.dup.force_encoding(Encoding::UTF_8)
        column_strings = row_string.split(config.fetch(:column_separator))

        # The first matching row type determines which columns are possible.
        row_type = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
          .find { |type| type.match?(row_string, config) }

        enabled_columns = config.fetch(:enabled_columns)
        column_classes = row_type
          .column_classes
          .filter { |column_class| enabled_columns.include?(column_class.to_sym) }

        if !column_classes.count.zero? && column_strings.count > column_classes.count
          raise TooManyColumnsError, "Too many columns"
        end

        column_classes
          .zip(column_strings)
          .reject { |_class, column_string| column_string.nil? }
          .to_h
      end

      # Parses a column into an array of two elements (a key for the column name
      # and a value of its contents).
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param column_string [String] a string containing a column from a row.
      # @return [Array(Symbol, Hash), Array(Symbol, Array)]
      def parse_column(column_class, column_string)
        # Multiple format emojis are possible in some columns:
        #   - Head column, for multiple items.
        #   - Sources column, for multiple variants of an item.
        #   - Compact planned head column, for multiple items.
        # That is the final (most complex) case below; the two guard clauses
        # handle the simpler format-less cases first.
        column_key = column_class.to_sym

        # Simplest case: the column is never split by format. Return the
        # column name and the parsed segment(s): a Hash (if the column can't
        # have multiple segments or if its segments are flattened) or an
        # Array (if there are multiple segments and they're not flattened).
        unless column_class.split_by_format?
          return [column_key, parse_segments(column_class, column_string)]
        end

        # Also simple: the column *can* be split by format but this row
        # doesn't contain any format emojis. Same as above, but with an extra
        # level of nesting (except when the parsed result is empty) so that
        # the nesting is consistent across columns that *can* split by format.
        unless column_string.match?(config.deep_fetch(:regex, :formats))
          parsed_column = parse_segments(column_class, column_string)
          return [column_key, [parsed_column.presence].compact]
        end

        # The complex case: the column *can and is* split by format.

        # Each format emoji plus the string following it.
        format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))

        # There may be a string before the first format, e.g. "DNF" in Head column.
        unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
          before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
        end

        # Parse each format-plus-string into an array of segments.
        heads = format_strings.map { |format_string|
          format_emoji = format_string[config.deep_fetch(:regex, :formats)]
          format_string.remove!(format_emoji)
          format = config.fetch(:formats).key(format_emoji)

          parse_segments(column_class, format_string)
            .merge(format: format)
        }

        # Combine values of conflicting keys so that in a compact planned
        # Head column, sources from before_formats are not ignored.
        if before_formats
          heads.each do |head|
            head.merge!(before_formats) { |_key, old_value, new_value|
              (new_value + old_value).uniq
            }
          end
        end

        [column_key, heads]
      end

      # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param string [String] a string containing segments, which is either an
      #   entire column or (for columns that are split by format emoji) a string
      #   following a format emoji.
      # @return [Array<Hash>, Hash] either an array of parsed segments (hashes),
      #   or a single hash if the column can't be split by segment or if the
      #   segments are flattened into one hash.
      def parse_segments(column_class, string)
        return {} if string.blank?

        # A column that can't be split by segment is parsed as one segment.
        unless column_class.split_by_segment?
          return parse_segment(column_class, string)
        end

        parse_each_segment = ->(segments_string) {
          segments_string
            .split(column_class.segment_separator)
            .map.with_index { |segment, index|
              parse_segment(column_class, segment, index)
            }
        }

        # An extra level of nesting for columns with segment groups,
        # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
        if column_class.split_by_segment_group?
          segments = string
            .split(column_class.segment_group_separator)
            .map { |segment_group| parse_each_segment.call(segment_group) }
        else
          segments = parse_each_segment.call(string)
        end

        if column_class.flatten_into_arrays.any?
          segments = segments.reduce { |merged, segment|
            merged.merge!(segment) { |_key, old_value, new_value|
              # old_value is already an array by this point, since its key
              # should be in Column.flatten_into_arrays
              old_value + new_value
            }
          }
        end

        segments
      end

      # Parses a segment using a regular expression from the column class.
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param segment_index [Integer] the position of the segment when it's in
      #   part of a series of segments; this can change which regular expressions
      #   are applicable to it.
      # @param before_formats [Boolean] whether to use the before-formats regexes.
      # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
      #   unless changed via column_class.tweaks or column_class.flatten_into_arrays.
      #   Example: { author: "Bram Stoker", title: "Dracula"}
      # @raise [ParsingError] if no regex matches the segment.
      def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
        regexes =
          if before_formats
            column_class.regexes_before_formats
          else
            column_class.regexes(segment_index)
          end

        # Try each regex in order; the first truthy parse wins.
        parsed_segment = regexes.lazy
          .map { |regex| parse_segment_with_regex(segment, regex) }
          .find(&:itself)

        if parsed_segment.nil?
          raise ParsingError, "Could not parse \"#{segment}\" in " \
            "the #{column_class.column_name} column"
        end

        tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
      end

      # Parses a segment using the given regular expression.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param regex [Regexp] the regular expression with which to parse the segment.
      # @return [Hash{Symbol => String}, nil] e.g. { author: "Bram Stoker", title: "Dracula"}
      def parse_segment_with_regex(segment, regex)
        match = segment
          .tr(config.fetch(:ignored_characters), "")
          .strip
          .match(regex)

        match
          &.named_captures
          &.compact
          &.transform_keys(&:to_sym)
          &.transform_values(&:strip)
          &.transform_values(&:presence)
      end

      # Modify the values of the parsed segment according to column_class.tweaks,
      # and wrap them in an array according to column_class.flatten_into_arrays.
      # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula"}
      # @return [Hash{Symbol => Object}]
      def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
        column_class.tweaks.each do |key, tweak|
          if parsed_segment.has_key?(key)
            parsed_segment[key] = tweak.call(parsed_segment[key])
          end
        end

        # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
        column_class.flatten_into_arrays.each do |key|
          next unless parsed_segment.has_key?(key)

          value = parsed_segment[key]
          # Not using Array(value) because that results in an empty array when
          # value is nil, and the nil must be preserved for series name and
          # volume arrays to line up with an equal number of elements (because
          # the volume may be nil).
          parsed_segment[key] = [value] if !value.is_a?(Array)
        end

        parsed_segment
      end
    end
  end
end