reading 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/reading +5 -5
- data/bin/readingfile +31 -0
- data/lib/reading/config.rb +115 -149
- data/lib/reading/errors.rb +10 -66
- data/lib/reading/item/time_length.rb +138 -0
- data/lib/reading/parsing/attributes/attribute.rb +26 -0
- data/lib/reading/parsing/attributes/author.rb +15 -0
- data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
- data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
- data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
- data/lib/reading/parsing/attributes/experiences.rb +27 -0
- data/lib/reading/parsing/attributes/genres.rb +16 -0
- data/lib/reading/parsing/attributes/notes.rb +22 -0
- data/lib/reading/parsing/attributes/rating.rb +17 -0
- data/lib/reading/parsing/attributes/shared.rb +62 -0
- data/lib/reading/parsing/attributes/title.rb +21 -0
- data/lib/reading/parsing/attributes/variants.rb +77 -0
- data/lib/reading/parsing/csv.rb +101 -0
- data/lib/reading/parsing/parser.rb +292 -0
- data/lib/reading/parsing/rows/column.rb +131 -0
- data/lib/reading/parsing/rows/comment.rb +26 -0
- data/lib/reading/parsing/rows/compact_planned.rb +30 -0
- data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
- data/lib/reading/parsing/rows/regular.rb +33 -0
- data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
- data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
- data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
- data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
- data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
- data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
- data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
- data/lib/reading/parsing/transformer.rb +70 -0
- data/lib/reading/util/hash_compact_by_template.rb +1 -0
- data/lib/reading/util/hash_deep_merge.rb +1 -1
- data/lib/reading/util/hash_to_struct.rb +1 -0
- data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
- data/lib/reading/util/string_truncate.rb +13 -4
- data/lib/reading/version.rb +1 -1
- data/lib/reading.rb +18 -0
- metadata +58 -41
- data/lib/reading/attribute/all_attributes.rb +0 -83
- data/lib/reading/attribute/attribute.rb +0 -25
- data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
- data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
- data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
- data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
- data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
- data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
- data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
- data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
- data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
- data/lib/reading/csv.rb +0 -76
- data/lib/reading/line.rb +0 -23
- data/lib/reading/row/blank_row.rb +0 -23
- data/lib/reading/row/compact_planned_row.rb +0 -130
- data/lib/reading/row/regular_row.rb +0 -99
- data/lib/reading/row/row.rb +0 -88
data/lib/reading/parsing/parser.rb
@@ -0,0 +1,292 @@
+require_relative "rows/regular"
+require_relative "rows/compact_planned"
+require_relative "rows/comment"
+
+module Reading
+  module Parsing
+    #
+    # Parses a string containing a row of a CSV reading log, into a hash
+    # mirroring the structure of the row. This hash is an intermediate form and
+    # not the final item data. It's the raw material for Parsing::Transformer to
+    # generate the final item data.
+    #
+    # Below is an example intermediate hash parsed from this row, which has a Rating
+    # column, then a Head column containing an author, title, series, and extra info:
+    #
+    # 3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
+    #
+    # {
+    #   rating: { number: "1" },
+    #   head: [{
+    #     author: "Thomas More",
+    #     title: "Utopia",
+    #     series_names: ["Cambridge History of Political Thought"],
+    #     series_volumes: [nil],
+    #     extra_info: ["trans. Robert Adams", "ed. George Logan"],
+    #     format: :print,
+    #   }]
+    # }
+    #
+    # The hash's top-level keys are column names. The nested keys come from
+    # regex capture group names in each column (for this example, see ::regexes
+    # in rating.rb and head.rb in parsing/rows/regular_columns).
+    #
+    # All the rest is just details of how the parts of a column are joined:
+    #
+    # - The :head value is an array because Head.split_by_format? is
+    #   true (because a Head column can potentially contain multiple items).
+    #   That's also where { format: :print } comes from.
+    #
+    # - The :series_names and :series_volumes values are arrays because these
+    #   keys are in Head.flatten_into_arrays, which causes the column's segments
+    #   (separated by " -- ") to be merged into one hash.
+    #
+    class Parser
+      using Util::HashArrayDeepFetch
+      using Util::StringRemove
+
+      attr_reader :config
+
+      # @param config [Hash] an entire config.
+      def initialize(config)
+        @config = config
+      end
+
+      # Parses a row string into a hash that mirrors the structure of the row.
+      # @param string [String] a string containing a row of a CSV reading log.
+      # @return [Hash]
+      def parse_row_to_intermediate_hash(string)
+        columns = extract_columns(string)
+
+        if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
+          return {}
+        end
+
+        columns.map { |column, column_string|
+          parse_column(column, column_string)
+        }.to_h
+      end
+
+      private
+
+      # Splits the row string by column and pairs them in a hash with column
+      # classes, which contain the information necessary to parse each column.
+      # @param string [String] a string containing a row of a CSV reading log.
+      # @return [Hash{Class => String}] a hash whose keys are classes inheriting
+      #   Parsing::Rows::Column.
+      def extract_columns(string)
+        clean_string = string.dup.force_encoding(Encoding::UTF_8)
+        column_strings = clean_string.split(config.fetch(:column_separator))
+
+        row_types = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
+        column_classes = row_types
+          .find { |row_type| row_type.match?(string, config) }
+          .column_classes
+          .filter { |column_class|
+            config.fetch(:enabled_columns).include?(column_class.to_sym)
+          }
+
+        if !column_classes.count.zero? && column_strings.count > column_classes.count
+          raise TooManyColumnsError, "Too many columns"
+        end
+
+        column_classes
+          .zip(column_strings)
+          .reject { |_class, string| string.nil? }
+          .to_h
+      end
+
+      # Parses a column into an array of two elements (a key for the column name
+      # and a value of its contents).
+      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
+      # @param column_string [String] a string containing a column from a row.
+      # @return [Array(Symbol, Hash), Array(Symbol, Array)]
+      def parse_column(column_class, column_string)
+        # Multiple format emojis are possible in some columns:
+        #   - Head column, for multiple items.
+        #   - Sources column, for multiple variants of an item.
+        #   - Compact planned head column, for multiple items.
+        # This is the default case below the two guard clauses. It's more complex
+        # because there's possibly a string before the first format, and there's
+        # an extra level of nesting in the returned array.
+
+        # Simplest case: if the column is never split by format, return the
+        # column name and the parsed segment(s), which is either a Hash (if the
+        # column can't have multiple segments or if its segments are flattened)
+        # or an Array (if there are multiple segments and they're not flattened).
+        if !column_class.split_by_format?
+          parsed_column = parse_segments(column_class, column_string)
+          return [column_class.to_sym, parsed_column]
+        end
+
+        # Also simple: if the column *can* be split by format but in this row
+        # it doesn't contain any format emojis, return the same as above but
+        # with an extra level of nesting (except when the parsed result is nil).
+        if column_class.split_by_format? &&
+            !column_string.match?(config.deep_fetch(:regex, :formats))
+
+          parsed_column = parse_segments(column_class, column_string)
+          # Wrap a non-empty value in an array so that e.g. a head without
+          # emojis is still an array. This way the extra level of nesting can
+          # be consistently expected for columns that *can* be split by format.
+          parsed_column_nonempty_nested = [parsed_column.presence].compact
+          return [column_class.to_sym, parsed_column_nonempty_nested]
+        end
+
+        # The rest is the complex case: if the column *can and is* split by format.
+
+        # Each format plus the string after it.
+        format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))
+
+        # If there's a string before the first format, e.g. "DNF" in Head column.
+        unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
+          before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
+        end
+
+        # Parse each format-plus-string into an array of segments.
+        heads = format_strings.map { |string|
+          format_emoji = string[config.deep_fetch(:regex, :formats)]
+          string.remove!(format_emoji)
+          format = config.fetch(:formats).key(format_emoji)
+
+          parse_segments(column_class, string)
+            .merge(format: format)
+        }
+
+        # Combine values of conflicting keys so that in a compact planned
+        # Head column, sources from before_formats are not ignored.
+        if before_formats
+          heads.each do |head|
+            head.merge!(before_formats) do |k, old_v, new_v|
+              (new_v + old_v).uniq
+            end
+          end
+        end
+
+        [column_class.to_sym, heads]
+      end
+
+      # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
+      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
+      # @param string [String] a string containing segments, which is either an
+      #   entire column or (for columns that are split by format emoji) a string
+      #   following a format emoji.
+      # @return [Array<Hash>, Hash] either an array of parsed segments (hashes),
+      #   or a single hash if the column can't be split by segment or if the
+      #   segments are flattened into one hash.
+      def parse_segments(column_class, string)
+        return {} if string.blank?
+
+        # If the column can't be split by segment, parse as a single segment.
+        if !column_class.split_by_segment?
+          return parse_segment(column_class, string)
+        end
+
+        # Add an extra level of nesting if the column can have segment groups,
+        # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
+        if column_class.split_by_segment_group?
+          segments = string
+            .split(column_class.segment_group_separator)
+            .map { |segment_group|
+              segment_group
+                .split(column_class.segment_separator)
+                .map.with_index { |segment, i|
+                  parse_segment(column_class, segment, i)
+                }
+            }
+        else
+          segments = string
+            .split(column_class.segment_separator)
+            .map.with_index { |segment, i|
+              parse_segment(column_class, segment, i)
+            }
+        end
+
+        if column_class.flatten_into_arrays.any?
+          segments = segments.reduce { |merged, segment|
+            merged.merge!(segment) { |_k, old_v, new_v|
+              # old_v is already an array by this point, since its key should be
+              # in Column.flatten_into_arrays
+              old_v + new_v
+            }
+          }
+        end
+
+        segments
+      end
+
+      # Parses a segment using a regular expression from the column class.
+      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
+      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
+      # @param segment_index [Integer] the position of the segment when it's in
+      #   part of a series of segments; this can change which regular expressions
+      #   are applicable to it.
+      # @param before_formats [Boolean] whether to use the before-formats regexes.
+      # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
+      #   unless changed via column_class.tweaks or column_class.flatten_into_arrays.
+      #   Example: { author: "Bram Stoker", title: "Dracula"}
+      def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
+        if before_formats
+          regexes = column_class.regexes_before_formats
+        else
+          regexes = column_class.regexes(segment_index)
+        end
+
+        parsed_segment = nil
+        regexes.each do |regex|
+          parsed_segment = parse_segment_with_regex(segment, regex)
+          break if parsed_segment
+        end
+
+        if parsed_segment.nil?
+          raise ParsingError, "Could not parse \"#{segment}\" in " \
+            "the #{column_class.column_name} column"
+        end
+
+        tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
+      end
+
+      # Parses a segment using the given regular expression.
+      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
+      # @param regex [Regexp] the regular expression with which to parse the segment.
+      # @return [Hash{Symbol => String}] e.g. { author: "Bram Stoker", title: "Dracula"}
+      def parse_segment_with_regex(segment, regex)
+        segment
+          .tr(config.fetch(:ignored_characters), "")
+          .strip
+          .match(regex)
+          &.named_captures
+          &.compact
+          &.transform_keys(&:to_sym)
+          &.transform_values(&:strip)
+          &.transform_values(&:presence)
+      end
+
+      # Modify the values of the parsed segment according to column_class.tweaks,
+      # and wrap them in an array according to column_class.flatten_into_arrays.
+      # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula"}
+      # @return [Hash{Symbol => Object}]
+      def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
+        column_class.tweaks.each do |key, tweak|
+          if parsed_segment.has_key?(key)
+            parsed_segment[key] = tweak.call(parsed_segment[key])
+          end
+        end
+
+        # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
+        column_class.flatten_into_arrays.each do |key|
+          if parsed_segment.has_key?(key)
+            val = parsed_segment[key]
+            # Not using Array(val) because that results in an empty array when
+            # val is nil, and the nil must be preserved for series name and
+            # volume arrays to line up with an equal number of elements (because
+            # the volume may be nil).
+            parsed_segment[key] = [val] if !val.is_a?(Array)
+          end
+        end
+
+        parsed_segment
+      end
+    end
+  end
+end
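To see the new entry point in context, here is a minimal usage sketch (not part of the diff). It assumes the default config built by Reading::Config — config.rb is also rewritten in this release but its API isn't shown in this hunk — with "|" as the column separator and 📕 mapped to :print:

    require "reading"

    # Assumption: Reading::Config.new.hash returns the full default config hash
    # that Parser expects; the exact constructor is not shown in this diff.
    config = Reading::Config.new.hash
    parser = Reading::Parsing::Parser.new(config)

    row = '3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought'
    intermediate = parser.parse_row_to_intermediate_hash(row)

    intermediate[:head].first[:title]      # => "Utopia"
    intermediate[:head].first[:extra_info] # => ["trans. Robert Adams", "ed. George Logan"]

The intermediate hash is then handed to Parsing::Transformer (transformer.rb, also added in this release) to produce the final item data.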
data/lib/reading/parsing/rows/column.rb
@@ -0,0 +1,131 @@
+module Reading
+  module Parsing
+    module Rows
+      # The base class for all the columns in parsing/rows/compact_planned_columns
+      # and parsing/rows/regular_columns.
+      class Column
+        # The class name changed into a string, e.g. StartDates => "Start Dates"
+        # @return [String]
+        def self.column_name
+          class_name = name.split("::").last
+          class_name.gsub(/(.)([A-Z])/,'\1 \2')
+        end
+
+        # The class name changed into a symbol, e.g. StartDates => :start_dates
+        # @return [Symbol]
+        def self.to_sym
+          class_name = name.split("::").last
+          class_name
+            .gsub(/(.)([A-Z])/,'\1_\2')
+            .downcase
+            .to_sym
+        end
+
+        # Whether the column can contain "chunks" each set off by a format emoji.
+        # For example, the Head column of a compact planned row typically
+        # contains a list of multiple items. (The two others are the Sources
+        # column, for multiple variants of an item; and the regular Head column,
+        # for multiple items.)
+        # @return [Boolean]
+        def self.split_by_format?
+          false
+        end
+
+        # Whether the column can contain multiple segments, e.g. "Cosmos -- 2013 paperback"
+        # @return [Boolean]
+        def self.split_by_segment?
+          !!segment_separator
+        end
+
+        # The regular expression used to split segments (e.g. /\s*--\s*/),
+        # or nil if the column should not be split by segment.
+        # @return [Regexp, nil]
+        def self.segment_separator
+          nil
+        end
+
+        # Whether the column can contain multiple segment groups, e.g.
+        # "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
+        # @return [Boolean]
+        def self.split_by_segment_group?
+          !!segment_group_separator
+        end
+
+        # The regular expression used to split segment groups (e.g. /\s*----\s*/),
+        # or nil if the column should not be split by segment group.
+        # @return [Regexp, nil]
+        def self.segment_group_separator
+          nil
+        end
+
+        # Adjustments that are made to captured values at the end of parsing
+        # the column. For example, if ::regexes includes a capture group named
+        # "sources" and it needs to be split by commas:
+        # { sources: -> { _1.split(/\s*,\s*/) } }
+        # @return [Hash{Symbol => Proc}]
+        def self.tweaks
+          {}
+        end
+
+        # Keys in the parsed output hash that should be converted to an array, even
+        # if only one value was in the input, as in { ... extra_info: ["ed. Jane Doe"] }
+        # @return [Array<Symbol>]
+        def self.flatten_into_arrays
+          []
+        end
+
+        # The regular expressions used to parse the column (except the part of
+        # the column before the first format emoji, which is in
+        # ::regexes_before_formats below). An array because sometimes it's
+        # simpler to try several smaller regular expressions in series, and
+        # because a regular expression might be applicable only for segments in
+        # a certain position. See parsing/rows/regular_columns/head.rb for an example.
+        # @param segment_index [Integer] the position of the current segment.
+        # @return [Array<Regexp>]
+        def self.regexes(segment_index)
+          []
+        end
+
+        # The regular expressions used to parse the part of the column before
+        # the first format emoji.
+        # @return [Array<Regexp>]
+        def self.regexes_before_formats
+          []
+        end
+
+        # Regular expressions that are shared across more than one column,
+        # placed here just to be DRY.
+        SHARED_REGEXES = {
+          progress: %r{
+            (DNF\s+)?(?<progress_percent>\d\d?)%
+            |
+            (DNF\s+)?p?(?<progress_pages>\d+)p?
+            |
+            (DNF\s+)?(?<progress_time>\d+:\d\d)
+            |
+            # just DNF
+            (?<progress_dnf>DNF)
+          }x,
+          series_and_extra_info: [
+            # just series
+            %r{\A
+              in\s(?<series_names>.+)
+              # empty volume so that names and volumes have equal sizes when turned into arrays
+              (?<series_volumes>)
+            \z}x,
+            # series and volume
+            %r{\A
+              (?<series_names>.+?)
+              ,?\s*
+              \#(?<series_volumes>\d+)
+            \z}x,
+            # extra info
+            %r{\A
+              (?<extra_info>.+)
+            \z}x,
+          ],
+        }.freeze
+      end
+    end
+  end
+end
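The hooks above are all class methods with safe defaults, so a column is defined by overriding only what it needs. A hypothetical subclass (illustrative only, not a file in this diff; regular_columns/genres.rb below is a real minimal example) might look like this:

    module Reading
      module Parsing
        module Rows
          module Regular
            class Tags < Column              # hypothetical column, for illustration
              def self.segment_separator
                /,\s*/                       # segments are split on commas
              end

              def self.flatten_into_arrays
                %i[tag]                      # :tag values are always wrapped in an array
              end

              def self.regexes(_segment_index)
                [%r{\A(?<tag>.+)\z}x]        # one capture group => one key in the parsed hash
              end
            end
          end
        end
      end
    end

With this definition, Parser#parse_segments would turn "history, classics" into { tag: ["history", "classics"] }, because the comma-separated segments are parsed individually and then merged via flatten_into_arrays.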
data/lib/reading/parsing/rows/comment.rb
@@ -0,0 +1,26 @@
+module Reading
+  module Parsing
+    module Rows
+      # A row that is a comment.
+      module Comment
+        using Util::HashArrayDeepFetch
+
+        # No columns; comments are parsed as if the row were blank.
+        # @return [Array]
+        def self.column_classes
+          []
+        end
+
+        # Starts with a comment character and does not include any format emojis.
+        # (Commented rows that DO include format emojis are matched as compact
+        # planned rows.)
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          row_string.lstrip.start_with?(config.fetch(:comment_character))
+        end
+      end
+    end
+  end
+end
data/lib/reading/parsing/rows/compact_planned.rb
@@ -0,0 +1,30 @@
+require_relative "column"
+require_relative "compact_planned_columns/head"
+require_relative "regular_columns/sources"
+
+module Reading
+  module Parsing
+    module Rows
+      # A row that contains compact planned items.
+      module CompactPlanned
+        using Util::HashArrayDeepFetch
+
+        # The columns that are possible in this type of row.
+        # @return [Array<Class>]
+        def self.column_classes
+          [CompactPlanned::Head, Regular::Sources]
+        end
+
+        # Starts with a comment character and includes one or more format emojis.
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          row_string.lstrip.start_with?(config.fetch(:comment_character)) &&
+            row_string.match?(config.deep_fetch(:regex, :formats)) &&
+            row_string.count(config.fetch(:column_separator)) <= column_classes.count - 1
+        end
+      end
+    end
+  end
+end
data/lib/reading/parsing/rows/compact_planned_columns/head.rb
@@ -0,0 +1,60 @@
+module Reading
+  module Parsing
+    module Rows
+      module CompactPlanned
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#compact-planned-items
+        # and the sections following.
+        class Head < Column
+          def self.split_by_format?
+            true
+          end
+
+          def self.regexes_before_formats
+            [
+              %r{\A
+                \\ # comment character
+                \s*
+                (
+                  (?<genres>[^a-z]+)?
+                  \s*
+                  (?<sources>@.+)?
+                  \s*:
+                )?
+              \z}x,
+            ]
+          end
+
+          def self.segment_separator
+            /\s*--\s*/
+          end
+
+          def self.flatten_into_arrays
+            %i[extra_info series_names series_volumes]
+          end
+
+          def self.tweaks
+            {
+              genres: -> { _1.downcase.split(/\s*,\s*/) },
+              sources: -> { _1.split(/\s*@/).map(&:presence).compact }
+            }
+          end
+
+          def self.regexes(segment_index)
+            [
+              # author, title, sources
+              (%r{\A
+                (
+                  (?<author>[^@]+?)
+                  \s+-\s+
+                )?
+                (?<title>[^@]+)
+                (?<sources>@.+)?
+              \z}x if segment_index.zero?),
+              *Column::SHARED_REGEXES[:series_and_extra_info],
+            ].compact
+          end
+        end
+      end
+    end
+  end
+end
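Putting the pieces together, a compact planned row's Head column would come out of Parser#parse_row_to_intermediate_hash roughly as follows. This is a hedged trace derived from the regexes and tweaks above, assuming the default mapping of 📕 to :print; the row and the exact output shape are illustrative, not taken from the diff:

    # Input row (compact planned):
    #   \SCIENCE: 📕Carl Sagan - Cosmos -- in Sagan Trilogy
    #
    # Head column in the intermediate hash (approximate):
    {
      head: [{
        author: "Carl Sagan",
        title: "Cosmos",
        series_names: ["Sagan Trilogy"],   # from SHARED_REGEXES[:series_and_extra_info]
        series_volumes: [nil],             # nil kept so names and volumes stay paired
        format: :print,                    # assumed default emoji mapping
        genres: ["science"],               # merged in from the before-formats segment
      }]
    }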
data/lib/reading/parsing/rows/regular.rb
@@ -0,0 +1,33 @@
+require_relative "column"
+require_relative "regular_columns/rating"
+require_relative "regular_columns/head"
+require_relative "regular_columns/sources"
+require_relative "regular_columns/start_dates"
+require_relative "regular_columns/end_dates"
+require_relative "regular_columns/genres"
+require_relative "regular_columns/length"
+require_relative "regular_columns/notes"
+require_relative "regular_columns/history"
+
+module Reading
+  module Parsing
+    module Rows
+      # A normal row of (usually) one item.
+      module Regular
+        # The columns that are possible in this type of row.
+        # @return [Array<Class>]
+        def self.column_classes
+          [Rating, Head, Sources, StartDates, EndDates, Genres, Length, Notes, History]
+        end
+
+        # Does not start with a comment character.
+        # @param row_string [String]
+        # @param config [Hash]
+        # @return [Boolean]
+        def self.match?(row_string, config)
+          !row_string.lstrip.start_with?(config.fetch(:comment_character))
+        end
+      end
+    end
+  end
+end
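Row-type detection is a simple first-match search over these three modules, in the order used by Parser#extract_columns. A hedged sketch (not from the diff), reusing the `config` hash from the Parser sketch above and assuming the default comment character "\":

    rows = Reading::Parsing::Rows
    row_types = [rows::Regular, rows::CompactPlanned, rows::Comment]

    row_types.find { |type| type.match?('3|📕Utopia', config) }
    # => Reading::Parsing::Rows::Regular

    row_types.find { |type| type.match?('\SCIENCE: 📕Cosmos', config) }
    # => Reading::Parsing::Rows::CompactPlanned

    row_types.find { |type| type.match?('\just a comment', config) }
    # => Reading::Parsing::Rows::Comment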
data/lib/reading/parsing/rows/regular_columns/end_dates.rb
@@ -0,0 +1,20 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
+        class EndDates < Column
+          def self.segment_separator
+            /,\s*/
+          end
+
+          def self.regexes(segment_index)
+            [%r{\A
+              (?<date>\d{4}/\d\d?/\d\d?)
+            \z}x]
+          end
+        end
+      end
+    end
+  end
+end
data/lib/reading/parsing/rows/regular_columns/genres.rb
@@ -0,0 +1,20 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#genres-column
+        class Genres < Column
+          def self.segment_separator
+            /,\s*/
+          end
+
+          def self.regexes(segment_index)
+            [%r{\A
+              (?<genre>.+)
+            \z}x]
+          end
+        end
+      end
+    end
+  end
+end
data/lib/reading/parsing/rows/regular_columns/head.rb
@@ -0,0 +1,45 @@
+module Reading
+  module Parsing
+    module Rows
+      module Regular
+        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-title
+        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-dnf
+        # and the sections following.
+        class Head < Column
+          def self.split_by_format?
+            true
+          end
+
+          def self.regexes_before_formats
+            [
+              /\A#{Column::SHARED_REGEXES[:progress]}\z/,
+              /.+/,
+            ]
+          end
+
+          def self.segment_separator
+            /\s*--\s*/
+          end
+
+          def self.flatten_into_arrays
+            %i[extra_info series_names series_volumes]
+          end
+
+          def self.regexes(segment_index)
+            [
+              # author and title
+              (%r{\A
+                (
+                  (?<author>.+?)
+                  \s+-\s+
+                )?
+                (?<title>.+)
+              \z}x if segment_index.zero?),
+              *Column::SHARED_REGEXES[:series_and_extra_info],
+            ].compact
+          end
+        end
+      end
+    end
+  end
+end
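As a worked example of how ::regexes and Column::SHARED_REGEXES divide up a regular Head column, here are the segments of the example row from parser.rb above, each shown with the output of the (private) Parser#parse_segment — the values match the intermediate hash shown at the top of that file:

    parse_segment(Head, "Thomas More - Utopia", 0)
    # => { author: "Thomas More", title: "Utopia" }

    parse_segment(Head, "trans. Robert Adams", 1)
    # => { extra_info: ["trans. Robert Adams"] }

    parse_segment(Head, "ed. George Logan", 2)
    # => { extra_info: ["ed. George Logan"] }

    parse_segment(Head, "in Cambridge History of Political Thought", 3)
    # => { series_names: ["Cambridge History of Political Thought"], series_volumes: [nil] }

Because :extra_info, :series_names, and :series_volumes are in Head.flatten_into_arrays, Parser#parse_segments then merges these four hashes into the single head hash shown in parser.rb.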