reading 0.6.1 → 0.7.0
- checksums.yaml +4 -4
- data/bin/reading +5 -5
- data/bin/readingfile +31 -0
- data/lib/reading/config.rb +115 -149
- data/lib/reading/errors.rb +10 -66
- data/lib/reading/item/time_length.rb +138 -0
- data/lib/reading/parsing/attributes/attribute.rb +26 -0
- data/lib/reading/parsing/attributes/author.rb +15 -0
- data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
- data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
- data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
- data/lib/reading/parsing/attributes/experiences.rb +27 -0
- data/lib/reading/parsing/attributes/genres.rb +16 -0
- data/lib/reading/parsing/attributes/notes.rb +22 -0
- data/lib/reading/parsing/attributes/rating.rb +17 -0
- data/lib/reading/parsing/attributes/shared.rb +62 -0
- data/lib/reading/parsing/attributes/title.rb +21 -0
- data/lib/reading/parsing/attributes/variants.rb +77 -0
- data/lib/reading/parsing/csv.rb +101 -0
- data/lib/reading/parsing/parser.rb +292 -0
- data/lib/reading/parsing/rows/column.rb +131 -0
- data/lib/reading/parsing/rows/comment.rb +26 -0
- data/lib/reading/parsing/rows/compact_planned.rb +30 -0
- data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
- data/lib/reading/parsing/rows/regular.rb +33 -0
- data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
- data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
- data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
- data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
- data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
- data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
- data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
- data/lib/reading/parsing/transformer.rb +70 -0
- data/lib/reading/util/hash_compact_by_template.rb +1 -0
- data/lib/reading/util/hash_deep_merge.rb +1 -1
- data/lib/reading/util/hash_to_struct.rb +1 -0
- data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
- data/lib/reading/util/string_truncate.rb +13 -4
- data/lib/reading/version.rb +1 -1
- data/lib/reading.rb +18 -0
- metadata +58 -41
- data/lib/reading/attribute/all_attributes.rb +0 -83
- data/lib/reading/attribute/attribute.rb +0 -25
- data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
- data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
- data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
- data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
- data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
- data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
- data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
- data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
- data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
- data/lib/reading/csv.rb +0 -76
- data/lib/reading/line.rb +0 -23
- data/lib/reading/row/blank_row.rb +0 -23
- data/lib/reading/row/compact_planned_row.rb +0 -130
- data/lib/reading/row/regular_row.rb +0 -99
- data/lib/reading/row/row.rb +0 -88
data/lib/reading/parsing/parser.rb
@@ -0,0 +1,292 @@

require_relative "rows/regular"
require_relative "rows/compact_planned"
require_relative "rows/comment"

module Reading
  module Parsing
    #
    # Parses a string containing a row of a CSV reading log, into a hash
    # mirroring the structure of the row. This hash is an intermediate form and
    # not the final item data. It's the raw material for Parsing::Transformer to
    # generate the final item data.
    #
    # Below is an example intermediate hash parsed from this row, which has a Rating
    # column, then a Head column containing an author, title, series, and extra info:
    #
    #   3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
    #
    #   {
    #     rating: { number: "3" },
    #     head: [{
    #       author: "Thomas More",
    #       title: "Utopia",
    #       series_names: ["Cambridge History of Political Thought"],
    #       series_volumes: [nil],
    #       extra_info: ["trans. Robert Adams", "ed. George Logan"],
    #       format: :print,
    #     }]
    #   }
    #
    # The hash's top-level keys are column names. The nested keys come from
    # regex capture group names in each column (for this example, see ::regexes
    # in rating.rb and head.rb in parsing/rows/regular_columns).
    #
    # All the rest is just details of how the parts of a column are joined:
    #
    # - The :head value is an array because Head.split_by_format? is
    #   true (because a Head column can potentially contain multiple items).
    #   That's also where { format: :print } comes from.
    #
    # - The :series_names and :series_volumes values are arrays because these
    #   keys are in Head.flatten_into_arrays, which causes the column's segments
    #   (separated by " -- ") to be merged into one hash.
    #
    class Parser
      using Util::HashArrayDeepFetch
      using Util::StringRemove

      attr_reader :config

      # @param config [Hash] an entire config.
      def initialize(config)
        @config = config
      end

      # Parses a row string into a hash that mirrors the structure of the row.
      # @param string [String] a string containing a row of a CSV reading log.
      # @return [Hash]
      def parse_row_to_intermediate_hash(string)
        columns = extract_columns(string)

        if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
          return {}
        end

        columns.map { |column, column_string|
          parse_column(column, column_string)
        }.to_h
      end

      private

      # Splits the row string by column and pairs them in a hash with column
      # classes, which contain the information necessary to parse each column.
      # @param string [String] a string containing a row of a CSV reading log.
      # @return [Hash{Class => String}] a hash whose keys are classes inheriting
      #   Parsing::Rows::Column.
      def extract_columns(string)
        clean_string = string.dup.force_encoding(Encoding::UTF_8)
        column_strings = clean_string.split(config.fetch(:column_separator))

        row_types = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
        column_classes = row_types
          .find { |row_type| row_type.match?(string, config) }
          .column_classes
          .filter { |column_class|
            config.fetch(:enabled_columns).include?(column_class.to_sym)
          }

        if !column_classes.count.zero? && column_strings.count > column_classes.count
          raise TooManyColumnsError, "Too many columns"
        end

        column_classes
          .zip(column_strings)
          .reject { |_class, string| string.nil? }
          .to_h
      end

      # Parses a column into an array of two elements (a key for the column name
      # and a value of its contents).
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param column_string [String] a string containing a column from a row.
      # @return [Array(Symbol, Hash), Array(Symbol, Array)]
      def parse_column(column_class, column_string)
        # Multiple format emojis are possible in some columns:
        # - Head column, for multiple items.
        # - Sources column, for multiple variants of an item.
        # - Compact planned head column, for multiple items.
        # This is the default case below the two guard clauses. It's more complex
        # because there's possibly a string before the first format, and there's
        # an extra level of nesting in the returned array.

        # Simplest case: if the column is never split by format, return the
        # column name and the parsed segment(s), which is either a Hash (if the
        # column can't have multiple segments or if its segments are flattened)
        # or an Array (if there are multiple segments and they're not flattened).
        if !column_class.split_by_format?
          parsed_column = parse_segments(column_class, column_string)
          return [column_class.to_sym, parsed_column]
        end

        # Also simple: if the column *can* be split by format but in this row
        # it doesn't contain any format emojis, return the same as above but
        # with an extra level of nesting (except when the parsed result is nil).
        if column_class.split_by_format? &&
            !column_string.match?(config.deep_fetch(:regex, :formats))

          parsed_column = parse_segments(column_class, column_string)
          # Wrap a non-empty value in an array so that e.g. a head without
          # emojis is still an array. This way the extra level of nesting can
          # be consistently expected for columns that *can* be split by format.
          parsed_column_nonempty_nested = [parsed_column.presence].compact
          return [column_class.to_sym, parsed_column_nonempty_nested]
        end

        # The rest is the complex case: if the column *can and is* split by format.

        # Each format plus the string after it.
        format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))

        # If there's a string before the first format, e.g. "DNF" in Head column.
        unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
          before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
        end

        # Parse each format-plus-string into an array of segments.
        heads = format_strings.map { |string|
          format_emoji = string[config.deep_fetch(:regex, :formats)]
          string.remove!(format_emoji)
          format = config.fetch(:formats).key(format_emoji)

          parse_segments(column_class, string)
            .merge(format: format)
        }

        # Combine values of conflicting keys so that in a compact planned
        # Head column, sources from before_formats are not ignored.
        if before_formats
          heads.each do |head|
            head.merge!(before_formats) do |k, old_v, new_v|
              (new_v + old_v).uniq
            end
          end
        end

        [column_class.to_sym, heads]
      end

      # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param string [String] a string containing segments, which is either an
      #   entire column or (for columns that are split by format emoji) a string
      #   following a format emoji.
      # @return [Array<Hash>, Hash] either an array of parsed segments (hashes),
      #   or a single hash if the column can't be split by segment or if the
      #   segments are flattened into one hash.
      def parse_segments(column_class, string)
        return {} if string.blank?

        # If the column can't be split by segment, parse as a single segment.
        if !column_class.split_by_segment?
          return parse_segment(column_class, string)
        end

        # Add an extra level of nesting if the column can have segment groups,
        # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
        if column_class.split_by_segment_group?
          segments = string
            .split(column_class.segment_group_separator)
            .map { |segment_group|
              segment_group
                .split(column_class.segment_separator)
                .map.with_index { |segment, i|
                  parse_segment(column_class, segment, i)
                }
            }
        else
          segments = string
            .split(column_class.segment_separator)
            .map.with_index { |segment, i|
              parse_segment(column_class, segment, i)
            }
        end

        if column_class.flatten_into_arrays.any?
          segments = segments.reduce { |merged, segment|
            merged.merge!(segment) { |_k, old_v, new_v|
              # old_v is already an array by this point, since its key should be
              # in Column.flatten_into_arrays
              old_v + new_v
            }
          }
        end

        segments
      end

      # Parses a segment using a regular expression from the column class.
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param segment_index [Integer] the position of the segment when it's
      #   part of a series of segments; this can change which regular expressions
      #   are applicable to it.
      # @param before_formats [Boolean] whether to use the before-formats regexes.
      # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
      #   unless changed via column_class.tweaks or column_class.flatten_into_arrays.
      #   Example: { author: "Bram Stoker", title: "Dracula" }
      def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
        if before_formats
          regexes = column_class.regexes_before_formats
        else
          regexes = column_class.regexes(segment_index)
        end

        parsed_segment = nil
        regexes.each do |regex|
          parsed_segment = parse_segment_with_regex(segment, regex)
          break if parsed_segment
        end

        if parsed_segment.nil?
          raise ParsingError, "Could not parse \"#{segment}\" in " \
            "the #{column_class.column_name} column"
        end

        tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
      end

      # Parses a segment using the given regular expression.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param regex [Regexp] the regular expression with which to parse the segment.
      # @return [Hash{Symbol => String}] e.g. { author: "Bram Stoker", title: "Dracula" }
      def parse_segment_with_regex(segment, regex)
        segment
          .tr(config.fetch(:ignored_characters), "")
          .strip
          .match(regex)
          &.named_captures
          &.compact
          &.transform_keys(&:to_sym)
          &.transform_values(&:strip)
          &.transform_values(&:presence)
      end

      # Modify the values of the parsed segment according to column_class.tweaks,
      # and wrap them in an array according to column_class.flatten_into_arrays.
      # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula" }
      # @return [Hash{Symbol => Object}]
      def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
        column_class.tweaks.each do |key, tweak|
          if parsed_segment.has_key?(key)
            parsed_segment[key] = tweak.call(parsed_segment[key])
          end
        end

        # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
        column_class.flatten_into_arrays.each do |key|
          if parsed_segment.has_key?(key)
            val = parsed_segment[key]
            # Not using Array(val) because that results in an empty array when
            # val is nil, and the nil must be preserved for series name and
            # volume arrays to line up with an equal number of elements (because
            # the volume may be nil).
            parsed_segment[key] = [val] if !val.is_a?(Array)
          end
        end

        parsed_segment
      end
    end
  end
end
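For reference, here is a minimal usage sketch of the new Parser class. It is not part of the diff; it assumes config is the full config hash built by the gem's Config class (see data/lib/reading/config.rb), and the accessor shown for obtaining it is hypothetical.

require "reading"

# Hypothetical accessor for the default config hash; check config.rb for the real API.
config = Reading::Config.new.hash
parser = Reading::Parsing::Parser.new(config)

row = "3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought"
parser.parse_row_to_intermediate_hash(row)
# => an intermediate hash with :rating and :head keys mirroring the row's
#    columns, as described in the Parser class comment above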
data/lib/reading/parsing/rows/column.rb
@@ -0,0 +1,131 @@

module Reading
  module Parsing
    module Rows
      # The base class for all the columns in parsing/rows/compact_planned_columns
      # and parsing/rows/regular_columns.
      class Column
        # The class name changed into a string, e.g. StartDates => "Start Dates"
        # @return [String]
        def self.column_name
          class_name = name.split("::").last
          class_name.gsub(/(.)([A-Z])/, '\1 \2')
        end

        # The class name changed into a symbol, e.g. StartDates => :start_dates
        # @return [Symbol]
        def self.to_sym
          class_name = name.split("::").last
          class_name
            .gsub(/(.)([A-Z])/, '\1_\2')
            .downcase
            .to_sym
        end

        # Whether the column can contain "chunks" each set off by a format emoji.
        # For example, the Head column of a compact planned row typically
        # contains a list of multiple items. (The two others are the Sources
        # column, for multiple variants of an item; and the regular Head column,
        # for multiple items.)
        # @return [Boolean]
        def self.split_by_format?
          false
        end

        # Whether the column can contain multiple segments, e.g. "Cosmos -- 2013 paperback"
        # @return [Boolean]
        def self.split_by_segment?
          !!segment_separator
        end

        # The regular expression used to split segments (e.g. /\s*--\s*/),
        # or nil if the column should not be split by segment.
        # @return [Regexp, nil]
        def self.segment_separator
          nil
        end

        # Whether the column can contain multiple segment groups, e.g.
        # "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
        # @return [Boolean]
        def self.split_by_segment_group?
          !!segment_group_separator
        end

        # The regular expression used to split segment groups (e.g. /\s*----\s*/),
        # or nil if the column should not be split by segment group.
        # @return [Regexp, nil]
        def self.segment_group_separator
          nil
        end

        # Adjustments that are made to captured values at the end of parsing
        # the column. For example, if ::regexes includes a capture group named
        # "sources" and it needs to be split by commas:
        # { sources: -> { _1.split(/\s*,\s*/) } }
        # @return [Hash{Symbol => Proc}]
        def self.tweaks
          {}
        end

        # Keys in the parsed output hash that should be converted to an array, even
        # if only one value was in the input, as in { ... extra_info: ["ed. Jane Doe"] }
        # @return [Array<Symbol>]
        def self.flatten_into_arrays
          []
        end

        # The regular expressions used to parse the column (except the part of
        # the column before the first format emoji, which is in
        # ::regexes_before_formats below). An array because sometimes it's
        # simpler to try several smaller regular expressions in series, and
        # because a regular expression might be applicable only for segments in
        # a certain position. See parsing/rows/regular_columns/head.rb for an example.
        # @param segment_index [Integer] the position of the current segment.
        # @return [Array<Regexp>]
        def self.regexes(segment_index)
          []
        end

        # The regular expressions used to parse the part of the column before
        # the first format emoji.
        # @return [Array<Regexp>]
        def self.regexes_before_formats
          []
        end

        # Regular expressions that are shared across more than one column,
        # placed here just to be DRY.
        SHARED_REGEXES = {
          progress: %r{
            (DNF\s+)?(?<progress_percent>\d\d?)%
            |
            (DNF\s+)?p?(?<progress_pages>\d+)p?
            |
            (DNF\s+)?(?<progress_time>\d+:\d\d)
            |
            # just DNF
            (?<progress_dnf>DNF)
          }x,
          series_and_extra_info: [
            # just series
            %r{\A
              in\s(?<series_names>.+)
              # empty volume so that names and volumes have equal sizes when turned into arrays
              (?<series_volumes>)
            \z}x,
            # series and volume
            %r{\A
              (?<series_names>.+?)
              ,?\s*
              \#(?<series_volumes>\d+)
            \z}x,
            # extra info
            %r{\A
              (?<extra_info>.+)
            \z}x,
          ],
        }.freeze
      end
    end
  end
end
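To illustrate how the Column hooks above fit together, here is a hypothetical subclass, not in the diff and not wired into any row type: a column whose comma-separated segments are each parsed with one regex, with the captured value always wrapped in an array. It assumes the gem's Column class is already loaded.

module Reading
  module Parsing
    module Rows
      # Hypothetical column, for illustration only.
      class ExampleTags < Column
        # Segments are separated by commas, e.g. "history, science".
        def self.segment_separator
          /,\s*/
        end

        # Wrap :tag values in an array even when a segment has only one.
        def self.flatten_into_arrays
          %i[tag]
        end

        # One regex per segment; the capture group name becomes the hash key.
        def self.regexes(_segment_index)
          [%r{\A(?<tag>.+)\z}x]
        end
      end
    end
  end
end

With the Parser above, a column string like "history, science" would come out as example_tags: { tag: ["history", "science"] } in the intermediate hash.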
data/lib/reading/parsing/rows/comment.rb
@@ -0,0 +1,26 @@

module Reading
  module Parsing
    module Rows
      # A row that is a comment.
      module Comment
        using Util::HashArrayDeepFetch

        # No columns; comments are parsed as if the row were blank.
        # @return [Array]
        def self.column_classes
          []
        end

        # Starts with a comment character and does not include any format emojis.
        # (Commented rows that DO include format emojis are matched as compact
        # planned rows.)
        # @param row_string [String]
        # @param config [Hash]
        # @return [Boolean]
        def self.match?(row_string, config)
          row_string.lstrip.start_with?(config.fetch(:comment_character))
        end
      end
    end
  end
end
data/lib/reading/parsing/rows/compact_planned.rb
@@ -0,0 +1,30 @@

require_relative "column"
require_relative "compact_planned_columns/head"
require_relative "regular_columns/sources"

module Reading
  module Parsing
    module Rows
      # A row that contains compact planned items.
      module CompactPlanned
        using Util::HashArrayDeepFetch

        # The columns that are possible in this type of row.
        # @return [Array<Class>]
        def self.column_classes
          [CompactPlanned::Head, Regular::Sources]
        end

        # Starts with a comment character and includes one or more format emojis.
        # @param row_string [String]
        # @param config [Hash]
        # @return [Boolean]
        def self.match?(row_string, config)
          row_string.lstrip.start_with?(config.fetch(:comment_character)) &&
            row_string.match?(config.deep_fetch(:regex, :formats)) &&
            row_string.count(config.fetch(:column_separator)) <= column_classes.count - 1
        end
      end
    end
  end
end
data/lib/reading/parsing/rows/compact_planned_columns/head.rb
@@ -0,0 +1,60 @@

module Reading
  module Parsing
    module Rows
      module CompactPlanned
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#compact-planned-items
        # and the sections following.
        class Head < Column
          def self.split_by_format?
            true
          end

          def self.regexes_before_formats
            [
              %r{\A
                \\ # comment character
                \s*
                (
                  (?<genres>[^a-z]+)?
                  \s*
                  (?<sources>@.+)?
                  \s*:
                )?
              \z}x,
            ]
          end

          def self.segment_separator
            /\s*--\s*/
          end

          def self.flatten_into_arrays
            %i[extra_info series_names series_volumes]
          end

          def self.tweaks
            {
              genres: -> { _1.downcase.split(/\s*,\s*/) },
              sources: -> { _1.split(/\s*@/).map(&:presence).compact }
            }
          end

          def self.regexes(segment_index)
            [
              # author, title, sources
              (%r{\A
                (
                  (?<author>[^@]+?)
                  \s+-\s+
                )?
                (?<title>[^@]+)
                (?<sources>@.+)?
              \z}x if segment_index.zero?),
              *Column::SHARED_REGEXES[:series_and_extra_info],
            ].compact
          end
        end
      end
    end
  end
end
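As a sketch of the before-formats regex above (not in the diff; the input string and source name are made up, and it assumes requiring "reading" loads the parsing classes):

require "reading"

head = Reading::Parsing::Rows::CompactPlanned::Head
before = '\FANTASY, HISTORY @Lexpub:'
before.match(head.regexes_before_formats.first).named_captures
# => { "genres" => "FANTASY, HISTORY ", "sources" => "@Lexpub" }
# The Parser then strips these values and applies ::tweaks, giving
# genres ["fantasy", "history"] and sources ["Lexpub"].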
data/lib/reading/parsing/rows/regular.rb
@@ -0,0 +1,33 @@

require_relative "column"
require_relative "regular_columns/rating"
require_relative "regular_columns/head"
require_relative "regular_columns/sources"
require_relative "regular_columns/start_dates"
require_relative "regular_columns/end_dates"
require_relative "regular_columns/genres"
require_relative "regular_columns/length"
require_relative "regular_columns/notes"
require_relative "regular_columns/history"

module Reading
  module Parsing
    module Rows
      # A normal row of (usually) one item.
      module Regular
        # The columns that are possible in this type of row.
        # @return [Array<Class>]
        def self.column_classes
          [Rating, Head, Sources, StartDates, EndDates, Genres, Length, Notes, History]
        end

        # Does not start with a comment character.
        # @param row_string [String]
        # @param config [Hash]
        # @return [Boolean]
        def self.match?(row_string, config)
          !row_string.lstrip.start_with?(config.fetch(:comment_character))
        end
      end
    end
  end
end
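The three row modules partition rows via their match? methods, and the Parser tries them in the order Regular, CompactPlanned, Comment (see extract_columns above). A small sketch, assuming config is the gem's full config hash with "\" as the comment character and 📕 among the format emojis:

require "reading"

Reading::Parsing::Rows::Regular.match?("3|📕Utopia", config)
# => true (does not start with the comment character)
Reading::Parsing::Rows::CompactPlanned.match?("\\FANTASY: 📕Utopia", config)
# => true (comment character plus a format emoji)
Reading::Parsing::Rows::Comment.match?("\\ a plain note", config)
# => true (comment character, no format emoji; checked last)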
data/lib/reading/parsing/rows/regular_columns/end_dates.rb
@@ -0,0 +1,20 @@

module Reading
  module Parsing
    module Rows
      module Regular
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
        class EndDates < Column
          def self.segment_separator
            /,\s*/
          end

          def self.regexes(segment_index)
            [%r{\A
              (?<date>\d{4}/\d\d?/\d\d?)
            \z}x]
          end
        end
      end
    end
  end
end
data/lib/reading/parsing/rows/regular_columns/genres.rb
@@ -0,0 +1,20 @@

module Reading
  module Parsing
    module Rows
      module Regular
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#genres-column
        class Genres < Column
          def self.segment_separator
            /,\s*/
          end

          def self.regexes(segment_index)
            [%r{\A
              (?<genre>.+)
            \z}x]
          end
        end
      end
    end
  end
end
data/lib/reading/parsing/rows/regular_columns/head.rb
@@ -0,0 +1,45 @@

module Reading
  module Parsing
    module Rows
      module Regular
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-title
        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#head-column-dnf
        # and the sections following.
        class Head < Column
          def self.split_by_format?
            true
          end

          def self.regexes_before_formats
            [
              /\A#{Column::SHARED_REGEXES[:progress]}\z/,
              /.+/,
            ]
          end

          def self.segment_separator
            /\s*--\s*/
          end

          def self.flatten_into_arrays
            %i[extra_info series_names series_volumes]
          end

          def self.regexes(segment_index)
            [
              # author and title
              (%r{\A
                (
                  (?<author>.+?)
                  \s+-\s+
                )?
                (?<title>.+)
              \z}x if segment_index.zero?),
              *Column::SHARED_REGEXES[:series_and_extra_info],
            ].compact
          end
        end
      end
    end
  end
end
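As a quick check of the Head regexes above, here is a sketch (not in the diff) of matching a first segment by hand; it assumes requiring "reading" loads the parsing classes.

require "reading"

segment = "Thomas More - Utopia"
author_title_regex = Reading::Parsing::Rows::Regular::Head.regexes(0).first
segment.match(author_title_regex).named_captures
# => { "author" => "Thomas More", "title" => "Utopia" }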