reading 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/reading +5 -5
- data/bin/readingfile +31 -0
- data/lib/reading/config.rb +96 -108
- data/lib/reading/errors.rb +10 -66
- data/lib/reading/filter.rb +95 -0
- data/lib/reading/item/time_length.rb +140 -0
- data/lib/reading/item/view.rb +121 -0
- data/lib/reading/item.rb +117 -0
- data/lib/reading/parsing/attributes/attribute.rb +26 -0
- data/lib/reading/parsing/attributes/author.rb +15 -0
- data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
- data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
- data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
- data/lib/reading/parsing/attributes/experiences.rb +27 -0
- data/lib/reading/parsing/attributes/genres.rb +16 -0
- data/lib/reading/parsing/attributes/notes.rb +22 -0
- data/lib/reading/parsing/attributes/rating.rb +17 -0
- data/lib/reading/parsing/attributes/shared.rb +62 -0
- data/lib/reading/parsing/attributes/title.rb +21 -0
- data/lib/reading/parsing/attributes/variants.rb +77 -0
- data/lib/reading/parsing/csv.rb +112 -0
- data/lib/reading/parsing/parser.rb +292 -0
- data/lib/reading/parsing/rows/column.rb +131 -0
- data/lib/reading/parsing/rows/comment.rb +26 -0
- data/lib/reading/parsing/rows/compact_planned.rb +30 -0
- data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
- data/lib/reading/parsing/rows/regular.rb +33 -0
- data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
- data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
- data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
- data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
- data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
- data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
- data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
- data/lib/reading/parsing/transformer.rb +70 -0
- data/lib/reading/util/hash_compact_by_template.rb +1 -0
- data/lib/reading/util/hash_deep_merge.rb +1 -1
- data/lib/reading/util/hash_to_data.rb +30 -0
- data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
- data/lib/reading/util/string_truncate.rb +13 -4
- data/lib/reading/version.rb +1 -1
- data/lib/reading.rb +49 -0
- metadata +76 -42
- data/lib/reading/attribute/all_attributes.rb +0 -83
- data/lib/reading/attribute/attribute.rb +0 -25
- data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
- data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
- data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
- data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
- data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
- data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
- data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
- data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
- data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
- data/lib/reading/csv.rb +0 -76
- data/lib/reading/line.rb +0 -23
- data/lib/reading/row/blank_row.rb +0 -23
- data/lib/reading/row/compact_planned_row.rb +0 -130
- data/lib/reading/row/regular_row.rb +0 -99
- data/lib/reading/row/row.rb +0 -88
- data/lib/reading/util/hash_to_struct.rb +0 -29
@@ -0,0 +1,62 @@
|
|
1
|
+
module Reading
  module Parsing
    module Attributes
      # Sub-attribute extraction shared among the attribute transformers.
      module Shared
        # Extracts the :progress sub-attribute (percent, pages, or time) from
        # the given hash.
        # @param hash [Hash] any parsed hash that contains progress.
        # @return [Float, Integer, Item::TimeLength, nil]
        def self.progress(hash)
          # Checked lazily, in order of precedence; DNF and "done" markers are
          # fallbacks when no explicit amount was parsed.
          hash[:progress_percent]&.then { |percent| percent.to_f / 100 } ||
            hash[:progress_pages]&.then(&:to_i) ||
            hash[:progress_time]&.then { |time| Item::TimeLength.parse(time) } ||
            (0 if hash[:progress_dnf]) ||
            (1.0 if hash[:progress_done])
        end

        # Extracts the :length sub-attribute (pages or time) from the given hash.
        # @param hash [Hash] any parsed hash that contains length.
        # @param key_name [Symbol] the first part of the keys to be checked.
        # @param episodic [Boolean] whether to look for episodic (not total) length.
        #   If false, returns nil if hash contains :each. If true, returns a
        #   length only if hash contains :each or if it has repetitions, in
        #   which case repetitions are ignored. Examples of episodic lengths
        #   (before parsing) are "0:30 each" and "1:00 x14" (where the episodic
        #   length is 1:00). Examples of non-episodic lengths are "0:30" and "14:00".
        # @param ignore_repetitions [Boolean] if true, ignores repetitions so
        #   that e.g. "1:00 x14" gives a length of 1 hour instead of 14 hours.
        #   This is useful for the History column, where that 1 hour can be used
        #   as the default amount.
        # @return [Float, Integer, Item::TimeLength, nil]
        def self.length(hash, key_name: :length, episodic: false, ignore_repetitions: false)
          return nil unless hash

          value = hash[:"#{key_name}_pages"]&.to_i ||
            hash[:"#{key_name}_time"]&.then { |time| Item::TimeLength.parse(time) }

          return nil unless value

          if hash[:each]
            # With ":each" the total length is calculated from the History
            # column instead, so only an episodic length can be returned here.
            return episodic ? value : nil
          end

          if hash[:repetitions]
            return value if episodic

            value *= hash[:repetitions].to_i unless ignore_repetitions
            value
          else
            # No :each and no repetitions: there is no episodic length.
            episodic ? nil : value
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Reading
  module Parsing
    module Attributes
      # Transformer for the :title item attribute.
      class Title < Attribute
        # Extracts the title of the item at the given position in the Head column.
        # @param parsed_row [Hash] a parsed row (the intermediate hash).
        # @param head_index [Integer] current item's position in the Head column.
        # @raise [InvalidHeadError] if the title is missing.
        # @return [String]
        def transform_from_parsed(parsed_row, head_index)
          title = parsed_row[:head][head_index][:title]

          # NOTE(review): a title ending in " -" presumably means the Head
          # column had an author but nothing after the separator — confirm.
          title_missing = title.nil? || title.end_with?(" -")

          if title_missing
            raise InvalidHeadError, "Missing title in the head #{parsed_row[:head][head_index]}"
          end

          title
        end
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Reading
  module Parsing
    module Attributes
      # Transformer for the :variant item attribute.
      class Variants < Attribute
        using Util::HashArrayDeepFetch

        # Builds the array of variants for the item at the given position in
        # the Head column, merging in data from the Sources and Length columns.
        # @param parsed_row [Hash] a parsed row (the intermediate hash).
        # @param head_index [Integer] current item's position in the Head column.
        # @return [Array<Hash>, nil] an array of variants; see
        #   Config#default_config[:item][:template][:variants]
        def transform_from_parsed(parsed_row, head_index)
          head = parsed_row[:head][head_index]

          # Fall back to a single empty variant in case there is no Sources column.
          source_variants = parsed_row[:sources].presence || [{}]

          source_variants&.map { |variant|
            attributes = {
              format: variant[:format] || head[:format],
              series: (series(head) + series(variant)).presence,
              sources: sources(variant) || sources(head),
              isbn: variant[:isbn] || variant[:asin],
              length: Attributes::Shared.length(variant) ||
                Attributes::Shared.length(parsed_row[:length]),
              extra_info: Array(head[:extra_info]) + Array(variant[:extra_info]),
            }

            # Fill in any missing values from the variant template.
            attributes.to_h { |key, value| [key, value || template.fetch(key)] }
          }&.compact&.presence
        end

        # A shortcut to the variant template.
        # @return [Hash]
        def template
          config.deep_fetch(:item, :template, :variants).first
        end

        # The :series sub-attribute for the given parsed hash.
        # @param hash [Hash] any parsed hash that contains :series_names and :series_volumes.
        # @return [Array<Hash>]
        def series(hash)
          names = hash[:series_names] || []
          volumes = hash[:series_volumes] || []

          names.zip(volumes).map { |name, volume|
            # A non-numeric volume becomes nil rather than raising.
            { name: name, volume: Integer(volume, exception: false) }
          }
        end

        # The :sources sub-attribute for the given parsed hash.
        # @param hash [Hash] any parsed hash that contains :sources.
        # @return [Array<Hash>, nil]
        def sources(hash)
          hash[:sources]&.map { |source|
            is_url = source.match?(%r{\Ahttps?://})

            if is_url
              { name: url_name(source), url: source }
            else
              { name: source, url: nil }
            end
          }
        end

        # The name for the given URL string, according to
        # config[:source_names_from_urls], or nil.
        # @param url [String] a URL.
        # @return [String, nil]
        def url_name(url)
          matching_entry = config
            .fetch(:source_names_from_urls)
            .find { |url_part, _name| url.include?(url_part) }

          matching_entry&.last
        end
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Used throughout, in other files.
|
2
|
+
require_relative "../util/blank"
|
3
|
+
require_relative "../util/string_remove"
|
4
|
+
require_relative "../util/string_truncate"
|
5
|
+
require_relative "../util/numeric_to_i_if_whole"
|
6
|
+
require_relative "../util/hash_deep_merge"
|
7
|
+
require_relative "../util/hash_array_deep_fetch"
|
8
|
+
require_relative "../util/hash_compact_by_template"
|
9
|
+
require_relative "../errors"
|
10
|
+
|
11
|
+
# Used just here.
|
12
|
+
require_relative "../config"
|
13
|
+
require_relative "../item"
|
14
|
+
require_relative "parser"
|
15
|
+
require_relative "transformer"
|
16
|
+
|
17
|
+
module Reading
  module Parsing
    #
    # Validates a path or stream (string, file, etc.) of a CSV reading log, then
    # parses it into an array of Items.
    #
    # Parsing happens in two steps:
    #   (1) Parse a row string into an intermediate hash representing the columns.
    #       - See parsing/parser.rb, which uses parsing/rows/*
    #   (2) Transform the intermediate hash into an array of hashes structured
    #       around item attributes rather than CSV columns.
    #       - See parsing/transformer.rb, which uses parsing/attributes/*
    #
    # Keeping these steps separate makes the code easier to understand. It was
    # inspired by the Parslet gem: https://kschiess.github.io/parslet/transform.html
    #
    class CSV
      private attr_reader :parser, :transformer, :hash_output, :item_view

      # Validates a path or stream (string, file, etc.) of a CSV reading log,
      # builds the config, and initializes the parser and transformer.
      # @param path [String] path to the CSV file; used if no stream is given.
      # @param stream [Object] an object responding to #each_line with CSV row(s);
      #   if nil, path is used instead.
      # @param config [Hash] a custom config which overrides the defaults,
      #   e.g. { errors: { styling: :html } }
      # @param hash_output [Boolean] whether an array of raw Hashes should be
      #   returned, without Items being created from them.
      # @param item_view [Class, nil, Boolean] the class that will be used to build
      #   each Item's view object, or nil/false if no view object should be built.
      #   If you use a custom view class, the only requirement is that its
      #   #initialize take an Item and a full config as arguments.
      def initialize(path = nil, stream: nil, config: {}, hash_output: false, item_view: Item::View)
        validate_path_or_stream(path, stream)
        full_config = Config.new(config).hash

        @path = path
        @stream = stream
        @hash_output = hash_output
        @item_view = item_view
        @parser = Parser.new(full_config)
        @transformer = Transformer.new(full_config)
      end

      # Parses and transforms the reading log into item data.
      # @return [Array<Item>] an array of Items like the template in
      #   Config#default_config[:item][:template]. The Items are identical in
      #   structure to that Hash (with every inner Hash replaced by a Data for
      #   dot access).
      def parse
        input = @stream || File.open(@path)
        items = []

        input.each_line do |line|
          begin
            intermediate = parser.parse_row_to_intermediate_hash(line)
            next if intermediate.empty? # When the row is blank or a comment.
            row_items = transformer.transform_intermediate_hash_to_item_hashes(intermediate)
          rescue Reading::Error => e
            # Re-raise the same error class with the row's text appended,
            # to make the offending row easy to find.
            raise e.class, "#{e.message} in the row \"#{line}\""
          end

          items += row_items
        end

        if hash_output
          items
        else
          items.map { |item_hash| Item.new(item_hash, view: item_view) }
        end
      ensure
        # NOTE(review): this also closes a caller-provided stream when it
        # responds to #close — confirm that's intended.
        input.close if input.respond_to?(:close)
      end

      private

      # Checks on the given stream and path (arguments to #initialize).
      # @raise [FileError] if the given path is invalid.
      # @raise [ArgumentError] if both stream and path are nil.
      def validate_path_or_stream(path, stream)
        if stream && stream.respond_to?(:each_line)
          return true
        elsif path
          if !File.exist?(path)
            raise FileError, "File not found! #{path}"
          elsif File.directory?(path)
            raise FileError, "A file is expected, but the path given is a directory: #{path}"
          end
        else
          raise ArgumentError,
            "Either a file path or a stream (string, file, etc.) must be provided."
        end
      end
    end
  end
end
|
@@ -0,0 +1,292 @@
|
|
1
|
+
require_relative "rows/regular"
|
2
|
+
require_relative "rows/compact_planned"
|
3
|
+
require_relative "rows/comment"
|
4
|
+
|
5
|
+
module Reading
  module Parsing
    #
    # Parses a string containing a row of a CSV reading log, into a hash
    # mirroring the structure of the row. This hash is an intermediate form and
    # not the final item data. It's the raw material for Parsing::Transformer to
    # generate the final item data.
    #
    # Below is an example intermediate hash parsed from this row, which has a Rating
    # column, then a Head column containing an author, title, series, and extra info:
    #
    # 3|📕Thomas More - Utopia -- trans. Robert Adams -- ed. George Logan -- in Cambridge History of Political Thought
    #
    # {
    #   rating: { number: "3" },
    #   head: [{
    #     author: "Thomas More",
    #     title: "Utopia",
    #     series_names: ["Cambridge History of Political Thought"],
    #     series_volumes: [nil],
    #     extra_info: ["trans. Robert Adams", "ed. George Logan"],
    #     format: :print,
    #   }]
    # }
    #
    # The hash's top-level keys are column names. The nested keys come from
    # regex capture group names in each column (for this example, see ::regexes
    # in rating.rb and head.rb in parsing/rows/regular_columns).
    #
    # All the rest is just details of how the parts of a column are joined:
    #
    # - The :head value is an array because Head.split_by_format? is
    #   true (because a Head column can potentially contain multiple items).
    #   That's also where { format: :print } comes from.
    #
    # - The :series_names and :series_volumes values are arrays because these
    #   keys are in Head.flatten_into_arrays, which causes the column's segments
    #   (separated by " -- ") to be merged into one hash.
    #
    class Parser
      using Util::HashArrayDeepFetch
      using Util::StringRemove

      attr_reader :config

      # @param config [Hash] an entire config.
      def initialize(config)
        @config = config
      end

      # Parses a row string into a hash that mirrors the structure of the row.
      # @param string [String] a string containing a row of a CSV reading log.
      # @return [Hash]
      def parse_row_to_intermediate_hash(string)
        columns = extract_columns(string)

        if config.fetch(:skip_compact_planned) && columns.has_key?(Rows::CompactPlanned::Head)
          return {}
        end

        columns.map { |column, column_string|
          parse_column(column, column_string)
        }.to_h
      end

      private

      # Splits the row string by column and pairs them in a hash with column
      # classes, which contain the information necessary to parse each column.
      # @param string [String] a string containing a row of a CSV reading log.
      # @raise [TooManyColumnsError] if the row has more columns than the row
      #   type allows.
      # @return [Hash{Class => String}] a hash whose keys are classes inheriting
      #   Parsing::Rows::Column.
      def extract_columns(string)
        string = string.dup.force_encoding(Encoding::UTF_8)
        column_strings = string.split(config.fetch(:column_separator))

        # The first matching row type determines which columns are possible.
        row_types = [Rows::Regular, Rows::CompactPlanned, Rows::Comment]
        column_classes = row_types
          .find { |row_type| row_type.match?(string, config) }
          .column_classes
          .filter { |column_class|
            config.fetch(:enabled_columns).include?(column_class.to_sym)
          }

        # A comment row has zero column classes, so skip the check there.
        if !column_classes.empty? && column_strings.count > column_classes.count
          raise TooManyColumnsError, "Too many columns"
        end

        column_classes
          .zip(column_strings)
          .reject { |_class, column_string| column_string.nil? }
          .to_h
      end

      # Parses a column into an array of two elements (a key for the column name
      # and a value of its contents).
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param column_string [String] a string containing a column from a row.
      # @return [Array(Symbol, Hash), Array(Symbol, Array)]
      def parse_column(column_class, column_string)
        # Multiple format emojis are possible in some columns:
        #   - Head column, for multiple items.
        #   - Sources column, for multiple variants of an item.
        #   - Compact planned head column, for multiple items.
        # This is the default case below the two guard clauses. It's more complex
        # because there's possibly a string before the first format, and there's
        # an extra level of nesting in the returned array.

        # Simplest case: if the column is never split by format, return the
        # column name and the parsed segment(s), which is either a Hash (if the
        # column can't have multiple segments or if its segments are flattened)
        # or an Array (if there are multiple segments and they're not flattened).
        if !column_class.split_by_format?
          parsed_column = parse_segments(column_class, column_string)
          return [column_class.to_sym, parsed_column]
        end

        # Also simple: if the column *can* be split by format but in this row
        # it doesn't contain any format emojis, return the same as above but
        # with an extra level of nesting (except when the parsed result is nil).
        # (split_by_format? is necessarily true here, thanks to the guard above.)
        if !column_string.match?(config.deep_fetch(:regex, :formats))
          parsed_column = parse_segments(column_class, column_string)
          # Wrap a non-empty value in an array so that e.g. a head without
          # emojis is still an array. This way the extra level of nesting can
          # be consistently expected for columns that *can* be split by format.
          parsed_column_nonempty_nested = [parsed_column.presence].compact
          return [column_class.to_sym, parsed_column_nonempty_nested]
        end

        # The rest is the complex case: if the column *can and is* split by format.

        # Each format plus the string after it.
        format_strings = column_string.split(config.deep_fetch(:regex, :formats_split))

        # If there's a string before the first format, e.g. "DNF" in Head column.
        unless format_strings.first.match?(config.deep_fetch(:regex, :formats))
          before_formats = parse_segment(column_class, format_strings.shift, before_formats: true)
        end

        # Parse each format-plus-string into an array of segments.
        heads = format_strings.map { |format_string|
          format_emoji = format_string[config.deep_fetch(:regex, :formats)]
          format_string.remove!(format_emoji)
          format = config.fetch(:formats).key(format_emoji)

          parse_segments(column_class, format_string)
            .merge(format: format)
        }

        # Combine values of conflicting keys so that in a compact planned
        # Head column, sources from before_formats are not ignored.
        if before_formats
          heads.each do |head|
            head.merge!(before_formats) do |_key, old_v, new_v|
              (new_v + old_v).uniq
            end
          end
        end

        [column_class.to_sym, heads]
      end

      # Parses a string of segments, e.g. "Utopia -- trans. Robert Adams -- ed. George Logan"
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param string [String] a string containing segments, which is either an
      #   entire column or (for columns that are split by format emoji) a string
      #   following a format emoji.
      # @return [Array<Hash>, Hash] either an array of parsed segments (hashes),
      #   or a single hash if the column can't be split by segment or if the
      #   segments are flattened into one hash.
      def parse_segments(column_class, string)
        return {} if string.blank?

        # If the column can't be split by segment, parse as a single segment.
        if !column_class.split_by_segment?
          return parse_segment(column_class, string)
        end

        # Add an extra level of nesting if the column can have segment groups,
        # as in "2021/1/28..2/1 x4 -- ..2/3 x5 ---- 11/1 -- 11/2"
        if column_class.split_by_segment_group?
          segments = string
            .split(column_class.segment_group_separator)
            .map { |segment_group|
              segment_group
                .split(column_class.segment_separator)
                .map.with_index { |segment, i|
                  parse_segment(column_class, segment, i)
                }
            }
        else
          segments = string
            .split(column_class.segment_separator)
            .map.with_index { |segment, i|
              parse_segment(column_class, segment, i)
            }
        end

        if column_class.flatten_into_arrays.any?
          segments = segments.reduce { |merged, segment|
            merged.merge!(segment) { |_k, old_v, new_v|
              # old_v is already an array by this point, since its key should be
              # in Column.flatten_into_arrays
              old_v + new_v
            }
          }
        end

        segments
      end

      # Parses a segment using a regular expression from the column class.
      # @param column_class [Class] a class inheriting Parsing::Rows::Column.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param segment_index [Integer] the position of the segment when it's in
      #   part of a series of segments; this can change which regular expressions
      #   are applicable to it.
      # @param before_formats [Boolean] whether to use the before-formats regexes.
      # @raise [ParsingError] if no regex matches the segment.
      # @return [Hash{Symbol => Object}] the parsed segment, whose values are Strings
      #   unless changed via column_class.tweaks or column_class.flatten_into_arrays.
      #   Example: { author: "Bram Stoker", title: "Dracula"}
      def parse_segment(column_class, segment, segment_index = 0, before_formats: false)
        if before_formats
          regexes = column_class.regexes_before_formats
        else
          regexes = column_class.regexes(segment_index)
        end

        # The first matching regex wins.
        parsed_segment = nil
        regexes.each do |regex|
          parsed_segment = parse_segment_with_regex(segment, regex)
          break if parsed_segment
        end

        if parsed_segment.nil?
          raise ParsingError, "Could not parse \"#{segment}\" in " \
            "the #{column_class.column_name} column"
        end

        tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
      end

      # Parses a segment using the given regular expression.
      # @param segment [String] a segment, e.g. "Bram Stoker - Dracula".
      # @param regex [Regexp] the regular expression with which to parse the segment.
      # @return [Hash{Symbol => String}, nil] e.g. { author: "Bram Stoker", title: "Dracula"},
      #   or nil if the regex doesn't match.
      def parse_segment_with_regex(segment, regex)
        segment
          .tr(config.fetch(:ignored_characters), "")
          .strip
          .match(regex)
          &.named_captures
          &.compact
          &.transform_keys(&:to_sym)
          &.transform_values(&:strip)
          &.transform_values(&:presence)
      end

      # Modify the values of the parsed segment according to column_class.tweaks,
      # and wrap them in an array according to column_class.flatten_into_arrays.
      # @param parsed_segment [Hash] e.g. { author: "Bram Stoker", title: "Dracula"}
      # @return [Hash{Symbol => Object}]
      def tweak_and_arrayify_parsed_segment(parsed_segment, column_class)
        column_class.tweaks.each do |key, tweak|
          if parsed_segment.has_key?(key)
            parsed_segment[key] = tweak.call(parsed_segment[key])
          end
        end

        # Ensure that values of keys in column_class.flatten_into_arrays are arrays.
        column_class.flatten_into_arrays.each do |key|
          if parsed_segment.has_key?(key)
            val = parsed_segment[key]
            # Not using Array(val) because that results in an empty array when
            # val is nil, and the nil must be preserved for series name and
            # volume arrays to line up with an equal number of elements (because
            # the volume may be nil).
            parsed_segment[key] = [val] if !val.is_a?(Array)
          end
        end

        parsed_segment
      end
    end
  end
end
|