reading 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/reading +5 -5
- data/bin/readingfile +31 -0
- data/lib/reading/config.rb +115 -149
- data/lib/reading/errors.rb +10 -66
- data/lib/reading/item/time_length.rb +138 -0
- data/lib/reading/parsing/attributes/attribute.rb +26 -0
- data/lib/reading/parsing/attributes/author.rb +15 -0
- data/lib/reading/parsing/attributes/experiences/dates_and_head_transformer.rb +106 -0
- data/lib/reading/parsing/attributes/experiences/history_transformer.rb +452 -0
- data/lib/reading/parsing/attributes/experiences/spans_validator.rb +149 -0
- data/lib/reading/parsing/attributes/experiences.rb +27 -0
- data/lib/reading/parsing/attributes/genres.rb +16 -0
- data/lib/reading/parsing/attributes/notes.rb +22 -0
- data/lib/reading/parsing/attributes/rating.rb +17 -0
- data/lib/reading/parsing/attributes/shared.rb +62 -0
- data/lib/reading/parsing/attributes/title.rb +21 -0
- data/lib/reading/parsing/attributes/variants.rb +77 -0
- data/lib/reading/parsing/csv.rb +101 -0
- data/lib/reading/parsing/parser.rb +292 -0
- data/lib/reading/parsing/rows/column.rb +131 -0
- data/lib/reading/parsing/rows/comment.rb +26 -0
- data/lib/reading/parsing/rows/compact_planned.rb +30 -0
- data/lib/reading/parsing/rows/compact_planned_columns/head.rb +60 -0
- data/lib/reading/parsing/rows/regular.rb +33 -0
- data/lib/reading/parsing/rows/regular_columns/end_dates.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/genres.rb +20 -0
- data/lib/reading/parsing/rows/regular_columns/head.rb +45 -0
- data/lib/reading/parsing/rows/regular_columns/history.rb +143 -0
- data/lib/reading/parsing/rows/regular_columns/length.rb +35 -0
- data/lib/reading/parsing/rows/regular_columns/notes.rb +32 -0
- data/lib/reading/parsing/rows/regular_columns/rating.rb +15 -0
- data/lib/reading/parsing/rows/regular_columns/sources.rb +94 -0
- data/lib/reading/parsing/rows/regular_columns/start_dates.rb +35 -0
- data/lib/reading/parsing/transformer.rb +70 -0
- data/lib/reading/util/hash_compact_by_template.rb +1 -0
- data/lib/reading/util/hash_deep_merge.rb +1 -1
- data/lib/reading/util/hash_to_struct.rb +1 -0
- data/lib/reading/util/numeric_to_i_if_whole.rb +12 -0
- data/lib/reading/util/string_truncate.rb +13 -4
- data/lib/reading/version.rb +1 -1
- data/lib/reading.rb +18 -0
- metadata +58 -41
- data/lib/reading/attribute/all_attributes.rb +0 -83
- data/lib/reading/attribute/attribute.rb +0 -25
- data/lib/reading/attribute/experiences/dates_validator.rb +0 -94
- data/lib/reading/attribute/experiences/experiences_attribute.rb +0 -74
- data/lib/reading/attribute/experiences/progress_subattribute.rb +0 -48
- data/lib/reading/attribute/experiences/spans_subattribute.rb +0 -82
- data/lib/reading/attribute/variants/extra_info_subattribute.rb +0 -44
- data/lib/reading/attribute/variants/length_subattribute.rb +0 -45
- data/lib/reading/attribute/variants/series_subattribute.rb +0 -57
- data/lib/reading/attribute/variants/sources_subattribute.rb +0 -78
- data/lib/reading/attribute/variants/variants_attribute.rb +0 -69
- data/lib/reading/csv.rb +0 -76
- data/lib/reading/line.rb +0 -23
- data/lib/reading/row/blank_row.rb +0 -23
- data/lib/reading/row/compact_planned_row.rb +0 -130
- data/lib/reading/row/regular_row.rb +0 -99
- data/lib/reading/row/row.rb +0 -88
@@ -0,0 +1,143 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Separators, tweaks, and regular expressions for the History column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#history-column
        class History < Column
          # @return [Regexp] two hyphens with optional surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # @return [Regexp] four hyphens with optional surrounding whitespace.
          def self.segment_group_separator
            /\s*----\s*/
          end

          # Lambdas keyed by capture name (presumably applied by Column to the
          # captured string of the same name; verify against Column).
          #
          # The except_dates lambda splits a comma-separated list and converts
          # each entry that fully matches START_END_DATES_REGEX into a Hash of
          # its non-nil named captures with Symbol keys. Entries that do not
          # match, or that yield no captures, are dropped.
          # @return [Hash{Symbol => Proc}]
          def self.tweaks
            {
              except_dates: ->(dates_list) {
                dates_list.split(/\s*,\s*/).filter_map { |date_string|
                  matched = date_string.match(
                    %r{\A
                      #{START_END_DATES_REGEX}
                    \z}xo
                  )

                  matched
                    &.named_captures
                    &.compact
                    &.transform_keys(&:to_sym)
                    &.presence
                }
              },
            }
          end

          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>] the exception-dates regex followed by the
          #   normal-entry regex.
          def self.regexes(segment_index)
            # entry of exception dates ("but not on these dates")
            exception_dates_entry =
              %r{\A
                not
                \s+
                (?<except_dates>.+)
              \z}x

            # normal entry
            normal_entry =
              %r{\A
                \(?\s*
                # variant, group before first start date
                (
                  (
                    v(?<variant>\d)
                    (\s+|\z)
                  )?
                  (
                    🤝🏼(?<group>.+?)
                  )?
                  (?=(\d{4}/)?\d\d?/\d\d?)
                )?
                # planned or dates
                (
                  (
                    (?<planned>\?\?)
                    |
                    (#{START_END_DATES_REGEX})
                  )
                  (\s*\)?\s*\z|\s+)
                )?
                # progress
                (
                  # requires the at symbol, unlike the shared progress regex in Column
                  # and also adds the done option
                  (
                    (DNF\s+)?@?(?<progress_percent>\d\d?)%
                    |
                    (DNF\s+)?@p?(?<progress_pages>\d+)p?
                    |
                    (DNF\s+)?@(?<progress_time>\d+:\d\d)
                    |
                    # just DNF
                    (?<progress_dnf>DNF)
                    |
                    # done
                    (?<progress_done>done)
                  )
                  (\s*\)?\s*\z|\s+)
                )?
                # amount, repetitions, frequency
                (
                  (
                    p?(?<amount_pages>\d+)p?
                    |
                    (?<amount_time>\d+:\d\d)
                  )?
                  (
                    \s*
                    x(?<repetitions>\d+)
                  )?
                  (
                    /(?<frequency>day|week|month)
                  )?
                  (\s*\)?\s*\z|\s+)
                )?
                # favorite, name
                (
                  (?<favorite>⭐)?
                  \s*
                  (?<name>[^\d].*)
                )?
              \z}xo

            [exception_dates_entry, normal_entry]
          end

          # NOTE: a bare `private` only affects methods defined after it, so
          # the constant below remains publicly accessible.
          private

          # Optional start date (year/month/day, each part optional), an
          # optional ".." range marker, and an optional end date that is only
          # allowed directly after the range marker.
          START_END_DATES_REGEX =
            %r{
              (
                (?<start_year>\d{4})
                /
              )?
              (
                (?<start_month>\d\d?)
                /
              )?
              (?<start_day>\d\d?)?
              (?<range>\.\.)?
              (
                (?<=\.\.)
                (
                  (?<end_year>\d{4})
                  /
                )?
                (
                  (?<end_month>\d\d?)
                  /
                )?
                (?<end_day>\d\d?)?
              )?
            }x
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Regular expression for the Length column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#length-column
        class Length < Column
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>] a single regex capturing a required length
          #   (pages or h:mm time), optionally followed by "each" or "x<number>".
          def self.regexes(segment_index)
            length_pattern =
              %r{\A
                # length
                (
                  (
                    (?<length_pages>\d+)p?
                    |
                    (?<length_time>\d+:\d\d)
                  )
                  (\s+|\z)
                )
                # each or repetitions, used in conjunction with the History column
                (
                  # each
                  (?<each>each)
                  |
                  # repetitions
                  (
                    x
                    (?<repetitions>\d+)
                  )
                )?
              \z}x

            [length_pattern]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Separator and regular expressions for the Notes column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column
        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#notes-column-special-notes
        class Notes < Column
          # @return [Regexp] two hyphens with optional surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # The order matters: the regular-note regex matches any non-empty
          # string, so the two emoji-prefixed regexes are listed before it.
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>] blurb, private, then regular note regexes.
          def self.regexes(segment_index)
            # blurb note
            blurb_note =
              %r{\A
                💬\s*(?<note_blurb>.+)
              \z}x

            # private note
            private_note =
              %r{\A
                🔒\s*(?<note_private>.+)
              \z}x

            # regular note
            regular_note =
              %r{\A
                (?<note_regular>.+)
              \z}x

            [blurb_note, private_note, regular_note]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Regular expression for the Rating column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#rating-column
        class Rating < Column
          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>] a single regex that matches an empty string
          #   or captures an integer/float as `number`.
          def self.regexes(segment_index)
            # integer or float
            number_pattern = /\A(?<number>\d+\.?\d*)?\z/

            [number_pattern]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Separators, tweaks, and regular expressions for the Sources column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column
        # and https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#sources-column-variants
        class Sources < Column
          # Maps an error message to a predicate lambda. Each predicate takes a
          # single source String and returns true when that source is malformed
          # in the way the message describes.
          SOURCES_PARSING_ERRORS = {
            "Missing comma before URL(s) in the Sources column" =>
              ->(source) {
                source.match?(/\shttps?:\/\//) || source.scan(/https?:\/\//).count > 1
              },
            "The ISBN/ASIN must be placed after sources in the Sources column" =>
              ->(source) {
                # match? on both sides so the predicate always returns a boolean.
                source.match?(/\A#{ISBN_REGEX}/o) || source.match?(/\A#{ASIN_REGEX}/o)
              },
          }

          # @return [Boolean] always true for this column.
          def self.split_by_format?
            true
          end

          # @return [Regexp] two hyphens with optional surrounding whitespace.
          def self.segment_separator
            /\s*--\s*/
          end

          # @return [Array<Symbol>] capture names listed for flattening into
          #   arrays (presumably consumed by Column; verify there).
          def self.flatten_into_arrays
            %i[extra_info series_names series_volumes]
          end

          # Lambdas keyed by capture name. The sources lambda splits the
          # captured string on commas and validates every resulting source
          # against SOURCES_PARSING_ERRORS.
          # @return [Hash{Symbol => Proc}]
          # @raise [ParsingError] (when the lambda is called) if any source
          #   fails one of the checks.
          def self.tweaks
            {
              sources: -> {
                sources = _1.split(/\s*,\s*/)

                SOURCES_PARSING_ERRORS.each do |message, check|
                  if sources.any? { |source| check.call(source) }
                    raise ParsingError, message
                  end
                end

                sources
              },
            }
          end

          # The two sources/ISBN/length regexes are included only for the first
          # segment (segment_index zero); later segments get just the shared
          # series-and-extra-info regexes.
          # @param segment_index [Integer]
          # @return [Array<Regexp>]
          def self.regexes(segment_index)
            [
              # ISBN/ASIN and length (without sources)
              (%r{\A
                (
                  (?<isbn>(\d{3}[-\s]?)?[A-Z\d]{10})
                  ,?(\s+|\z)
                )?
                (
                  (?<length_pages>\d+)p?
                  |
                  (?<length_time>\d+:\d\d)
                )?
              \z}x if segment_index.zero?),
              # sources, ISBN/ASIN, length
              (%r{\A
                (
                  (?<sources>.+?)
                  ,?(\s+|\z)
                )?
                (
                  (
                    (?<isbn>#{ISBN_REGEX})
                    |
                    (?<asin>#{ASIN_REGEX})
                  )
                  ,?(\s+|\z)
                )?
                (
                  (?<length_pages>\d+)p?
                  |
                  (?<length_time>\d+:\d\d)
                )?
              \z}xo if segment_index.zero?),
              *Column::SHARED_REGEXES[:series_and_extra_info],
            ].compact
          end

          # NOTE: a bare `private` only affects methods defined after it, so
          # the constants below remain publicly accessible.
          private

          # Ten digits, optionally preceded by a three-digit prefix and a
          # hyphen or whitespace character.
          ISBN_REGEX = /(\d{3}[-\s]?)?\d{10}/
          # "B0" followed by eight uppercase letters or digits.
          ASIN_REGEX = /B0[A-Z\d]{8}/
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Reading
  module Parsing
    module Rows
      module Regular
        # Separator and regular expression for the Start Dates column.
        # See https://github.com/fpsvogel/reading/blob/main/doc/csv-format.md#start-dates-and-end-dates-columns
        class StartDates < Column
          # @return [Regexp] a comma followed by optional whitespace.
          def self.segment_separator
            /,\s*/
          end

          # @param segment_index [Integer] not used by this column.
          # @return [Array<Regexp>] a single regex whose parts are all optional:
          #   shared progress regex, date, variant number, group.
          def self.regexes(segment_index)
            # dnf/progress, date, variant number, group
            start_date_pattern =
              %r{\A
                (
                  #{Column::SHARED_REGEXES[:progress]}
                  (\s+|\z)
                )?
                (
                  (?<date>\d{4}/\d\d?/\d\d?)
                  (\s+|\z)
                )?
                (
                  v(?<variant>\d)
                  (\s+|\z)
                )?
                (
                  🤝🏼(?<group>.+)
                )?
              \z}x

            [start_date_pattern]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative "attributes/shared"
|
2
|
+
require_relative "attributes/attribute"
|
3
|
+
require_relative "attributes/rating"
|
4
|
+
require_relative "attributes/author"
|
5
|
+
require_relative "attributes/title"
|
6
|
+
require_relative "attributes/genres"
|
7
|
+
require_relative "attributes/variants"
|
8
|
+
require_relative "attributes/experiences"
|
9
|
+
require_relative "attributes/notes"
|
10
|
+
|
11
|
+
module Reading
  module Parsing
    #
    # Turns an intermediate hash (parsed from a CSV row) into item data.
    # The intermediate hash mirrors the structure of a row, whereas the output
    # of Transformer is organized by item attributes, which are listed in
    # Config#default_config[:item_template] and in the files in parsing/attributes.
    #
    class Transformer
      using Util::HashArrayDeepFetch
      using Util::HashCompactByTemplate

      attr_reader :config
      private attr_reader :attributes

      # @param config [Hash] an entire config.
      def initialize(config)
        @config = config

        set_attributes
      end

      # Builds one item hash per entry in the row's Head column.
      # @param parsed_row [Hash{Symbol => Hash, Array}] output from
      #   Parsing::Parser#parse_row_to_intermediate_hash.
      # @return [Array<Hash>] an array of Hashes like the template in
      #   Config#default_config[:item_template].
      # @raise [InvalidHeadError] if the Head column is blank or missing.
      def transform_intermediate_hash_to_item_hashes(parsed_row)
        raise InvalidHeadError, "Blank or missing Head column" if parsed_row[:head].blank?

        template = config.fetch(:item_template)

        parsed_row[:head].each_with_index.map { |_head, head_index|
          item_hash = template.to_h { |attribute_name, default_value|
            transformer = attributes.fetch(attribute_name)
            transformed = transformer.transform_from_parsed(parsed_row, head_index)

            # Fall back to the template's default when nothing was transformed.
            [attribute_name, transformed || default_value]
          }

          item_hash.compact_by(template: template)
        }
      end

      private

      # Instantiates, once, the attribute classes that do all the transforming
      # work: one per key of the item template, looked up in Attributes by the
      # camel-cased key name. See parsing/attributes/*.
      def set_attributes
        @attributes ||= config.fetch(:item_template).to_h { |attribute_name, _default|
          class_name = attribute_name.to_s.split("_").map(&:capitalize).join

          [attribute_name, Attributes.const_get(class_name).new(config)]
        }
      end
    end
  end
end
|
@@ -9,6 +9,7 @@ module Reading
|
|
9
9
|
# If no parsed data has been added to the template values for these, they
|
10
10
|
# are considered blank, and are replaced with an empty array so that their
|
11
11
|
# emptiness is more apparent, e.g. item[:experiences].empty? will return true.
|
12
|
+
# @return [Hash]
|
12
13
|
def compact_by(template:)
|
13
14
|
map { |key, val|
|
14
15
|
if is_array_of_hashes?(val)
|
@@ -3,6 +3,7 @@ module Reading
|
|
3
3
|
# Converts a Hash to a Struct. Converts inner hashes (and inner arrays of hashes) as well.
|
4
4
|
module HashToStruct
|
5
5
|
refine Hash do
|
6
|
+
# @return [Struct]
|
6
7
|
def to_struct
|
7
8
|
MEMOIZED_STRUCTS[keys] ||= Struct.new(*keys)
|
8
9
|
struct_class = MEMOIZED_STRUCTS[keys]
|
@@ -1,13 +1,22 @@
|
|
1
1
|
module Reading
  module Util
    # Shortens a String by cutting it off and appending an ellipsis.
    module StringTruncate
      refine String do
        # Keeps the first `length` characters and appends ELLIPSIS, so a
        # shortened result is `length + ELLIPSIS.length` characters long.
        # The receiver itself is returned when cutting would not make it
        # shorter (i.e. unless `length < self.length - ELLIPSIS.length`).
        # @param length [Integer] number of leading characters to keep; a
        #   negative value is treated as zero.
        # @return [String]
        def truncate(length)
          # A negative bound would make self[0...length] count from the end of
          # the string and could produce a result longer than the original.
          kept = [length, 0].max

          if kept < self.length - ELLIPSIS.length
            "#{self[0...kept]}#{ELLIPSIS}"
          else
            self
          end
        end
      end

      # NOTE: a bare `private` only affects methods defined after it, so the
      # constant below remains publicly accessible.
      private

      ELLIPSIS = "...".freeze
    end
  end
end
|
data/lib/reading/version.rb
CHANGED
data/lib/reading.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative "reading/parsing/csv"
|
2
|
+
require_relative "reading/item/time_length.rb"
|
3
|
+
|
4
|
+
# The gem's public API. See https://github.com/fpsvogel/reading#usage
|
5
|
+
|
6
|
+
module Reading
  class << self
    # Parses a CSV file or string by forwarding all arguments to
    # Parsing::CSV.new and calling #parse on the result.
    # See Parsing::CSV#initialize and #parse for details.
    def parse(...)
      Parsing::CSV.new(...).parse
    end

    # @param string [String] a time duration in "h:mm" format.
    # @return [Reading::Item::TimeLength]
    def time(string)
      Reading::Item::TimeLength.parse(string)
    end
  end
end
|