dreader 0.5.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +92 -0
- data/Gemfile.lock +20 -7
- data/README.org +821 -0
- data/dreader.gemspec +6 -4
- data/examples/age/age.rb +41 -25
- data/examples/age_with_multiple_checks/Birthdays.ods +0 -0
- data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +64 -0
- data/examples/local_vars/local_vars.rb +28 -0
- data/examples/template/template_generation.rb +37 -0
- data/examples/wikipedia_big_us_cities/big_us_cities.rb +24 -20
- data/examples/wikipedia_us_cities/us_cities.rb +31 -28
- data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +25 -23
- data/lib/dreader/column.rb +39 -0
- data/lib/dreader/engine.rb +495 -0
- data/lib/dreader/options.rb +16 -0
- data/lib/dreader/util.rb +86 -0
- data/lib/dreader/version.rb +1 -1
- data/lib/dreader.rb +5 -411
- metadata +60 -24
- data/Changelog.org +0 -20
- data/README.md +0 -469
data/dreader.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "dreader"
|
8
8
|
spec.version = Dreader::VERSION
|
9
9
|
spec.authors = ["Adolfo Villafiorita"]
|
10
|
-
spec.email = ["adolfo
|
10
|
+
spec.email = ["adolfo@shair.tech"]
|
11
11
|
|
12
12
|
spec.summary = %q{Process and import data from cvs and spreadsheets}
|
13
13
|
spec.description = %q{Use this gem to specify the structure of some tabular data
|
@@ -19,7 +19,7 @@ Rails application, but the gem can used in any Ruby application.
|
|
19
19
|
|
20
20
|
The gem should be relatively easy to use, despite its name. (Dread
|
21
21
|
stands for *d*ata *r*eader)}
|
22
|
-
spec.homepage = "https://
|
22
|
+
spec.homepage = "https://redmine.shair.tech/projects/dreader"
|
23
23
|
spec.license = "MIT"
|
24
24
|
|
25
25
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
@@ -29,8 +29,10 @@ stands for *d*ata *r*eader)}
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
+
spec.add_runtime_dependency "roo"
|
33
|
+
spec.add_runtime_dependency "fast_excel"
|
34
|
+
|
35
|
+
spec.add_development_dependency "debug", ">= 1.0.0"
|
32
36
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
37
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
-
|
35
|
-
spec.add_runtime_dependency "roo"
|
36
38
|
end
|
data/examples/age/age.rb
CHANGED
@@ -1,39 +1,55 @@
|
|
1
|
-
require
|
1
|
+
require "dreader"
|
2
2
|
|
3
|
-
|
3
|
+
class Reader
|
4
|
+
extend Dreader::Engine
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
options do
|
7
|
+
first_row 2
|
8
|
+
debug true
|
9
|
+
end
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
column :name do
|
12
|
+
doc "A is the name string"
|
13
|
+
colref 'A'
|
14
|
+
end
|
12
15
|
|
13
|
-
|
14
|
-
|
16
|
+
column :birthdate do
|
17
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
18
|
+
colref 'B'
|
15
19
|
|
16
|
-
|
17
|
-
|
20
|
+
process do |c|
|
21
|
+
Date.parse(c)
|
22
|
+
end
|
18
23
|
end
|
19
|
-
end
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
# Virtual column :age -- whole years elapsed since the value stored in the
# :birthdate cell of the same row (never negative).
virtual_column :age do
  process do |row|
    birthdate = row[:birthdate][:value]
    today = Date.today

    # A year has to be taken off only while this year's birthday is still
    # ahead of us. Comparing [month, day] pairs avoids building
    # Date.new(today.year, 2, 29), which raises in non-leap years for
    # birthdates falling on February 29.
    birthday_pending = ([today.month, today.day] <=> [birthdate.month, birthdate.day]) < 0

    [0, today.year - birthdate.year - (birthday_pending ? 1 : 0)].max
  end
end
|
29
|
-
end
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
|
35
|
+
mapping do |row|
|
36
|
+
r = Dreader::Util.simplify(row)
|
37
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
38
|
+
end
|
34
39
|
end
|
35
40
|
|
41
|
+
i = Reader
|
36
42
|
i.read filename: "Birthdays.ods"
|
37
43
|
i.virtual_columns
|
38
|
-
i.
|
39
|
-
|
44
|
+
i.mappings
|
45
|
+
|
46
|
+
#
|
47
|
+
# Here we can do further processing on the data
|
48
|
+
#
|
49
|
+
File.open("ages.txt", "w") do |file|
|
50
|
+
i.table.each do |row|
|
51
|
+
unless row[:row_errors].any?
|
52
|
+
file.puts "#{row[:name][:value]} #{row[:age][:value]}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
Binary file
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
# Example reader for Birthdays.ods that declares two raw checks on the same
# column and reports the errors collected for each row.
class Reader
  extend Dreader::Engine

  options { first_row 2; debug true }

  #
  # Here we intentionally fail on good data
  #
  column :name do
    doc "A is the name string"

    colref 'A'

    # The explicit nil guard replaces `cell&.size < 7`: with a nil cell the
    # safe navigation yields nil and `nil < 7` raises NoMethodError.
    check_raw :name_too_short do |cell|
      !cell.nil? && cell.size < 7
    end

    check_raw :name_too_long do |cell|
      !cell.nil? && cell.size > 10
    end
  end

  column :birthdate do
    doc "Birthdate contains a full date (i.e., including the year)"
    colref 'B'

    process do |c|
      Date.parse(c)
    end
  end

  # Whole years elapsed since :birthdate (never negative).
  virtual_column :age do
    process do |row|
      birthdate = row[:birthdate][:value]
      today = Date.today

      # Subtract a year only while this year's birthday is still to come.
      # Comparing [month, day] pairs avoids Date.new(today.year, 2, 29),
      # which raises in non-leap years.
      birthday_pending = ([today.month, today.day] <=> [birthdate.month, birthdate.day]) < 0

      [0, today.year - birthdate.year - (birthday_pending ? 1 : 0)].max
    end
  end

  # Print the errors recorded for the row, or a summary line when there are none.
  mapping do |row|
    errors_for_row = row[:row_errors]

    if errors_for_row.any?
      puts "ATTENTION: Record at row #{row[:row_number]} has the following errors:"
      errors_for_row.each do |error|
        puts " " + error[:message].to_s
      end
    else
      r = Dreader::Util.simplify(row)
      puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
    end
  end
end
|
58
|
+
|
59
|
+
# Drive the Reader class: read the sample spreadsheet, then call
# virtual_columns and mappings, in that order.
reader = Reader
reader.read(filename: "Birthdays.ods")
reader.virtual_columns
reader.mappings
|
64
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#
|
2
|
+
# This demonstrates that variables are local
|
3
|
+
#
|
4
|
+
|
5
|
+
require "dreader"
|
6
|
+
|
7
|
+
# First reader: declares first_row and debug only.
class OneReader
  extend Dreader::Engine

  options { first_row 2; debug true }
end
|
15
|
+
|
16
|
+
# Second reader: declares a filename only.
class AnotherReader
  extend Dreader::Engine

  options { filename "filename" }
end
|
23
|
+
|
24
|
+
# Print the options declared by each reader, one after the other.
[OneReader, AnotherReader].each do |reader|
  puts reader.declared_options
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
# Dreader::Engine is mixed into classes with `extend` (as the other examples
# in this package do), so the declarations live in a class rather than in an
# object built with Dreader::Engine.new.
class BirthdayTemplate
  extend Dreader::Engine

  options do
    first_row 2
  end

  column :name do
    doc "A is the name string"
    colref 'A'
  end

  column :birthdate do
    doc "Birthdate contains a full date (i.e., including the year)"
    colref 'B'

    process do |c|
      Date.parse(c)
    end
  end

  column :place_of_birth do
    doc "This is a string, a city (the interesting part is that it is in column E)"
    colref 'E'
  end
end

i = BirthdayTemplate

# Sample records passed to `example`; place_of_birth is given only for some.
i.example({ name: "Galileo Galilei", birthdate: "15/02/1564", place_of_birth: "Pisa" })
i.example({ name: "Cyrus McCormick", birthdate: "15/02/1809" })
i.example({ name: "Charles Lewis Tiffany", birthdate: "15/02/1812" })
i.example({ name: "Ernest Shackleton", birthdate: "15/02/1874" })
i.example({ name: "Yelena Bonner", birthdate: "15/02/1923" })
i.example({ name: "Matt Groening", birthdate: "15/02/1954", place_of_birth: "USA" })

# only xlsx at the moment
i.generate_template template_filename: "birthdays.xlsx"
|
37
|
+
|
@@ -2,29 +2,33 @@
|
|
2
2
|
|
3
3
|
require 'dreader'
|
4
4
|
|
5
|
-
|
5
|
+
class Processor
|
6
|
+
extend Dreader::Engine
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
8
|
+
options do
|
9
|
+
first_row 2
|
10
|
+
filename "cities_by_state.ods"
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
end
|
13
|
+
column :state do |col|
|
14
|
+
col.colref 'A'
|
15
|
+
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
column :cities do |col|
|
18
|
+
col.colref 'B'
|
19
|
+
col.check do |data|
|
20
|
+
data.class == Integer
|
21
|
+
end
|
20
22
|
end
|
21
|
-
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
mapping do |row|
|
25
|
+
hash = Dreader::Util.simplify row
|
26
|
+
puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
|
27
|
+
end
|
26
28
|
end
|
27
29
|
|
30
|
+
processor = Processor
|
31
|
+
|
28
32
|
printf "Loading the spreadsheet..."
|
29
33
|
processor.load
|
30
34
|
puts "done!"
|
@@ -41,8 +45,8 @@ else
|
|
41
45
|
end
|
42
46
|
puts "done!"
|
43
47
|
|
44
|
-
puts "
|
45
|
-
processor.
|
48
|
+
puts "Applying mapping rules to the spreadsheet..."
|
49
|
+
processor.mappings
|
46
50
|
puts "... done"
|
47
|
-
|
48
|
-
|
51
|
+
|
52
|
+
|
@@ -13,43 +13,46 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
# read from us_cities.tsv, lines from 2 to 10 (included)
|
20
|
+
options do
|
21
|
+
filename "us_cities.tsv"
|
22
|
+
first_row 2
|
23
|
+
last_row 10
|
24
|
+
end
|
24
25
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
# these are the columns for which we only need to specify column and name
|
27
|
+
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
28
|
+
# val[0] -> :city
|
29
|
+
# val[1] -> 2
|
30
|
+
column val[0] do |col|
|
31
|
+
col.colref val[1]
|
32
|
+
col.process do |val|
|
33
|
+
val.strip
|
34
|
+
end
|
33
35
|
end
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
# the population column requires more work
|
38
|
-
|
39
|
-
|
38
|
+
# the population column requires more work
|
39
|
+
column :population do |col|
|
40
|
+
col.colref 4
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
# make "3,000" into 3000 (int)
|
43
|
+
col.process do |value|
|
44
|
+
value.gsub(",", "").to_i
|
45
|
+
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
47
|
+
col.check do |value|
|
48
|
+
value > 0
|
49
|
+
end
|
49
50
|
|
51
|
+
end
|
50
52
|
end
|
51
53
|
|
52
54
|
cities = []
|
55
|
+
importer = Importer
|
53
56
|
|
54
57
|
importer.mapping do |row|
|
55
58
|
# remove all additional information stored in each cell
|
@@ -80,7 +83,7 @@ importer.debug process: false, check: false
|
|
80
83
|
# load and process
|
81
84
|
importer.load
|
82
85
|
cities = []
|
83
|
-
importer.
|
86
|
+
importer.mappings
|
84
87
|
|
85
88
|
# output everything to see whether it works
|
86
89
|
puts "First ten cities in the US (source Wikipedia)"
|
@@ -13,36 +13,38 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
# read from us_cities.tsv, lines from 2 to 10 (included)
|
20
|
+
options do
|
21
|
+
filename "us_cities.tsv"
|
22
|
+
first_row 2
|
23
|
+
last_row 10
|
24
|
+
end
|
24
25
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
|
27
|
-
|
28
|
-
end
|
26
|
+
# these are the columns for which we only need to specify column and name
|
27
|
+
columns ({city: 2, state: 3, latlon: 11}) do
|
28
|
+
process { |val| val.strip }
|
29
|
+
end
|
29
30
|
|
30
|
-
# the population column requires more work
|
31
|
-
|
32
|
-
|
31
|
+
# the population column requires more work
|
32
|
+
column :population do |col|
|
33
|
+
col.colref 4
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
# make "3,000" into 3000 (int)
|
36
|
+
col.process do |value|
|
37
|
+
value.gsub(",", "").to_i
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
40
|
+
col.check do |value|
|
41
|
+
value > 0
|
42
|
+
end
|
41
43
|
end
|
42
|
-
|
43
44
|
end
|
44
45
|
|
45
46
|
cities = []
|
47
|
+
importer = Importer
|
46
48
|
|
47
49
|
importer.mapping do |row|
|
48
50
|
# remove all additional information stored in each cell
|
@@ -73,7 +75,7 @@ importer.debug process: false, check: false
|
|
73
75
|
# load and process
|
74
76
|
importer.load
|
75
77
|
cities = []
|
76
|
-
importer.
|
78
|
+
importer.mappings
|
77
79
|
|
78
80
|
# output everything to see whether it works
|
79
81
|
puts "First ten cities in the US (source Wikipedia)"
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Dreader
  # Service class implementing the column DSL: each DSL word stores one
  # setting, and #to_hash returns everything that was declared.
  class Column
    # Key used for a check that is declared without a name.
    DEFAULT_CHECK_NAME = :unnamed_check

    def initialize
      @checks_raw = {}
      @checks = {}
    end

    # Store the documentation string of the column.
    def doc(doc)
      @doc = doc
    end

    # Store the column reference (examples use letters such as 'A' or integers).
    def colref(colref)
      @colref = colref
    end

    # Register a check under +message+ in the raw-checks table.
    # Several unnamed checks may be declared: each gets its own key.
    def check_raw(message = DEFAULT_CHECK_NAME, &block)
      @checks_raw[free_key(@checks_raw, message)] = block
    end

    # Store the block declared with +process+.
    def process(&block)
      @process = block
    end

    # Register a check under +message+ in the checks table.
    # Several unnamed checks may be declared: each gets its own key.
    def check(message = DEFAULT_CHECK_NAME, &block)
      @checks[free_key(@checks, message)] = block
    end

    # Return all declared settings; entries never declared are nil
    # (or an empty hash for the two check tables).
    def to_hash
      {
        checks_raw: @checks_raw,
        process: @process,
        checks: @checks,
        colref: @colref,
        doc: @doc
      }
    end

    private

    # Return the key to store a check under. An explicit name is used as is
    # (re-declaring it replaces the previous block, as before). The default
    # name is kept for the first unnamed check; later unnamed checks get
    # :unnamed_check_2, :unnamed_check_3, ... so that none is silently lost.
    def free_key(registry, message)
      return message unless message == DEFAULT_CHECK_NAME && registry.key?(message)

      suffix = 2
      suffix += 1 while registry.key?(:"#{DEFAULT_CHECK_NAME}_#{suffix}")
      :"#{DEFAULT_CHECK_NAME}_#{suffix}"
    end
  end
end
|