dreader 0.5.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.org +92 -0
- data/Gemfile.lock +20 -7
- data/README.org +821 -0
- data/dreader.gemspec +6 -4
- data/examples/age/age.rb +41 -25
- data/examples/age_with_multiple_checks/Birthdays.ods +0 -0
- data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +64 -0
- data/examples/local_vars/local_vars.rb +28 -0
- data/examples/template/template_generation.rb +37 -0
- data/examples/wikipedia_big_us_cities/big_us_cities.rb +24 -20
- data/examples/wikipedia_us_cities/us_cities.rb +31 -28
- data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +25 -23
- data/lib/dreader/column.rb +39 -0
- data/lib/dreader/engine.rb +495 -0
- data/lib/dreader/options.rb +16 -0
- data/lib/dreader/util.rb +86 -0
- data/lib/dreader/version.rb +1 -1
- data/lib/dreader.rb +5 -411
- metadata +60 -24
- data/Changelog.org +0 -20
- data/README.md +0 -469
data/dreader.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "dreader"
|
8
8
|
spec.version = Dreader::VERSION
|
9
9
|
spec.authors = ["Adolfo Villafiorita"]
|
10
|
-
spec.email = ["adolfo
|
10
|
+
spec.email = ["adolfo@shair.tech"]
|
11
11
|
|
12
12
|
spec.summary = %q{Process and import data from cvs and spreadsheets}
|
13
13
|
spec.description = %q{Use this gem to specify the structure of some tabular data
|
@@ -19,7 +19,7 @@ Rails application, but the gem can used in any Ruby application.
|
|
19
19
|
|
20
20
|
The gem should be relatively easy to use, despite its name. (Dread
|
21
21
|
stands for *d*ata *r*eader)}
|
22
|
-
spec.homepage = "https://
|
22
|
+
spec.homepage = "https://redmine.shair.tech/projects/dreader"
|
23
23
|
spec.license = "MIT"
|
24
24
|
|
25
25
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
@@ -29,8 +29,10 @@ stands for *d*ata *r*eader)}
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
+
spec.add_runtime_dependency "roo"
|
33
|
+
spec.add_runtime_dependency "fast_excel"
|
34
|
+
|
35
|
+
spec.add_development_dependency "debug", ">= 1.0.0"
|
32
36
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
37
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
-
|
35
|
-
spec.add_runtime_dependency "roo"
|
36
38
|
end
|
data/examples/age/age.rb
CHANGED
@@ -1,39 +1,55 @@
|
|
1
|
-
require
|
1
|
+
require "dreader"
|
2
2
|
|
3
|
-
|
3
|
+
class Reader
|
4
|
+
extend Dreader::Engine
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
options do
|
7
|
+
first_row 2
|
8
|
+
debug true
|
9
|
+
end
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
column :name do
|
12
|
+
doc "A is the name string"
|
13
|
+
colref 'A'
|
14
|
+
end
|
12
15
|
|
13
|
-
|
14
|
-
|
16
|
+
column :birthdate do
|
17
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
18
|
+
colref 'B'
|
15
19
|
|
16
|
-
|
17
|
-
|
20
|
+
process do |c|
|
21
|
+
Date.parse(c)
|
22
|
+
end
|
18
23
|
end
|
19
|
-
end
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
virtual_column :age do
|
26
|
+
process do |row|
|
27
|
+
birthdate = row[:birthdate][:value]
|
28
|
+
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
29
|
+
today = Date.today
|
26
30
|
|
27
|
-
|
31
|
+
[0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
|
32
|
+
end
|
28
33
|
end
|
29
|
-
end
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
|
35
|
+
mapping do |row|
|
36
|
+
r = Dreader::Util.simplify(row)
|
37
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
38
|
+
end
|
34
39
|
end
|
35
40
|
|
41
|
+
i = Reader
|
36
42
|
i.read filename: "Birthdays.ods"
|
37
43
|
i.virtual_columns
|
38
|
-
i.
|
39
|
-
|
44
|
+
i.mappings
|
45
|
+
|
46
|
+
#
|
47
|
+
# Here we can do further processing on the data
|
48
|
+
#
|
49
|
+
File.open("ages.txt", "w") do |file|
|
50
|
+
i.table.each do |row|
|
51
|
+
unless row[:row_errors].any?
|
52
|
+
file.puts "#{row[:name][:value]} #{row[:age][:value]}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
Binary file
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
class Reader
|
4
|
+
extend Dreader::Engine
|
5
|
+
|
6
|
+
options { first_row 2; debug true }
|
7
|
+
|
8
|
+
#
|
9
|
+
# Here we intentionally fail on good data
|
10
|
+
#
|
11
|
+
column :name do
|
12
|
+
doc "A is the name string"
|
13
|
+
|
14
|
+
colref 'A'
|
15
|
+
|
16
|
+
check_raw :name_too_short do |cell|
|
17
|
+
cell&.size < 7
|
18
|
+
end
|
19
|
+
|
20
|
+
check_raw :name_too_long do |cell|
|
21
|
+
cell&.size > 10
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
column :birthdate do
|
26
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
27
|
+
colref 'B'
|
28
|
+
|
29
|
+
process do |c|
|
30
|
+
Date.parse(c)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
virtual_column :age do
|
35
|
+
process do |row|
|
36
|
+
birthdate = row[:birthdate][:value]
|
37
|
+
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
38
|
+
today = Date.today
|
39
|
+
|
40
|
+
[0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
mapping do |row|
|
45
|
+
errors_for_row = row[:row_errors]
|
46
|
+
|
47
|
+
if errors_for_row.any?
|
48
|
+
puts "ATTENTION: Record at row #{row[:row_number]} has the following errors:"
|
49
|
+
errors_for_row.each do |error|
|
50
|
+
puts " " + error[:message].to_s
|
51
|
+
end
|
52
|
+
else
|
53
|
+
r = Dreader::Util.simplify(row)
|
54
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
i = Reader
|
60
|
+
|
61
|
+
i.read filename: "Birthdays.ods"
|
62
|
+
i.virtual_columns
|
63
|
+
i.mappings
|
64
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#
|
2
|
+
# This demonstrates that the options declared in one reader class are local to that class
|
3
|
+
#
|
4
|
+
|
5
|
+
require "dreader"
|
6
|
+
|
7
|
+
class OneReader
|
8
|
+
extend Dreader::Engine
|
9
|
+
|
10
|
+
options do
|
11
|
+
first_row 2
|
12
|
+
debug true
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class AnotherReader
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
options do
|
20
|
+
filename "filename"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
r1 = OneReader
|
25
|
+
r2 = AnotherReader
|
26
|
+
|
27
|
+
puts r1.declared_options
|
28
|
+
puts r2.declared_options
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
i = Dreader::Engine.new
|
4
|
+
|
5
|
+
i.options do
|
6
|
+
first_row 2
|
7
|
+
end
|
8
|
+
|
9
|
+
i.column :name do
|
10
|
+
doc "A is the name string"
|
11
|
+
colref 'A'
|
12
|
+
end
|
13
|
+
|
14
|
+
i.column :birthdate do
|
15
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
16
|
+
colref 'B'
|
17
|
+
|
18
|
+
process do |c|
|
19
|
+
Date.parse(c)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
i.column :place_of_birth do
|
24
|
+
doc "This is a string, a city (the interesting part is that it is in column E"
|
25
|
+
colref 'E'
|
26
|
+
end
|
27
|
+
|
28
|
+
i.example({ name: "Galileo Galilei", birthdate: "15/02/1564", place_of_birth: "Pisa" })
|
29
|
+
i.example({ name: "Cyrus McCormick", birthdate: "15/02/1809" })
|
30
|
+
i.example({ name: "Charles Lewis Tiffany", birthdate: "15/02/1812" })
|
31
|
+
i.example({ name: "Ernest Shackleton", birthdate: "15/02/1874" })
|
32
|
+
i.example({ name: "Yelena Bonner", birthdate: "15/02/1923" })
|
33
|
+
i.example({ name: "Matt Groening", birthdate: "15/02/1954", place_of_birth: "USA" })
|
34
|
+
|
35
|
+
# only the xlsx format is supported at the moment
|
36
|
+
i.generate_template template_filename: "birthdays.xlsx"
|
37
|
+
|
@@ -2,29 +2,33 @@
|
|
2
2
|
|
3
3
|
require 'dreader'
|
4
4
|
|
5
|
-
|
5
|
+
class Processor
|
6
|
+
extend Dreader::Engine
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
8
|
+
options do
|
9
|
+
first_row 2
|
10
|
+
filename "cities_by_state.ods"
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
end
|
13
|
+
column :state do |col|
|
14
|
+
col.colref 'A'
|
15
|
+
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
column :cities do |col|
|
18
|
+
col.colref 'B'
|
19
|
+
col.check do |data|
|
20
|
+
data.class == Integer
|
21
|
+
end
|
20
22
|
end
|
21
|
-
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
mapping do |row|
|
25
|
+
hash = Dreader::Util.simplify row
|
26
|
+
puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
|
27
|
+
end
|
26
28
|
end
|
27
29
|
|
30
|
+
processor = Processor
|
31
|
+
|
28
32
|
printf "Loading the spreadsheet..."
|
29
33
|
processor.load
|
30
34
|
puts "done!"
|
@@ -41,8 +45,8 @@ else
|
|
41
45
|
end
|
42
46
|
puts "done!"
|
43
47
|
|
44
|
-
puts "
|
45
|
-
processor.
|
48
|
+
puts "Applying mapping rules to the spreadsheet..."
|
49
|
+
processor.mappings
|
46
50
|
puts "... done"
|
47
|
-
|
48
|
-
|
51
|
+
|
52
|
+
|
@@ -13,43 +13,46 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
# read rows 2 to 10 (inclusive) of us_cities.tsv
|
20
|
+
options do
|
21
|
+
filename "us_cities.tsv"
|
22
|
+
first_row 2
|
23
|
+
last_row 10
|
24
|
+
end
|
24
25
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
# these are the columns for which we only need to specify column and name
|
27
|
+
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
28
|
+
# val[0] -> :city
|
29
|
+
# val[1] -> 2
|
30
|
+
column val[0] do |col|
|
31
|
+
col.colref val[1]
|
32
|
+
col.process do |val|
|
33
|
+
val.strip
|
34
|
+
end
|
33
35
|
end
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
# the population column requires more work
|
38
|
-
|
39
|
-
|
38
|
+
# the population column requires more work
|
39
|
+
column :population do |col|
|
40
|
+
col.colref 4
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
# make "3,000" into 3000 (int)
|
43
|
+
col.process do |value|
|
44
|
+
value.gsub(",", "").to_i
|
45
|
+
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
47
|
+
col.check do |value|
|
48
|
+
value > 0
|
49
|
+
end
|
49
50
|
|
51
|
+
end
|
50
52
|
end
|
51
53
|
|
52
54
|
cities = []
|
55
|
+
importer = Importer
|
53
56
|
|
54
57
|
importer.mapping do |row|
|
55
58
|
# remove all additional information stored in each cell
|
@@ -80,7 +83,7 @@ importer.debug process: false, check: false
|
|
80
83
|
# load and process
|
81
84
|
importer.load
|
82
85
|
cities = []
|
83
|
-
importer.
|
86
|
+
importer.mappings
|
84
87
|
|
85
88
|
# output everything to see whether it works
|
86
89
|
puts "First ten cities in the US (source Wikipedia)"
|
@@ -13,36 +13,38 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
# read rows 2 to 10 (inclusive) of us_cities.tsv
|
20
|
+
options do
|
21
|
+
filename "us_cities.tsv"
|
22
|
+
first_row 2
|
23
|
+
last_row 10
|
24
|
+
end
|
24
25
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
|
27
|
-
|
28
|
-
end
|
26
|
+
# these are the columns for which we only need to specify column and name
|
27
|
+
columns ({city: 2, state: 3, latlon: 11}) do
|
28
|
+
process { |val| val.strip }
|
29
|
+
end
|
29
30
|
|
30
|
-
# the population column requires more work
|
31
|
-
|
32
|
-
|
31
|
+
# the population column requires more work
|
32
|
+
column :population do |col|
|
33
|
+
col.colref 4
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
35
|
+
# make "3,000" into 3000 (int)
|
36
|
+
col.process do |value|
|
37
|
+
value.gsub(",", "").to_i
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
40
|
+
col.check do |value|
|
41
|
+
value > 0
|
42
|
+
end
|
41
43
|
end
|
42
|
-
|
43
44
|
end
|
44
45
|
|
45
46
|
cities = []
|
47
|
+
importer = Importer
|
46
48
|
|
47
49
|
importer.mapping do |row|
|
48
50
|
# remove all additional information stored in each cell
|
@@ -73,7 +75,7 @@ importer.debug process: false, check: false
|
|
73
75
|
# load and process
|
74
76
|
importer.load
|
75
77
|
cities = []
|
76
|
-
importer.
|
78
|
+
importer.mappings
|
77
79
|
|
78
80
|
# output everything to see whether it works
|
79
81
|
puts "First ten cities in the US (source Wikipedia)"
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Dreader
|
2
|
+
# Service class that implements the column DSL
|
3
|
+
class Column
|
4
|
+
def initialize
|
5
|
+
@checks_raw = {}
|
6
|
+
@checks = {}
|
7
|
+
end
|
8
|
+
|
9
|
+
def doc(doc)
|
10
|
+
@doc = doc
|
11
|
+
end
|
12
|
+
|
13
|
+
def colref(colref)
|
14
|
+
@colref = colref
|
15
|
+
end
|
16
|
+
|
17
|
+
def check_raw(message = :unnamed_check, &block)
|
18
|
+
@checks_raw[message] = block
|
19
|
+
end
|
20
|
+
|
21
|
+
def process(&block)
|
22
|
+
@process = block
|
23
|
+
end
|
24
|
+
|
25
|
+
def check(message = :unnamed_check, &block)
|
26
|
+
@checks[message] = block
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_hash
|
30
|
+
{
|
31
|
+
checks_raw: @checks_raw,
|
32
|
+
process: @process,
|
33
|
+
checks: @checks,
|
34
|
+
colref: @colref,
|
35
|
+
doc: @doc
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|