dreader 0.4.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.ORG +45 -0
- data/Gemfile.lock +21 -8
- data/README.org +794 -0
- data/dreader.gemspec +6 -4
- data/examples/age/age.rb +22 -6
- data/examples/age_with_multiple_checks/Birthdays.ods +0 -0
- data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +62 -0
- data/examples/template/template_generation.rb +37 -0
- data/examples/wikipedia_big_us_cities/big_us_cities.rb +20 -18
- data/examples/wikipedia_us_cities/us_cities.rb +28 -27
- data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +22 -22
- data/lib/dreader/column.rb +39 -0
- data/lib/dreader/engine.rb +473 -0
- data/lib/dreader/options.rb +16 -0
- data/lib/dreader/util.rb +71 -0
- data/lib/dreader/version.rb +1 -1
- data/lib/dreader.rb +5 -411
- metadata +59 -25
- data/Changelog.org +0 -20
- data/README.md +0 -469
data/dreader.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "dreader"
|
8
8
|
spec.version = Dreader::VERSION
|
9
9
|
spec.authors = ["Adolfo Villafiorita"]
|
10
|
-
spec.email = ["adolfo
|
10
|
+
spec.email = ["adolfo@shair.tech"]
|
11
11
|
|
12
12
|
spec.summary = %q{Process and import data from cvs and spreadsheets}
|
13
13
|
spec.description = %q{Use this gem to specify the structure of some tabular data
|
@@ -19,7 +19,7 @@ Rails application, but the gem can used in any Ruby application.
|
|
19
19
|
|
20
20
|
The gem should be relatively easy to use, despite its name. (Dread
|
21
21
|
stands for *d*ata *r*eader)}
|
22
|
-
spec.homepage = "
|
22
|
+
spec.homepage = "https://redmine.shair.tech/projects/dreader"
|
23
23
|
spec.license = "MIT"
|
24
24
|
|
25
25
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
@@ -29,8 +29,10 @@ stands for *d*ata *r*eader)}
|
|
29
29
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
+
spec.add_runtime_dependency "roo"
|
33
|
+
spec.add_runtime_dependency "fast_excel"
|
34
|
+
|
35
|
+
spec.add_development_dependency "debug", ">= 1.0.0"
|
32
36
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
37
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
-
|
35
|
-
spec.add_runtime_dependency "roo"
|
36
38
|
end
|
data/examples/age/age.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
1
|
require 'dreader'
|
2
2
|
|
3
|
-
|
3
|
+
class Reader < Dreader::Engine
|
4
4
|
|
5
|
-
|
5
|
+
options do
|
6
6
|
first_row 2
|
7
|
+
debug true
|
7
8
|
end
|
8
9
|
|
9
|
-
|
10
|
+
column :name do
|
11
|
+
doc "A is the name string"
|
10
12
|
colref 'A'
|
11
13
|
end
|
12
14
|
|
13
|
-
|
15
|
+
column :birthdate do
|
16
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
14
17
|
colref 'B'
|
15
18
|
|
16
19
|
process do |c|
|
@@ -18,7 +21,7 @@ i.column :birthdate do
|
|
18
21
|
end
|
19
22
|
end
|
20
23
|
|
21
|
-
|
24
|
+
virtual_column :age do
|
22
25
|
process do |row|
|
23
26
|
birthdate = row[:birthdate][:value]
|
24
27
|
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
@@ -28,12 +31,25 @@ i.virtual_column :age do
|
|
28
31
|
end
|
29
32
|
end
|
30
33
|
|
31
|
-
|
34
|
+
mapping do |row|
|
32
35
|
r = Dreader::Util.simplify(row)
|
33
36
|
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
34
37
|
end
|
38
|
+
end
|
39
|
+
|
40
|
+
i = Reader.new
|
35
41
|
|
36
42
|
i.read filename: "Birthdays.ods"
|
37
43
|
i.virtual_columns
|
38
44
|
i.process
|
39
45
|
|
46
|
+
#
|
47
|
+
# Here we can do further processing on the data
|
48
|
+
#
|
49
|
+
File.open("ages.txt", "w") do |file|
|
50
|
+
i.table.each do |row|
|
51
|
+
unless row[:row_errors].any?
|
52
|
+
file.puts "#{row[:name][:value]} #{row[:age][:value]}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
Binary file
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
class Reader < Dreader::Engine
|
4
|
+
options { first_row 2; debug true }
|
5
|
+
|
6
|
+
#
|
7
|
+
# Here we intentionally fail on good data
|
8
|
+
#
|
9
|
+
column :name do
|
10
|
+
doc "A is the name string"
|
11
|
+
|
12
|
+
colref 'A'
|
13
|
+
|
14
|
+
check_raw :name_too_short do |cell|
|
15
|
+
cell&.size < 7
|
16
|
+
end
|
17
|
+
|
18
|
+
check_raw :name_too_long do |cell|
|
19
|
+
cell&.size > 10
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
column :birthdate do
|
24
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
25
|
+
colref 'B'
|
26
|
+
|
27
|
+
process do |c|
|
28
|
+
Date.parse(c)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
virtual_column :age do
|
33
|
+
process do |row|
|
34
|
+
birthdate = row[:birthdate][:value]
|
35
|
+
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
36
|
+
today = Date.today
|
37
|
+
|
38
|
+
[0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
mapping do |row|
|
43
|
+
errors_for_row = row[:row_errors]
|
44
|
+
|
45
|
+
if errors_for_row.any?
|
46
|
+
puts "ATTENTION: Record at row #{row[:row_number]} has the following errors:"
|
47
|
+
errors_for_row.each do |error|
|
48
|
+
puts " " + error[:message].to_s
|
49
|
+
end
|
50
|
+
else
|
51
|
+
r = Dreader::Util.simplify(row)
|
52
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
i = Reader.new
|
58
|
+
|
59
|
+
i.read filename: "Birthdays.ods"
|
60
|
+
i.virtual_columns
|
61
|
+
i.process
|
62
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'dreader'
|
2
|
+
|
3
|
+
i = Dreader::Engine.new
|
4
|
+
|
5
|
+
i.options do
|
6
|
+
first_row 2
|
7
|
+
end
|
8
|
+
|
9
|
+
i.column :name do
|
10
|
+
doc "A is the name string"
|
11
|
+
colref 'A'
|
12
|
+
end
|
13
|
+
|
14
|
+
i.column :birthdate do
|
15
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
16
|
+
colref 'B'
|
17
|
+
|
18
|
+
process do |c|
|
19
|
+
Date.parse(c)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
i.column :place_of_birth do
|
24
|
+
doc "This is a string, a city (the interesting part is that it is in column E"
|
25
|
+
colref 'E'
|
26
|
+
end
|
27
|
+
|
28
|
+
i.example({ name: "Galileo Galilei", birthdate: "15/02/1564", place_of_birth: "Pisa" })
|
29
|
+
i.example({ name: "Cyrus McCormick", birthdate: "15/02/1809" })
|
30
|
+
i.example({ name: "Charles Lewis Tiffany", birthdate: "15/02/1812" })
|
31
|
+
i.example({ name: "Ernest Shackleton", birthdate: "15/02/1874" })
|
32
|
+
i.example({ name: "Yelena Bonner", birthdate: "15/02/1923" })
|
33
|
+
i.example({ name: "Matt Groening", birthdate: "15/02/1954", place_of_birth: "USA" })
|
34
|
+
|
35
|
+
# only xlsx at the moment
|
36
|
+
i.generate_template template_filename: "birthdays.xlsx"
|
37
|
+
|
@@ -2,28 +2,30 @@
|
|
2
2
|
|
3
3
|
require 'dreader'
|
4
4
|
|
5
|
-
|
5
|
+
class Processor < Dreader::Engine
|
6
|
+
options do
|
7
|
+
first_row 2
|
8
|
+
filename "cities_by_state.ods"
|
9
|
+
end
|
6
10
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
11
|
+
column :state do |col|
|
12
|
+
col.colref 'A'
|
13
|
+
end
|
11
14
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
+
column :cities do |col|
|
16
|
+
col.colref 'B'
|
17
|
+
col.check do |data|
|
18
|
+
data.class == Integer
|
19
|
+
end
|
20
|
+
end
|
15
21
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
data.class == Integer
|
22
|
+
mapping do |row|
|
23
|
+
hash = Dreader::Util.simplify row
|
24
|
+
puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
|
20
25
|
end
|
21
26
|
end
|
22
27
|
|
23
|
-
processor
|
24
|
-
hash = Dreader::Util.simplify row
|
25
|
-
puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
|
26
|
-
end
|
28
|
+
processor = Processor.new
|
27
29
|
|
28
30
|
printf "Loading the spreadsheet..."
|
29
31
|
processor.load
|
@@ -44,5 +46,5 @@ puts "done!"
|
|
44
46
|
puts "Processing the spreadsheet..."
|
45
47
|
processor.process
|
46
48
|
puts "... done"
|
47
|
-
|
48
|
-
|
49
|
+
|
50
|
+
|
@@ -13,43 +13,44 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
16
|
+
class Importer < Dreader::Engine
|
17
|
+
# read from us_cities.tsv, lines from 2 to 10 (included)
|
18
|
+
options do
|
19
|
+
filename "us_cities.tsv"
|
20
|
+
first_row 2
|
21
|
+
last_row 10
|
22
|
+
end
|
24
23
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
24
|
+
# these are the columns for which we only need to specify column and name
|
25
|
+
[[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
|
26
|
+
# val[0] -> :city
|
27
|
+
# val[1] -> 2
|
28
|
+
column val[0] do |col|
|
29
|
+
col.colref val[1]
|
30
|
+
col.process do |val|
|
31
|
+
val.strip
|
32
|
+
end
|
33
33
|
end
|
34
34
|
end
|
35
|
-
end
|
36
35
|
|
37
|
-
# the population column requires more work
|
38
|
-
|
39
|
-
|
36
|
+
# the population column requires more work
|
37
|
+
column :population do |col|
|
38
|
+
col.colref 4
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
# make "3,000" into 3000 (int)
|
41
|
+
col.process do |value|
|
42
|
+
value.gsub(",", "").to_i
|
43
|
+
end
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
col.check do |value|
|
46
|
+
value > 0
|
47
|
+
end
|
49
48
|
|
49
|
+
end
|
50
50
|
end
|
51
51
|
|
52
52
|
cities = []
|
53
|
+
importer = Importer.new
|
53
54
|
|
54
55
|
importer.mapping do |row|
|
55
56
|
# remove all additional information stored in each cell
|
@@ -13,36 +13,36 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
16
|
+
class Importer < Dreader::Engine
|
17
|
+
# read from us_cities.tsv, lines from 2 to 10 (included)
|
18
|
+
options do
|
19
|
+
filename "us_cities.tsv"
|
20
|
+
first_row 2
|
21
|
+
last_row 10
|
22
|
+
end
|
24
23
|
|
25
|
-
# these are the columns for which we only need to specify column and name
|
26
|
-
|
27
|
-
|
28
|
-
end
|
24
|
+
# these are the columns for which we only need to specify column and name
|
25
|
+
columns ({city: 2, state: 3, latlon: 11}) do
|
26
|
+
process { |val| val.strip }
|
27
|
+
end
|
29
28
|
|
30
|
-
# the population column requires more work
|
31
|
-
|
32
|
-
|
29
|
+
# the population column requires more work
|
30
|
+
column :population do |col|
|
31
|
+
col.colref 4
|
33
32
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
# make "3,000" into 3000 (int)
|
34
|
+
col.process do |value|
|
35
|
+
value.gsub(",", "").to_i
|
36
|
+
end
|
38
37
|
|
39
|
-
|
40
|
-
|
38
|
+
col.check do |value|
|
39
|
+
value > 0
|
40
|
+
end
|
41
41
|
end
|
42
|
-
|
43
42
|
end
|
44
43
|
|
45
44
|
cities = []
|
45
|
+
importer = Importer.new
|
46
46
|
|
47
47
|
importer.mapping do |row|
|
48
48
|
# remove all additional information stored in each cell
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Dreader
|
2
|
+
# service class to implement the column DSL language
|
3
|
+
class Column
|
4
|
+
def initialize
|
5
|
+
@checks_raw = {}
|
6
|
+
@checks = {}
|
7
|
+
end
|
8
|
+
|
9
|
+
def doc(doc)
|
10
|
+
@doc = doc
|
11
|
+
end
|
12
|
+
|
13
|
+
def colref(colref)
|
14
|
+
@colref = colref
|
15
|
+
end
|
16
|
+
|
17
|
+
def check_raw(message = :unnamed_check, &block)
|
18
|
+
@checks_raw[message] = block
|
19
|
+
end
|
20
|
+
|
21
|
+
def process(&block)
|
22
|
+
@process = block
|
23
|
+
end
|
24
|
+
|
25
|
+
def check(message = :unnamed_check, &block)
|
26
|
+
@checks[message] = block
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_hash
|
30
|
+
{
|
31
|
+
checks_raw: @checks_raw,
|
32
|
+
process: @process,
|
33
|
+
checks: @checks,
|
34
|
+
colref: @colref,
|
35
|
+
doc: @doc
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|