dreader 0.5.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/dreader.gemspec CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "dreader"
8
8
  spec.version = Dreader::VERSION
9
9
  spec.authors = ["Adolfo Villafiorita"]
10
- spec.email = ["adolfo.villafiorita@ict4g.net"]
10
+ spec.email = ["adolfo@shair.tech"]
11
11
 
12
12
  spec.summary = %q{Process and import data from cvs and spreadsheets}
13
13
  spec.description = %q{Use this gem to specify the structure of some tabular data
@@ -19,7 +19,7 @@ Rails application, but the gem can used in any Ruby application.
19
19
 
20
20
  The gem should be relatively easy to use, despite its name. (Dread
21
21
  stands for *d*ata *r*eader)}
22
- spec.homepage = "https://ict4g.net/gitea/adolfo/dreader"
22
+ spec.homepage = "https://redmine.shair.tech/projects/dreader"
23
23
  spec.license = "MIT"
24
24
 
25
25
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -29,8 +29,10 @@ stands for *d*ata *r*eader)}
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
 
32
+ spec.add_runtime_dependency "roo"
33
+ spec.add_runtime_dependency "fast_excel"
34
+
35
+ spec.add_development_dependency "debug", ">= 1.0.0"
32
36
  spec.add_development_dependency "bundler", "~> 1.16"
33
37
  spec.add_development_dependency "rake", "~> 10.0"
34
-
35
- spec.add_runtime_dependency "roo"
36
38
  end
data/examples/age/age.rb CHANGED
@@ -1,39 +1,55 @@
1
- require 'dreader'
1
+ require "dreader"
2
2
 
3
- i = Dreader::Engine.new
3
+ class Reader
4
+ extend Dreader::Engine
4
5
 
5
- i.options do
6
- first_row 2
7
- end
6
+ options do
7
+ first_row 2
8
+ debug true
9
+ end
8
10
 
9
- i.column :name do
10
- colref 'A'
11
- end
11
+ column :name do
12
+ doc "A is the name string"
13
+ colref 'A'
14
+ end
12
15
 
13
- i.column :birthdate do
14
- colref 'B'
16
+ column :birthdate do
17
+ doc "Birthdate contains a full date (i.e., including the year)"
18
+ colref 'B'
15
19
 
16
- process do |c|
17
- Date.parse(c)
20
+ process do |c|
21
+ Date.parse(c)
22
+ end
18
23
  end
19
- end
20
24
 
21
- i.virtual_column :age do
22
- process do |row|
23
- birthdate = row[:birthdate][:value]
24
- birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
25
- today = Date.today
25
+ virtual_column :age do
26
+ process do |row|
27
+ birthdate = row[:birthdate][:value]
28
+ birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
29
+ today = Date.today
26
30
 
27
- [0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
31
+ [0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
32
+ end
28
33
  end
29
- end
30
34
 
31
- i.mapping do |row|
32
- r = Dreader::Util.simplify(row)
33
- puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
35
+ mapping do |row|
36
+ r = Dreader::Util.simplify(row)
37
+ puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
38
+ end
34
39
  end
35
40
 
41
+ i = Reader
36
42
  i.read filename: "Birthdays.ods"
37
43
  i.virtual_columns
38
- i.process
39
-
44
+ i.mappings
45
+
46
+ #
47
+ # Here we can do further processing on the data
48
+ #
49
+ File.open("ages.txt", "w") do |file|
50
+ i.table.each do |row|
51
+ unless row[:row_errors].any?
52
+ file.puts "#{row[:name][:value]} #{row[:age][:value]}"
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,64 @@
1
+ require 'dreader'
2
+
3
+ class Reader
4
+ extend Dreader::Engine
5
+
6
+ options { first_row 2; debug true }
7
+
8
+ #
9
+ # Here we intentionally fail on good data
10
+ #
11
+ column :name do
12
+ doc "A is the name string"
13
+
14
+ colref 'A'
15
+
16
+ check_raw :name_too_short do |cell|
17
+ cell&.size < 7
18
+ end
19
+
20
+ check_raw :name_too_long do |cell|
21
+ cell&.size > 10
22
+ end
23
+ end
24
+
25
+ column :birthdate do
26
+ doc "Birthdate contains a full date (i.e., including the year)"
27
+ colref 'B'
28
+
29
+ process do |c|
30
+ Date.parse(c)
31
+ end
32
+ end
33
+
34
+ virtual_column :age do
35
+ process do |row|
36
+ birthdate = row[:birthdate][:value]
37
+ birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
38
+ today = Date.today
39
+
40
+ [0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
41
+ end
42
+ end
43
+
44
+ mapping do |row|
45
+ errors_for_row = row[:row_errors]
46
+
47
+ if errors_for_row.any?
48
+ puts "ATTENTION: Record at row #{row[:row_number]} has the following errors:"
49
+ errors_for_row.each do |error|
50
+ puts " " + error[:message].to_s
51
+ end
52
+ else
53
+ r = Dreader::Util.simplify(row)
54
+ puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
55
+ end
56
+ end
57
+ end
58
+
59
+ i = Reader
60
+
61
+ i.read filename: "Birthdays.ods"
62
+ i.virtual_columns
63
+ i.mappings
64
+
@@ -0,0 +1,28 @@
1
+ #
2
+ # This demonstrates that variables are local
3
+ #
4
+
5
+ require "dreader"
6
+
7
+ class OneReader
8
+ extend Dreader::Engine
9
+
10
+ options do
11
+ first_row 2
12
+ debug true
13
+ end
14
+ end
15
+
16
+ class AnotherReader
17
+ extend Dreader::Engine
18
+
19
+ options do
20
+ filename "filename"
21
+ end
22
+ end
23
+
24
+ r1 = OneReader
25
+ r2 = AnotherReader
26
+
27
+ puts r1.declared_options
28
+ puts r2.declared_options
@@ -0,0 +1,37 @@
1
+ require 'dreader'
2
+
3
+ i = Dreader::Engine.new
4
+
5
+ i.options do
6
+ first_row 2
7
+ end
8
+
9
+ i.column :name do
10
+ doc "A is the name string"
11
+ colref 'A'
12
+ end
13
+
14
+ i.column :birthdate do
15
+ doc "Birthdate contains a full date (i.e., including the year)"
16
+ colref 'B'
17
+
18
+ process do |c|
19
+ Date.parse(c)
20
+ end
21
+ end
22
+
23
+ i.column :place_of_birth do
24
+ doc "This is a string, a city (the interesting part is that it is in column E"
25
+ colref 'E'
26
+ end
27
+
28
+ i.example({ name: "Galileo Galilei", birthdate: "15/02/1564", place_of_birth: "Pisa" })
29
+ i.example({ name: "Cyrus McCormick", birthdate: "15/02/1809" })
30
+ i.example({ name: "Charles Lewis Tiffany", birthdate: "15/02/1812" })
31
+ i.example({ name: "Ernest Shackleton", birthdate: "15/02/1874" })
32
+ i.example({ name: "Yelena Bonner", birthdate: "15/02/1923" })
33
+ i.example({ name: "Matt Groening", birthdate: "15/02/1954", place_of_birth: "USA" })
34
+
35
+ # only xlsx at the moment
36
+ i.generate_template template_filename: "birthdays.xlsx"
37
+
@@ -2,29 +2,33 @@
2
2
 
3
3
  require 'dreader'
4
4
 
5
- processor = Dreader::Engine.new
5
+ class Processor
6
+ extend Dreader::Engine
6
7
 
7
- processor.options do
8
- first_row 2
9
- filename "cities_by_state.ods"
10
- end
8
+ options do
9
+ first_row 2
10
+ filename "cities_by_state.ods"
11
+ end
11
12
 
12
- processor.column :state do |col|
13
- col.colref 'A'
14
- end
13
+ column :state do |col|
14
+ col.colref 'A'
15
+ end
15
16
 
16
- processor.column :cities do |col|
17
- col.colref 'B'
18
- col.check do |data|
19
- data.class == Integer
17
+ column :cities do |col|
18
+ col.colref 'B'
19
+ col.check do |data|
20
+ data.class == Integer
21
+ end
20
22
  end
21
- end
22
23
 
23
- processor.mapping do |row|
24
- hash = Dreader::Util.simplify row
25
- puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
24
+ mapping do |row|
25
+ hash = Dreader::Util.simplify row
26
+ puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
27
+ end
26
28
  end
27
29
 
30
+ processor = Processor
31
+
28
32
  printf "Loading the spreadsheet..."
29
33
  processor.load
30
34
  puts "done!"
@@ -41,8 +45,8 @@ else
41
45
  end
42
46
  puts "done!"
43
47
 
44
- puts "Processing the spreadsheet..."
45
- processor.process
48
+ puts "Applying mapping rules to the spreadsheet..."
49
+ processor.mappings
46
50
  puts "... done"
47
-
48
-
51
+
52
+
@@ -13,43 +13,46 @@ class City
13
13
  end
14
14
  end
15
15
 
16
- importer = Dreader::Engine.new
17
-
18
- # read from us_cities.tsv, lines from 2 to 10 (included)
19
- importer.options do
20
- filename "us_cities.tsv"
21
- first_row 2
22
- last_row 10
23
- end
16
+ class Importer
17
+ extend Dreader::Engine
18
+
19
+ # read from us_cities.tsv, lines from 2 to 10 (included)
20
+ options do
21
+ filename "us_cities.tsv"
22
+ first_row 2
23
+ last_row 10
24
+ end
24
25
 
25
- # these are the columns for which we only need to specify column and name
26
- [[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
27
- # val[0] -> :city
28
- # val[1] -> 2
29
- importer.column val[0] do |col|
30
- col.colref val[1]
31
- col.process do |val|
32
- val.strip
26
+ # these are the columns for which we only need to specify column and name
27
+ [[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
28
+ # val[0] -> :city
29
+ # val[1] -> 2
30
+ column val[0] do |col|
31
+ col.colref val[1]
32
+ col.process do |val|
33
+ val.strip
34
+ end
33
35
  end
34
36
  end
35
- end
36
37
 
37
- # the population column requires more work
38
- importer.column :population do |col|
39
- col.colref 4
38
+ # the population column requires more work
39
+ column :population do |col|
40
+ col.colref 4
40
41
 
41
- # make "3,000" into 3000 (int)
42
- col.process do |value|
43
- value.gsub(",", "").to_i
44
- end
42
+ # make "3,000" into 3000 (int)
43
+ col.process do |value|
44
+ value.gsub(",", "").to_i
45
+ end
45
46
 
46
- col.check do |value|
47
- value > 0
48
- end
47
+ col.check do |value|
48
+ value > 0
49
+ end
49
50
 
51
+ end
50
52
  end
51
53
 
52
54
  cities = []
55
+ importer = Importer
53
56
 
54
57
  importer.mapping do |row|
55
58
  # remove all additional information stored in each cell
@@ -80,7 +83,7 @@ importer.debug process: false, check: false
80
83
  # load and process
81
84
  importer.load
82
85
  cities = []
83
- importer.process
86
+ importer.mappings
84
87
 
85
88
  # output everything to see whether it works
86
89
  puts "First ten cities in the US (source Wikipedia)"
@@ -13,36 +13,38 @@ class City
13
13
  end
14
14
  end
15
15
 
16
- importer = Dreader::Engine.new
17
-
18
- # read from us_cities.tsv, lines from 2 to 10 (included)
19
- importer.options do
20
- filename "us_cities.tsv"
21
- first_row 2
22
- last_row 10
23
- end
16
+ class Importer
17
+ extend Dreader::Engine
18
+
19
+ # read from us_cities.tsv, lines from 2 to 10 (included)
20
+ options do
21
+ filename "us_cities.tsv"
22
+ first_row 2
23
+ last_row 10
24
+ end
24
25
 
25
- # these are the columns for which we only need to specify column and name
26
- importer.bulk_declare ({city: 2, state: 3, latlon: 11}) do
27
- process { |val| val.strip }
28
- end
26
+ # these are the columns for which we only need to specify column and name
27
+ columns ({city: 2, state: 3, latlon: 11}) do
28
+ process { |val| val.strip }
29
+ end
29
30
 
30
- # the population column requires more work
31
- importer.column :population do |col|
32
- col.colref 4
31
+ # the population column requires more work
32
+ column :population do |col|
33
+ col.colref 4
33
34
 
34
- # make "3,000" into 3000 (int)
35
- col.process do |value|
36
- value.gsub(",", "").to_i
37
- end
35
+ # make "3,000" into 3000 (int)
36
+ col.process do |value|
37
+ value.gsub(",", "").to_i
38
+ end
38
39
 
39
- col.check do |value|
40
- value > 0
40
+ col.check do |value|
41
+ value > 0
42
+ end
41
43
  end
42
-
43
44
  end
44
45
 
45
46
  cities = []
47
+ importer = Importer
46
48
 
47
49
  importer.mapping do |row|
48
50
  # remove all additional information stored in each cell
@@ -73,7 +75,7 @@ importer.debug process: false, check: false
73
75
  # load and process
74
76
  importer.load
75
77
  cities = []
76
- importer.process
78
+ importer.mappings
77
79
 
78
80
  # output everything to see whether it works
79
81
  puts "First ten cities in the US (source Wikipedia)"
@@ -0,0 +1,39 @@
1
+ module Dreader
2
+ # service class to implement the column DSL language
3
+ class Column
4
+ def initialize
5
+ @checks_raw = {}
6
+ @checks = {}
7
+ end
8
+
9
+ def doc(doc)
10
+ @doc = doc
11
+ end
12
+
13
+ def colref(colref)
14
+ @colref = colref
15
+ end
16
+
17
+ def check_raw(message = :unnamed_check, &block)
18
+ @checks_raw[message] = block
19
+ end
20
+
21
+ def process(&block)
22
+ @process = block
23
+ end
24
+
25
+ def check(message = :unnamed_check, &block)
26
+ @checks[message] = block
27
+ end
28
+
29
+ def to_hash
30
+ {
31
+ checks_raw: @checks_raw,
32
+ process: @process,
33
+ checks: @checks,
34
+ colref: @colref,
35
+ doc: @doc
36
+ }
37
+ end
38
+ end
39
+ end