dreader 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/dreader.gemspec CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
7
7
  spec.name = "dreader"
8
8
  spec.version = Dreader::VERSION
9
9
  spec.authors = ["Adolfo Villafiorita"]
10
- spec.email = ["adolfo.villafiorita@ict4g.net"]
10
+ spec.email = ["adolfo@shair.tech"]
11
11
 
12
12
  spec.summary = %q{Process and import data from cvs and spreadsheets}
13
13
  spec.description = %q{Use this gem to specify the structure of some tabular data
@@ -19,7 +19,7 @@ Rails application, but the gem can used in any Ruby application.
19
19
 
20
20
  The gem should be relatively easy to use, despite its name. (Dread
21
21
  stands for *d*ata *r*eader)}
22
- spec.homepage = "https://ict4g.net/gitea/adolfo/dreader"
22
+ spec.homepage = "https://redmine.shair.tech/projects/dreader"
23
23
  spec.license = "MIT"
24
24
 
25
25
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -29,8 +29,10 @@ stands for *d*ata *r*eader)}
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
 
32
+ spec.add_runtime_dependency "roo"
33
+ spec.add_runtime_dependency "fast_excel"
34
+
35
+ spec.add_development_dependency "debug", ">= 1.0.0"
32
36
  spec.add_development_dependency "bundler", "~> 1.16"
33
37
  spec.add_development_dependency "rake", "~> 10.0"
34
-
35
- spec.add_runtime_dependency "roo"
36
38
  end
data/examples/age/age.rb CHANGED
@@ -1,16 +1,19 @@
1
1
  require 'dreader'
2
2
 
3
- i = Dreader::Engine.new
3
+ class Reader < Dreader::Engine
4
4
 
5
- i.options do
5
+ options do
6
6
  first_row 2
7
+ debug true
7
8
  end
8
9
 
9
- i.column :name do
10
+ column :name do
11
+ doc "A is the name string"
10
12
  colref 'A'
11
13
  end
12
14
 
13
- i.column :birthdate do
15
+ column :birthdate do
16
+ doc "Birthdate contains a full date (i.e., including the year)"
14
17
  colref 'B'
15
18
 
16
19
  process do |c|
@@ -18,7 +21,7 @@ i.column :birthdate do
18
21
  end
19
22
  end
20
23
 
21
- i.virtual_column :age do
24
+ virtual_column :age do
22
25
  process do |row|
23
26
  birthdate = row[:birthdate][:value]
24
27
  birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
@@ -28,12 +31,25 @@ i.virtual_column :age do
28
31
  end
29
32
  end
30
33
 
31
- i.mapping do |row|
34
+ mapping do |row|
32
35
  r = Dreader::Util.simplify(row)
33
36
  puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
34
37
  end
38
+ end
39
+
40
+ i = Reader.new
35
41
 
36
42
  i.read filename: "Birthdays.ods"
37
43
  i.virtual_columns
38
44
  i.process
39
45
 
46
+ #
47
+ # Here we can do further processing on the data
48
+ #
49
+ File.open("ages.txt", "w") do |file|
50
+ i.table.each do |row|
51
+ unless row[:row_errors].any?
52
+ file.puts "#{row[:name][:value]} #{row[:age][:value]}"
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,62 @@
1
+ require 'dreader'
2
+
3
+ class Reader < Dreader::Engine
4
+ options { first_row 2; debug true }
5
+
6
+ #
7
+ # Here we intentionally fail on good data
8
+ #
9
+ column :name do
10
+ doc "A is the name string"
11
+
12
+ colref 'A'
13
+
14
+ check_raw :name_too_short do |cell|
15
+ cell&.size < 7
16
+ end
17
+
18
+ check_raw :name_too_long do |cell|
19
+ cell&.size > 10
20
+ end
21
+ end
22
+
23
+ column :birthdate do
24
+ doc "Birthdate contains a full date (i.e., including the year)"
25
+ colref 'B'
26
+
27
+ process do |c|
28
+ Date.parse(c)
29
+ end
30
+ end
31
+
32
+ virtual_column :age do
33
+ process do |row|
34
+ birthdate = row[:birthdate][:value]
35
+ birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
36
+ today = Date.today
37
+
38
+ [0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
39
+ end
40
+ end
41
+
42
+ mapping do |row|
43
+ errors_for_row = row[:row_errors]
44
+
45
+ if errors_for_row.any?
46
+ puts "ATTENTION: Record at row #{row[:row_number]} has the following errors:"
47
+ errors_for_row.each do |error|
48
+ puts " " + error[:message].to_s
49
+ end
50
+ else
51
+ r = Dreader::Util.simplify(row)
52
+ puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
53
+ end
54
+ end
55
+ end
56
+
57
+ i = Reader.new
58
+
59
+ i.read filename: "Birthdays.ods"
60
+ i.virtual_columns
61
+ i.process
62
+
@@ -0,0 +1,37 @@
1
+ require 'dreader'
2
+
3
+ i = Dreader::Engine.new
4
+
5
+ i.options do
6
+ first_row 2
7
+ end
8
+
9
+ i.column :name do
10
+ doc "A is the name string"
11
+ colref 'A'
12
+ end
13
+
14
+ i.column :birthdate do
15
+ doc "Birthdate contains a full date (i.e., including the year)"
16
+ colref 'B'
17
+
18
+ process do |c|
19
+ Date.parse(c)
20
+ end
21
+ end
22
+
23
+ i.column :place_of_birth do
24
+ doc "This is a string, a city (the interesting part is that it is in column E"
25
+ colref 'E'
26
+ end
27
+
28
+ i.example({ name: "Galileo Galilei", birthdate: "15/02/1564", place_of_birth: "Pisa" })
29
+ i.example({ name: "Cyrus McCormick", birthdate: "15/02/1809" })
30
+ i.example({ name: "Charles Lewis Tiffany", birthdate: "15/02/1812" })
31
+ i.example({ name: "Ernest Shackleton", birthdate: "15/02/1874" })
32
+ i.example({ name: "Yelena Bonner", birthdate: "15/02/1923" })
33
+ i.example({ name: "Matt Groening", birthdate: "15/02/1954", place_of_birth: "USA" })
34
+
35
+ # only xlsx at the moment
36
+ i.generate_template template_filename: "birthdays.xlsx"
37
+
@@ -2,28 +2,30 @@
2
2
 
3
3
  require 'dreader'
4
4
 
5
- processor = Dreader::Engine.new
5
+ class Processor < Dreader::Engine
6
+ options do
7
+ first_row 2
8
+ filename "cities_by_state.ods"
9
+ end
6
10
 
7
- processor.options do
8
- first_row 2
9
- filename "cities_by_state.ods"
10
- end
11
+ column :state do |col|
12
+ col.colref 'A'
13
+ end
11
14
 
12
- processor.column :state do |col|
13
- col.colref 'A'
14
- end
15
+ column :cities do |col|
16
+ col.colref 'B'
17
+ col.check do |data|
18
+ data.class == Integer
19
+ end
20
+ end
15
21
 
16
- processor.column :cities do |col|
17
- col.colref 'B'
18
- col.check do |data|
19
- data.class == Integer
22
+ mapping do |row|
23
+ hash = Dreader::Util.simplify row
24
+ puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
20
25
  end
21
26
  end
22
27
 
23
- processor.mapping do |row|
24
- hash = Dreader::Util.simplify row
25
- puts " states: #{hash[:state]} number of cities: #{hash[:cities]}"
26
- end
28
+ processor = Processor.new
27
29
 
28
30
  printf "Loading the spreadsheet..."
29
31
  processor.load
@@ -44,5 +46,5 @@ puts "done!"
44
46
  puts "Processing the spreadsheet..."
45
47
  processor.process
46
48
  puts "... done"
47
-
48
-
49
+
50
+
@@ -13,43 +13,44 @@ class City
13
13
  end
14
14
  end
15
15
 
16
- importer = Dreader::Engine.new
17
-
18
- # read from us_cities.tsv, lines from 2 to 10 (included)
19
- importer.options do
20
- filename "us_cities.tsv"
21
- first_row 2
22
- last_row 10
23
- end
16
+ class Importer < Dreader::Engine
17
+ # read from us_cities.tsv, lines from 2 to 10 (included)
18
+ options do
19
+ filename "us_cities.tsv"
20
+ first_row 2
21
+ last_row 10
22
+ end
24
23
 
25
- # these are the columns for which we only need to specify column and name
26
- [[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
27
- # val[0] -> :city
28
- # val[1] -> 2
29
- importer.column val[0] do |col|
30
- col.colref val[1]
31
- col.process do |val|
32
- val.strip
24
+ # these are the columns for which we only need to specify column and name
25
+ [[:city, 2], [:state, 3], [:latlon, 11]].each do |val|
26
+ # val[0] -> :city
27
+ # val[1] -> 2
28
+ column val[0] do |col|
29
+ col.colref val[1]
30
+ col.process do |val|
31
+ val.strip
32
+ end
33
33
  end
34
34
  end
35
- end
36
35
 
37
- # the population column requires more work
38
- importer.column :population do |col|
39
- col.colref 4
36
+ # the population column requires more work
37
+ column :population do |col|
38
+ col.colref 4
40
39
 
41
- # make "3,000" into 3000 (int)
42
- col.process do |value|
43
- value.gsub(",", "").to_i
44
- end
40
+ # make "3,000" into 3000 (int)
41
+ col.process do |value|
42
+ value.gsub(",", "").to_i
43
+ end
45
44
 
46
- col.check do |value|
47
- value > 0
48
- end
45
+ col.check do |value|
46
+ value > 0
47
+ end
49
48
 
49
+ end
50
50
  end
51
51
 
52
52
  cities = []
53
+ importer = Importer.new
53
54
 
54
55
  importer.mapping do |row|
55
56
  # remove all additional information stored in each cell
@@ -13,36 +13,36 @@ class City
13
13
  end
14
14
  end
15
15
 
16
- importer = Dreader::Engine.new
17
-
18
- # read from us_cities.tsv, lines from 2 to 10 (included)
19
- importer.options do
20
- filename "us_cities.tsv"
21
- first_row 2
22
- last_row 10
23
- end
16
+ class Importer < Dreader::Engine
17
+ # read from us_cities.tsv, lines from 2 to 10 (included)
18
+ options do
19
+ filename "us_cities.tsv"
20
+ first_row 2
21
+ last_row 10
22
+ end
24
23
 
25
- # these are the columns for which we only need to specify column and name
26
- importer.bulk_declare ({city: 2, state: 3, latlon: 11}) do
27
- process { |val| val.strip }
28
- end
24
+ # these are the columns for which we only need to specify column and name
25
+ columns ({city: 2, state: 3, latlon: 11}) do
26
+ process { |val| val.strip }
27
+ end
29
28
 
30
- # the population column requires more work
31
- importer.column :population do |col|
32
- col.colref 4
29
+ # the population column requires more work
30
+ column :population do |col|
31
+ col.colref 4
33
32
 
34
- # make "3,000" into 3000 (int)
35
- col.process do |value|
36
- value.gsub(",", "").to_i
37
- end
33
+ # make "3,000" into 3000 (int)
34
+ col.process do |value|
35
+ value.gsub(",", "").to_i
36
+ end
38
37
 
39
- col.check do |value|
40
- value > 0
38
+ col.check do |value|
39
+ value > 0
40
+ end
41
41
  end
42
-
43
42
  end
44
43
 
45
44
  cities = []
45
+ importer = Importer.new
46
46
 
47
47
  importer.mapping do |row|
48
48
  # remove all additional information stored in each cell
@@ -0,0 +1,39 @@
1
+ module Dreader
2
+ # service class to implement the column DSL language
3
+ class Column
4
+ def initialize
5
+ @checks_raw = {}
6
+ @checks = {}
7
+ end
8
+
9
+ def doc(doc)
10
+ @doc = doc
11
+ end
12
+
13
+ def colref(colref)
14
+ @colref = colref
15
+ end
16
+
17
+ def check_raw(message = :unnamed_check, &block)
18
+ @checks_raw[message] = block
19
+ end
20
+
21
+ def process(&block)
22
+ @process = block
23
+ end
24
+
25
+ def check(message = :unnamed_check, &block)
26
+ @checks[message] = block
27
+ end
28
+
29
+ def to_hash
30
+ {
31
+ checks_raw: @checks_raw,
32
+ process: @process,
33
+ checks: @checks,
34
+ colref: @colref,
35
+ doc: @doc
36
+ }
37
+ end
38
+ end
39
+ end