dreader 0.5.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/dreader.rb CHANGED
@@ -1,412 +1,6 @@
1
- require 'dreader/version'
2
- require 'roo'
1
+ require "dreader/column"
2
+ require "dreader/engine"
3
+ require "dreader/options"
4
+ require "dreader/util"
5
+ require "dreader/version"
3
6
 
4
- module Dreader
5
- # service class to implement the column DSL language
6
- class Column
7
- def colref colref
8
- @colref = colref
9
- end
10
-
11
- def process &block
12
- @process = block
13
- end
14
-
15
- def check &block
16
- @check = block
17
- end
18
-
19
- def to_hash
20
- {process: @process, check: @check, colref: @colref }
21
- end
22
- end
23
-
24
- # service class to implement the options DSL language
25
- class Options
26
- def initialize
27
- @attributes = {}
28
- end
29
-
30
- def method_missing(name, *args, &block)
31
- @attributes[name] = args[0]
32
- end
33
-
34
- def to_hash
35
- @attributes
36
- end
37
- end
38
-
39
- # Utilities function to simplify importing data into
40
- # ActiveRecords
41
- class Util
42
- # given a hash returned by Engine, return the same hash with
43
- # keys directly bound to the content of the :value sub-key
44
- #
45
- # Example
46
- #
47
- # hash = {name: {value: "A", ...}, surname: {value: "B", ...}}
48
- # simplify hash
49
- # {name: "A", surname: "B"}
50
- def self.simplify hash
51
- new_hash = {}
52
- hash.keys.map { |k| new_hash[k] = hash[k][:value] }
53
- new_hash
54
- end
55
-
56
- # given a hash returned by Engine, keep the "kept" keys in the top
57
- # of the hierarchy and move the "moved_key" below the
58
- # "subordinate_key"
59
- #
60
- # Example
61
- #
62
- # hash = {name: "A", surname: "B", address: "via XX Settembre", city: "Genoa"}
63
- # restructure hash, [:name, :surname], :address_attributes, [:address, :city]
64
- # {name: "A", surname: "B", address_attributes: {address: "via XX Settembre", city: "Genoa"}}
65
- #
66
- def self.restructure hash, kept, subordinate_key, moved_keys
67
- head = hash.slice kept
68
- subordinate = self.prepend subordinate_key, hash.slice(moved_keys)
69
- head.merge subordinate
70
- end
71
-
72
- # an alias for Hash.slice
73
- # keys is an array of keys
74
- def self.slice hash, keys
75
- hash.slice *keys
76
- end
77
-
78
- # remove all `keys` from `hash`
79
- def self.clean hash, keys
80
- hash.reject { |k, v| keys.include?(k) }
81
- end
82
-
83
- # given a hash, return a new hash with key and whose value is
84
- # the hash
85
- #
86
- # Example:
87
- #
88
- # hash = {name: "A", size: 10}
89
- # prepend hash, :product_attributes
90
- # {product_attributes: {name: "A", size: 10}}
91
- #
92
- def self.prepend hash, key
93
- {key => hash}
94
- end
95
- end
96
-
97
- #
98
- # This is where the real stuff begins
99
- #
100
- class Engine
101
- # readable for debugging purposes
102
- # the options we passed
103
- attr_reader :options
104
- # the specification of the columns to process
105
- attr_reader :colspec
106
- # the specification of the virtual columns
107
- attr_reader :virtualcols
108
- # the data we read
109
- attr_reader :table
110
-
111
- def initialize
112
- @options = {}
113
- @colspec = []
114
- @virtualcols = []
115
- end
116
-
117
- # define a DSL for options
118
- # any string is processed as an option and it ends up in the
119
- # @options hash
120
- def options &block
121
- options = Options.new
122
- options.instance_eval(&block)
123
-
124
- @options = options.to_hash
125
- end
126
-
127
- # define a DSL for column specification
128
- # - `name` is the name of the column
129
- # - `block` contains two declarations, `process` and `check`, which are
130
- # used, respectively, to make a cell into the desired data and to check
131
- # whether the desired data is ok
132
- def column name, &block
133
- column = Column.new
134
- column.instance_eval(&block)
135
-
136
- @colspec << column.to_hash.merge({name: name})
137
- end
138
-
139
- # bulk declare columns we intend to read
140
- #
141
- # - hash is a hash in the form { symbolic_name: colref }
142
- #
143
- # i.bulk_declare {name: 'B', age: 'C'} is equivalent to:
144
- #
145
- # i.column :name do
146
- # colref 'B'
147
- # end
148
- # i.column :age do
149
- # colref 'C'
150
- # end
151
- #
152
- # i.bulk_declare {name: 'B', age: 'C'} do
153
- # process do |cell|
154
- # cell.strip
155
- # end
156
- # end
157
- #
158
- # is equivalent to:
159
- #
160
- # i.column :name do
161
- # colref 'B'
162
- # process do |cell|
163
- # cell.strip
164
- # end
165
- # end
166
- # i.column :age do
167
- # colref 'C'
168
- # process do |cell|
169
- # cell.strip
170
- # end
171
- # end
172
- def bulk_declare hash, &block
173
- hash.keys.each do |key|
174
- column = Column.new
175
- column.colref hash[key]
176
- if block
177
- column.instance_eval(&block)
178
- end
179
- @colspec << column.to_hash.merge({name: key})
180
- end
181
- end
182
-
183
-
184
- # virtual columns define derived attributes
185
- # the code specified in the virtual column is executed after reading
186
- # a row and before applying the mapping function
187
- #
188
- # virtual colum declarations are executed in the order in which
189
- # they are defined
190
- def virtual_column name, &block
191
- column = Column.new
192
- column.instance_eval &block
193
-
194
- @virtualcols << column.to_hash.merge({name: name})
195
- end
196
-
197
- # define what we do with each line we read
198
- # - `block` is the code which takes as input a `row` and processes
199
- # `row` is a hash in which each spreadsheet cell is accessible under
200
- # the column names. Each cell has the following values:
201
- # :value, :error, :row_number, :col_number
202
- def mapping &block
203
- @mapping = block
204
- end
205
-
206
- # read a file and store it internally
207
- #
208
- # @param hash, a hash, possibly overriding any of the parameters
209
- # set in the initial options. This allows you, for
210
- # instance, to apply the same column specification to
211
- # different files and different sheets
212
- #
213
- # @return the data read from filename, in the form of an array of
214
- # hashes
215
- def read args = {}
216
- if args.class == Hash
217
- hash = @options.merge(args)
218
- else
219
- puts "dreader error at #{__callee__}: this function takes a Hash as input"
220
- exit
221
- end
222
-
223
- spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
224
- sheet = spreadsheet.sheet(hash[:sheet] || 0)
225
-
226
- @table = Array.new
227
- @errors = Array.new
228
-
229
- first_row = hash[:first_row] || 1
230
- last_row = hash[:last_row] || sheet.last_row
231
-
232
- (first_row..last_row).each do |row_number|
233
- r = Hash.new
234
- @colspec.each_with_index do |colspec, index|
235
- cell = sheet.cell(row_number, colspec[:colref])
236
-
237
- colname = colspec[:name]
238
-
239
- r[colname] = Hash.new
240
- r[colname][:row_number] = row_number
241
- r[colname][:col_number] = colspec[:colref]
242
-
243
- begin
244
- r[colname][:value] = value = colspec[:process] ? colspec[:process].call(cell) : cell
245
- rescue => e
246
- puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
247
- raise e
248
- end
249
-
250
- begin
251
- if colspec[:check] and not colspec[:check].call(value) then
252
- r[colname][:error] = true
253
- @errors << "dreader error at #{__callee__}: value \"#{cell}\" for #{colname} at row #{row_number} (col #{index + 1}) does not pass the check function"
254
- else
255
- r[colname][:error] = false
256
- end
257
- rescue => e
258
- puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
259
- raise e
260
- end
261
- end
262
-
263
- @table << r
264
- end
265
-
266
- @table
267
- end
268
-
269
- alias_method :load, :read
270
-
271
- # get (processed) row number
272
- #
273
- # - row_number is the row to get: index starts at 1.
274
- #
275
- # get_row(1) get the first line read, that is, the row specified
276
- # by `first_row` in `options` (or in read)
277
- #
278
- # You need to invoke read first
279
- def get_row row_number
280
- if row_number > @table.size
281
- puts "dreader error at #{__callee__}: 'row_number' is out of range (did you invoke read first?)"
282
- exit
283
- elsif row_number <= 0
284
- puts "dreader error at #{__callee__}: 'row_number' is zero or negative (first row is 1)."
285
- else
286
- @table[row_number - 1]
287
- end
288
- end
289
-
290
- # show to stdout the first `n` records we read from the file given the current
291
- # configuration
292
- def debug args = {}
293
- if args.class == Hash
294
- hash = @options.merge(args)
295
- else
296
- puts "dreader error at #{__callee__}: this function takes a Hash as input"
297
- exit
298
- end
299
-
300
- # apply some defaults, if not defined in the options
301
- hash[:process] = true if not hash.has_key? :process # shall we apply the process function?
302
- hash[:check] = true if not hash.has_key? :check # shall we check the data read?
303
- hash[:n] = 10 if not hash[:n]
304
-
305
- spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
306
- sheet = spreadsheet.sheet(hash[:sheet] || 0)
307
-
308
- puts "Current configuration:"
309
- @options.each do |k, v|
310
- puts " #{k}: #{v}"
311
- end
312
-
313
- puts "Configuration used by debug:"
314
- hash.each do |k, v|
315
- puts " #{k}: #{v}"
316
- end
317
-
318
- n = hash[:n]
319
- first_row = hash[:first_row] || 1
320
- last_row = first_row + n - 1
321
-
322
- puts " Last row (according to roo): #{sheet.last_row}"
323
- puts " Number of rows I will read in this session: #{n} (from #{first_row} to #{last_row})"
324
-
325
- (first_row..last_row).each do |row_number|
326
- puts "Row #{row_number} is:"
327
- r = Hash.new
328
- @colspec.each_with_index do |colspec, index|
329
- colname = colspec[:name]
330
- cell = sheet.cell(row_number, colspec[:colref])
331
-
332
- processed_str = ""
333
- checked_str = ""
334
-
335
- if hash[:process]
336
- begin
337
- processed = colspec[:process] ? colspec[:process].call(cell) : cell
338
- processed_str = "processed: '#{processed}' (#{processed.class})"
339
- rescue => e
340
- puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
341
- raise e
342
- end
343
- end
344
- if hash[:check]
345
- begin
346
- processed = colspec[:process] ? colspec[:process].call(cell) : cell
347
- check = colspec[:check] ? colspec[:check].call(processed) : "no check specified"
348
- checked_str = "checked: '#{check}'"
349
- rescue => e
350
- puts "dreader error at #{__callee__}: 'check' specification for #{colname} at row #{row_number} raised an exception (col #{index + 1}, value: #{cell})"
351
- raise e
352
- end
353
- end
354
-
355
- puts " #{colname} => orig: '#{cell}' (#{cell.class}) #{processed_str} #{checked_str} (column: '#{colspec[:colref]}')"
356
- end
357
- end
358
- end
359
-
360
- # return an array of strings with all the errors we have encounterd
361
- # an empty array is a good news
362
- def errors
363
- @errors
364
- end
365
-
366
- def virtual_columns
367
- # execute the virtual column specification
368
- @table.each do |r|
369
- @virtualcols.each do |virtualcol|
370
- begin
371
- # add the cell to the table
372
- r[virtualcol[:name]] = {
373
- value: virtualcol[:process].call(r),
374
- virtual: true,
375
- }
376
- rescue => e
377
- puts "dreader error at #{__callee__}: 'process' specification for :#{virtualcol[:name]} raised an exception at row #{r[r.keys.first][:row_number]}"
378
- raise e
379
- end
380
- end
381
- end
382
- end
383
-
384
- # apply the mapping code to the array
385
- # it makes sense to invoke it only once
386
- #
387
- # the mapping is applied only if it defined
388
- def process
389
- @table.each do |r|
390
- @mapping.call(r) if @mapping
391
- end
392
- end
393
-
394
- def to_s
395
- @table.to_s
396
- end
397
-
398
- private
399
-
400
- def self.open_spreadsheet(filename)
401
- case File.extname(filename)
402
- when ".csv" then Roo::CSV.new(filename)
403
- when ".tsv" then Roo::CSV.new(filename, csv_options: {col_sep: "\t"})
404
- when ".ods" then Roo::OpenOffice.new(filename)
405
- when ".xls" then Roo::Excel.new(filename)
406
- when ".xlsx" then Roo::Excelx.new(filename)
407
- else raise "Unknown extension: #{File.extname(filename)}"
408
- end
409
- end
410
- end
411
-
412
- end
metadata CHANGED
@@ -1,57 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adolfo Villafiorita
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-30 00:00:00.000000000 Z
11
+ date: 2023-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
14
+ name: roo
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fast_excel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: debug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.0
20
48
  type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - "~>"
52
+ - - ">="
25
53
  - !ruby/object:Gem::Version
26
- version: '1.16'
54
+ version: 1.0.0
27
55
  - !ruby/object:Gem::Dependency
28
- name: rake
56
+ name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
30
58
  requirements:
31
59
  - - "~>"
32
60
  - !ruby/object:Gem::Version
33
- version: '10.0'
61
+ version: '1.16'
34
62
  type: :development
35
63
  prerelease: false
36
64
  version_requirements: !ruby/object:Gem::Requirement
37
65
  requirements:
38
66
  - - "~>"
39
67
  - !ruby/object:Gem::Version
40
- version: '10.0'
68
+ version: '1.16'
41
69
  - !ruby/object:Gem::Dependency
42
- name: roo
70
+ name: rake
43
71
  requirement: !ruby/object:Gem::Requirement
44
72
  requirements:
45
- - - ">="
73
+ - - "~>"
46
74
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
75
+ version: '10.0'
76
+ type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
- - - ">="
80
+ - - "~>"
53
81
  - !ruby/object:Gem::Version
54
- version: '0'
82
+ version: '10.0'
55
83
  description: |-
56
84
  Use this gem to specify the structure of some tabular data
57
85
  you want to process. The input data can be in CSV, LibreOffice, and Excel. Each row
@@ -63,35 +91,43 @@ description: |-
63
91
  The gem should be relatively easy to use, despite its name. (Dread
64
92
  stands for *d*ata *r*eader)
65
93
  email:
66
- - adolfo.villafiorita@ict4g.net
94
+ - adolfo@shair.tech
67
95
  executables: []
68
96
  extensions: []
69
97
  extra_rdoc_files: []
70
98
  files:
71
99
  - ".gitignore"
72
- - Changelog.org
100
+ - CHANGELOG.org
73
101
  - Gemfile
74
102
  - Gemfile.lock
75
103
  - LICENSE.txt
76
- - README.md
104
+ - README.org
77
105
  - Rakefile
78
106
  - bin/console
79
107
  - bin/setup
80
108
  - dreader.gemspec
81
109
  - examples/age/Birthdays.ods
82
110
  - examples/age/age.rb
111
+ - examples/age_with_multiple_checks/Birthdays.ods
112
+ - examples/age_with_multiple_checks/age_with_multiple_checks.rb
113
+ - examples/local_vars/local_vars.rb
114
+ - examples/template/template_generation.rb
83
115
  - examples/wikipedia_big_us_cities/big_us_cities.rb
84
116
  - examples/wikipedia_big_us_cities/cities_by_state.ods
85
117
  - examples/wikipedia_us_cities/us_cities.rb
86
118
  - examples/wikipedia_us_cities/us_cities.tsv
87
119
  - examples/wikipedia_us_cities/us_cities_bulk_declare.rb
88
120
  - lib/dreader.rb
121
+ - lib/dreader/column.rb
122
+ - lib/dreader/engine.rb
123
+ - lib/dreader/options.rb
124
+ - lib/dreader/util.rb
89
125
  - lib/dreader/version.rb
90
- homepage: https://ict4g.net/gitea/adolfo/dreader
126
+ homepage: https://redmine.shair.tech/projects/dreader
91
127
  licenses:
92
128
  - MIT
93
129
  metadata: {}
94
- post_install_message:
130
+ post_install_message:
95
131
  rdoc_options: []
96
132
  require_paths:
97
133
  - lib
@@ -106,8 +142,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
142
  - !ruby/object:Gem::Version
107
143
  version: '0'
108
144
  requirements: []
109
- rubygems_version: 3.0.3
110
- signing_key:
145
+ rubygems_version: 3.3.26
146
+ signing_key:
111
147
  specification_version: 4
112
148
  summary: Process and import data from cvs and spreadsheets
113
149
  test_files: []
data/Changelog.org DELETED
@@ -1,20 +0,0 @@
1
- * Version 0.4.2
2
- ** better error messages for process and check functions
3
- dreader now captures exceptions raised by process and check and
4
- prints and error message to stdout if an error is found.
5
- the exception is then propagated in the standard way.
6
- ** new method bulk_declare
7
- bulk_declare allow to easily declare columns which don't need a
8
- specific treatment
9
- ** read will now complains if the argument passed is not a hash
10
- ** virtualcols is now accessible (attr_reader)
11
- ** fixed a bug with slice
12
- * Version 0.4.1
13
- ** fixed an issue with ~read~: it always required a hash as input
14
- ** changed syntax of ~debug~, which now accepts a hash as argument
15
- This makes its syntax similar to ~read~.
16
- ** improved output of ~debug~
17
- By default ~debug~ now prints the output of ~process~ and ~check~.
18
- You can disable this feature by passing ~process: false~ and/or ~check:
19
- false~ to the ~debug~. Notice that ~check~ implies ~process~.
20
-