dreader 0.5.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/dreader.rb CHANGED
@@ -1,412 +1,6 @@
1
- require 'dreader/version'
2
- require 'roo'
1
+ require "dreader/column"
2
+ require "dreader/engine"
3
+ require "dreader/options"
4
+ require "dreader/util"
5
+ require "dreader/version"
3
6
 
4
- module Dreader
5
- # service class to implement the column DSL language
6
- class Column
7
- def colref colref
8
- @colref = colref
9
- end
10
-
11
- def process &block
12
- @process = block
13
- end
14
-
15
- def check &block
16
- @check = block
17
- end
18
-
19
- def to_hash
20
- {process: @process, check: @check, colref: @colref }
21
- end
22
- end
23
-
24
- # service class to implement the options DSL language
25
- class Options
26
- def initialize
27
- @attributes = {}
28
- end
29
-
30
- def method_missing(name, *args, &block)
31
- @attributes[name] = args[0]
32
- end
33
-
34
- def to_hash
35
- @attributes
36
- end
37
- end
38
-
39
- # Utilities function to simplify importing data into
40
- # ActiveRecords
41
- class Util
42
- # given a hash returned by Engine, return the same hash with
43
- # keys directly bound to the content of the :value sub-key
44
- #
45
- # Example
46
- #
47
- # hash = {name: {value: "A", ...}, surname: {value: "B", ...}}
48
- # simplify hash
49
- # {name: "A", surname: "B"}
50
- def self.simplify hash
51
- new_hash = {}
52
- hash.keys.map { |k| new_hash[k] = hash[k][:value] }
53
- new_hash
54
- end
55
-
56
- # given a hash returned by Engine, keep the "kept" keys in the top
57
- # of the hierarchy and move the "moved_key" below the
58
- # "subordinate_key"
59
- #
60
- # Example
61
- #
62
- # hash = {name: "A", surname: "B", address: "via XX Settembre", city: "Genoa"}
63
- # restructure hash, [:name, :surname], :address_attributes, [:address, :city]
64
- # {name: "A", surname: "B", address_attributes: {address: "via XX Settembre", city: "Genoa"}}
65
- #
66
- def self.restructure hash, kept, subordinate_key, moved_keys
67
- head = hash.slice kept
68
- subordinate = self.prepend subordinate_key, hash.slice(moved_keys)
69
- head.merge subordinate
70
- end
71
-
72
- # an alias for Hash.slice
73
- # keys is an array of keys
74
- def self.slice hash, keys
75
- hash.slice *keys
76
- end
77
-
78
- # remove all `keys` from `hash`
79
- def self.clean hash, keys
80
- hash.reject { |k, v| keys.include?(k) }
81
- end
82
-
83
- # given a hash, return a new hash with key and whose value is
84
- # the hash
85
- #
86
- # Example:
87
- #
88
- # hash = {name: "A", size: 10}
89
- # prepend hash, :product_attributes
90
- # {product_attributes: {name: "A", size: 10}}
91
- #
92
- def self.prepend hash, key
93
- {key => hash}
94
- end
95
- end
96
-
97
- #
98
- # This is where the real stuff begins
99
- #
100
- class Engine
101
- # readable for debugging purposes
102
- # the options we passed
103
- attr_reader :options
104
- # the specification of the columns to process
105
- attr_reader :colspec
106
- # the specification of the virtual columns
107
- attr_reader :virtualcols
108
- # the data we read
109
- attr_reader :table
110
-
111
- def initialize
112
- @options = {}
113
- @colspec = []
114
- @virtualcols = []
115
- end
116
-
117
- # define a DSL for options
118
- # any string is processed as an option and it ends up in the
119
- # @options hash
120
- def options &block
121
- options = Options.new
122
- options.instance_eval(&block)
123
-
124
- @options = options.to_hash
125
- end
126
-
127
- # define a DSL for column specification
128
- # - `name` is the name of the column
129
- # - `block` contains two declarations, `process` and `check`, which are
130
- # used, respectively, to make a cell into the desired data and to check
131
- # whether the desired data is ok
132
- def column name, &block
133
- column = Column.new
134
- column.instance_eval(&block)
135
-
136
- @colspec << column.to_hash.merge({name: name})
137
- end
138
-
139
- # bulk declare columns we intend to read
140
- #
141
- # - hash is a hash in the form { symbolic_name: colref }
142
- #
143
- # i.bulk_declare {name: 'B', age: 'C'} is equivalent to:
144
- #
145
- # i.column :name do
146
- # colref 'B'
147
- # end
148
- # i.column :age do
149
- # colref 'C'
150
- # end
151
- #
152
- # i.bulk_declare {name: 'B', age: 'C'} do
153
- # process do |cell|
154
- # cell.strip
155
- # end
156
- # end
157
- #
158
- # is equivalent to:
159
- #
160
- # i.column :name do
161
- # colref 'B'
162
- # process do |cell|
163
- # cell.strip
164
- # end
165
- # end
166
- # i.column :age do
167
- # colref 'C'
168
- # process do |cell|
169
- # cell.strip
170
- # end
171
- # end
172
- def bulk_declare hash, &block
173
- hash.keys.each do |key|
174
- column = Column.new
175
- column.colref hash[key]
176
- if block
177
- column.instance_eval(&block)
178
- end
179
- @colspec << column.to_hash.merge({name: key})
180
- end
181
- end
182
-
183
-
184
- # virtual columns define derived attributes
185
- # the code specified in the virtual column is executed after reading
186
- # a row and before applying the mapping function
187
- #
188
- # virtual colum declarations are executed in the order in which
189
- # they are defined
190
- def virtual_column name, &block
191
- column = Column.new
192
- column.instance_eval &block
193
-
194
- @virtualcols << column.to_hash.merge({name: name})
195
- end
196
-
197
- # define what we do with each line we read
198
- # - `block` is the code which takes as input a `row` and processes
199
- # `row` is a hash in which each spreadsheet cell is accessible under
200
- # the column names. Each cell has the following values:
201
- # :value, :error, :row_number, :col_number
202
- def mapping &block
203
- @mapping = block
204
- end
205
-
206
- # read a file and store it internally
207
- #
208
- # @param hash, a hash, possibly overriding any of the parameters
209
- # set in the initial options. This allows you, for
210
- # instance, to apply the same column specification to
211
- # different files and different sheets
212
- #
213
- # @return the data read from filename, in the form of an array of
214
- # hashes
215
- def read args = {}
216
- if args.class == Hash
217
- hash = @options.merge(args)
218
- else
219
- puts "dreader error at #{__callee__}: this function takes a Hash as input"
220
- exit
221
- end
222
-
223
- spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
224
- sheet = spreadsheet.sheet(hash[:sheet] || 0)
225
-
226
- @table = Array.new
227
- @errors = Array.new
228
-
229
- first_row = hash[:first_row] || 1
230
- last_row = hash[:last_row] || sheet.last_row
231
-
232
- (first_row..last_row).each do |row_number|
233
- r = Hash.new
234
- @colspec.each_with_index do |colspec, index|
235
- cell = sheet.cell(row_number, colspec[:colref])
236
-
237
- colname = colspec[:name]
238
-
239
- r[colname] = Hash.new
240
- r[colname][:row_number] = row_number
241
- r[colname][:col_number] = colspec[:colref]
242
-
243
- begin
244
- r[colname][:value] = value = colspec[:process] ? colspec[:process].call(cell) : cell
245
- rescue => e
246
- puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
247
- raise e
248
- end
249
-
250
- begin
251
- if colspec[:check] and not colspec[:check].call(value) then
252
- r[colname][:error] = true
253
- @errors << "dreader error at #{__callee__}: value \"#{cell}\" for #{colname} at row #{row_number} (col #{index + 1}) does not pass the check function"
254
- else
255
- r[colname][:error] = false
256
- end
257
- rescue => e
258
- puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
259
- raise e
260
- end
261
- end
262
-
263
- @table << r
264
- end
265
-
266
- @table
267
- end
268
-
269
- alias_method :load, :read
270
-
271
- # get (processed) row number
272
- #
273
- # - row_number is the row to get: index starts at 1.
274
- #
275
- # get_row(1) get the first line read, that is, the row specified
276
- # by `first_row` in `options` (or in read)
277
- #
278
- # You need to invoke read first
279
- def get_row row_number
280
- if row_number > @table.size
281
- puts "dreader error at #{__callee__}: 'row_number' is out of range (did you invoke read first?)"
282
- exit
283
- elsif row_number <= 0
284
- puts "dreader error at #{__callee__}: 'row_number' is zero or negative (first row is 1)."
285
- else
286
- @table[row_number - 1]
287
- end
288
- end
289
-
290
- # show to stdout the first `n` records we read from the file given the current
291
- # configuration
292
- def debug args = {}
293
- if args.class == Hash
294
- hash = @options.merge(args)
295
- else
296
- puts "dreader error at #{__callee__}: this function takes a Hash as input"
297
- exit
298
- end
299
-
300
- # apply some defaults, if not defined in the options
301
- hash[:process] = true if not hash.has_key? :process # shall we apply the process function?
302
- hash[:check] = true if not hash.has_key? :check # shall we check the data read?
303
- hash[:n] = 10 if not hash[:n]
304
-
305
- spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
306
- sheet = spreadsheet.sheet(hash[:sheet] || 0)
307
-
308
- puts "Current configuration:"
309
- @options.each do |k, v|
310
- puts " #{k}: #{v}"
311
- end
312
-
313
- puts "Configuration used by debug:"
314
- hash.each do |k, v|
315
- puts " #{k}: #{v}"
316
- end
317
-
318
- n = hash[:n]
319
- first_row = hash[:first_row] || 1
320
- last_row = first_row + n - 1
321
-
322
- puts " Last row (according to roo): #{sheet.last_row}"
323
- puts " Number of rows I will read in this session: #{n} (from #{first_row} to #{last_row})"
324
-
325
- (first_row..last_row).each do |row_number|
326
- puts "Row #{row_number} is:"
327
- r = Hash.new
328
- @colspec.each_with_index do |colspec, index|
329
- colname = colspec[:name]
330
- cell = sheet.cell(row_number, colspec[:colref])
331
-
332
- processed_str = ""
333
- checked_str = ""
334
-
335
- if hash[:process]
336
- begin
337
- processed = colspec[:process] ? colspec[:process].call(cell) : cell
338
- processed_str = "processed: '#{processed}' (#{processed.class})"
339
- rescue => e
340
- puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
341
- raise e
342
- end
343
- end
344
- if hash[:check]
345
- begin
346
- processed = colspec[:process] ? colspec[:process].call(cell) : cell
347
- check = colspec[:check] ? colspec[:check].call(processed) : "no check specified"
348
- checked_str = "checked: '#{check}'"
349
- rescue => e
350
- puts "dreader error at #{__callee__}: 'check' specification for #{colname} at row #{row_number} raised an exception (col #{index + 1}, value: #{cell})"
351
- raise e
352
- end
353
- end
354
-
355
- puts " #{colname} => orig: '#{cell}' (#{cell.class}) #{processed_str} #{checked_str} (column: '#{colspec[:colref]}')"
356
- end
357
- end
358
- end
359
-
360
- # return an array of strings with all the errors we have encounterd
361
- # an empty array is a good news
362
- def errors
363
- @errors
364
- end
365
-
366
- def virtual_columns
367
- # execute the virtual column specification
368
- @table.each do |r|
369
- @virtualcols.each do |virtualcol|
370
- begin
371
- # add the cell to the table
372
- r[virtualcol[:name]] = {
373
- value: virtualcol[:process].call(r),
374
- virtual: true,
375
- }
376
- rescue => e
377
- puts "dreader error at #{__callee__}: 'process' specification for :#{virtualcol[:name]} raised an exception at row #{r[r.keys.first][:row_number]}"
378
- raise e
379
- end
380
- end
381
- end
382
- end
383
-
384
- # apply the mapping code to the array
385
- # it makes sense to invoke it only once
386
- #
387
- # the mapping is applied only if it defined
388
- def process
389
- @table.each do |r|
390
- @mapping.call(r) if @mapping
391
- end
392
- end
393
-
394
- def to_s
395
- @table.to_s
396
- end
397
-
398
- private
399
-
400
- def self.open_spreadsheet(filename)
401
- case File.extname(filename)
402
- when ".csv" then Roo::CSV.new(filename)
403
- when ".tsv" then Roo::CSV.new(filename, csv_options: {col_sep: "\t"})
404
- when ".ods" then Roo::OpenOffice.new(filename)
405
- when ".xls" then Roo::Excel.new(filename)
406
- when ".xlsx" then Roo::Excelx.new(filename)
407
- else raise "Unknown extension: #{File.extname(filename)}"
408
- end
409
- end
410
- end
411
-
412
- end
metadata CHANGED
@@ -1,57 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adolfo Villafiorita
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-30 00:00:00.000000000 Z
11
+ date: 2023-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
14
+ name: roo
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fast_excel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: debug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.0.0
20
48
  type: :development
21
49
  prerelease: false
22
50
  version_requirements: !ruby/object:Gem::Requirement
23
51
  requirements:
24
- - - "~>"
52
+ - - ">="
25
53
  - !ruby/object:Gem::Version
26
- version: '1.16'
54
+ version: 1.0.0
27
55
  - !ruby/object:Gem::Dependency
28
- name: rake
56
+ name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
30
58
  requirements:
31
59
  - - "~>"
32
60
  - !ruby/object:Gem::Version
33
- version: '10.0'
61
+ version: '1.16'
34
62
  type: :development
35
63
  prerelease: false
36
64
  version_requirements: !ruby/object:Gem::Requirement
37
65
  requirements:
38
66
  - - "~>"
39
67
  - !ruby/object:Gem::Version
40
- version: '10.0'
68
+ version: '1.16'
41
69
  - !ruby/object:Gem::Dependency
42
- name: roo
70
+ name: rake
43
71
  requirement: !ruby/object:Gem::Requirement
44
72
  requirements:
45
- - - ">="
73
+ - - "~>"
46
74
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
75
+ version: '10.0'
76
+ type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
- - - ">="
80
+ - - "~>"
53
81
  - !ruby/object:Gem::Version
54
- version: '0'
82
+ version: '10.0'
55
83
  description: |-
56
84
  Use this gem to specify the structure of some tabular data
57
85
  you want to process. The input data can be in CSV, LibreOffice, and Excel. Each row
@@ -63,35 +91,43 @@ description: |-
63
91
  The gem should be relatively easy to use, despite its name. (Dread
64
92
  stands for *d*ata *r*eader)
65
93
  email:
66
- - adolfo.villafiorita@ict4g.net
94
+ - adolfo@shair.tech
67
95
  executables: []
68
96
  extensions: []
69
97
  extra_rdoc_files: []
70
98
  files:
71
99
  - ".gitignore"
72
- - Changelog.org
100
+ - CHANGELOG.org
73
101
  - Gemfile
74
102
  - Gemfile.lock
75
103
  - LICENSE.txt
76
- - README.md
104
+ - README.org
77
105
  - Rakefile
78
106
  - bin/console
79
107
  - bin/setup
80
108
  - dreader.gemspec
81
109
  - examples/age/Birthdays.ods
82
110
  - examples/age/age.rb
111
+ - examples/age_with_multiple_checks/Birthdays.ods
112
+ - examples/age_with_multiple_checks/age_with_multiple_checks.rb
113
+ - examples/local_vars/local_vars.rb
114
+ - examples/template/template_generation.rb
83
115
  - examples/wikipedia_big_us_cities/big_us_cities.rb
84
116
  - examples/wikipedia_big_us_cities/cities_by_state.ods
85
117
  - examples/wikipedia_us_cities/us_cities.rb
86
118
  - examples/wikipedia_us_cities/us_cities.tsv
87
119
  - examples/wikipedia_us_cities/us_cities_bulk_declare.rb
88
120
  - lib/dreader.rb
121
+ - lib/dreader/column.rb
122
+ - lib/dreader/engine.rb
123
+ - lib/dreader/options.rb
124
+ - lib/dreader/util.rb
89
125
  - lib/dreader/version.rb
90
- homepage: https://ict4g.net/gitea/adolfo/dreader
126
+ homepage: https://redmine.shair.tech/projects/dreader
91
127
  licenses:
92
128
  - MIT
93
129
  metadata: {}
94
- post_install_message:
130
+ post_install_message:
95
131
  rdoc_options: []
96
132
  require_paths:
97
133
  - lib
@@ -106,8 +142,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
142
  - !ruby/object:Gem::Version
107
143
  version: '0'
108
144
  requirements: []
109
- rubygems_version: 3.0.3
110
- signing_key:
145
+ rubygems_version: 3.3.26
146
+ signing_key:
111
147
  specification_version: 4
112
148
  summary: Process and import data from cvs and spreadsheets
113
149
  test_files: []
data/Changelog.org DELETED
@@ -1,20 +0,0 @@
1
- * Version 0.4.2
2
- ** better error messages for process and check functions
3
- dreader now captures exceptions raised by process and check and
4
- prints and error message to stdout if an error is found.
5
- the exception is then propagated in the standard way.
6
- ** new method bulk_declare
7
- bulk_declare allow to easily declare columns which don't need a
8
- specific treatment
9
- ** read will now complains if the argument passed is not a hash
10
- ** virtualcols is now accessible (attr_reader)
11
- ** fixed a bug with slice
12
- * Version 0.4.1
13
- ** fixed an issue with ~read~: it always required a hash as input
14
- ** changed syntax of ~debug~, which now accepts a hash as argument
15
- This makes its syntax similar to ~read~.
16
- ** improved output of ~debug~
17
- By default ~debug~ now prints the output of ~process~ and ~check~.
18
- You can disable this feature by passing ~process: false~ and/or ~check:
19
- false~ to the ~debug~. Notice that ~check~ implies ~process~.
20
-