dreader 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{CHANGELOG.ORG → CHANGELOG.org} +52 -0
- data/Gemfile.lock +3 -3
- data/README.org +119 -76
- data/examples/age/age.rb +30 -30
- data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +5 -3
- data/examples/local_vars/local_vars.rb +28 -0
- data/examples/wikipedia_big_us_cities/big_us_cities.rb +6 -4
- data/examples/wikipedia_us_cities/us_cities.rb +5 -3
- data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +5 -3
- data/lib/dreader/engine.rb +157 -134
- data/lib/dreader/util.rb +25 -10
- data/lib/dreader/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 524e55af5bb94cae3f407a1602069549783e935798a638361f3c98e922ffc54d
|
|
4
|
+
data.tar.gz: 2599e048324ccd233e3fa4a0261e134ced0a3347d27af1e978e635639b6284a8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e8a78531d96ef35f9272a38daa5327620026dd66c98529710901d4c5994b9e5369308612cc79d1e0998d46dbb9dd6d26c448ddc4265dd536f1e61a4fc50fb885
|
|
7
|
+
data.tar.gz: de71caed5d3df79d0d456b080a72a0edc035ef4e46906aac3f94295b07d57b956554a9d9e07b4b6195c5c09df0badfa9202122d7b6e0568cf89569b6a2277d28
|
|
@@ -1,5 +1,57 @@
|
|
|
1
1
|
#+TITLE: Changelog
|
|
2
2
|
|
|
3
|
+
* Version 1.1.1 - <2023-10-16 Mon>
|
|
4
|
+
** Adds option :extension
|
|
5
|
+
|
|
6
|
+
- Adds option =extension= to the class options and to the =open_spreadsheet=
|
|
7
|
+
function, to be able to determine the type of a file with no extension
|
|
8
|
+
|
|
9
|
+
* Version 1.1.0
|
|
10
|
+
** Fixes an issue with visibility of variables
|
|
11
|
+
|
|
12
|
+
Version 1.1.0 makes Engine a module and requires to use extend
|
|
13
|
+
|
|
14
|
+
This allows to isolate declarations in different variables.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
** Renames process to mappings
|
|
18
|
+
|
|
19
|
+
** Renames the variables in a more consistent way
|
|
20
|
+
|
|
21
|
+
#+begin_example ruby
|
|
22
|
+
attr_accessor :declared_options
|
|
23
|
+
# the specification of the columns to process
|
|
24
|
+
attr_accessor :declared_columns
|
|
25
|
+
# some example lines
|
|
26
|
+
attr_accessor :declared_examples
|
|
27
|
+
# the specification of the virtual columns
|
|
28
|
+
attr_accessor :declared_virtual_columns
|
|
29
|
+
# the mapping rules
|
|
30
|
+
attr_accessor :declared_mapping
|
|
31
|
+
#+end_example
|
|
32
|
+
|
|
33
|
+
** Declares =data= as a synonym of =table=
|
|
34
|
+
** Adds options to do everything in one pass
|
|
35
|
+
|
|
36
|
+
By passing the options
|
|
37
|
+
|
|
38
|
+
- =virtual=
|
|
39
|
+
- =mapping=
|
|
40
|
+
|
|
41
|
+
to =read= you can read and process the data in one step.
|
|
42
|
+
|
|
43
|
+
See the README for more details.
|
|
44
|
+
|
|
45
|
+
** Revises the logging messages
|
|
46
|
+
** Refactor some code to make it more readable
|
|
47
|
+
** Refactors the restructure function to make it more flexible
|
|
48
|
+
|
|
49
|
+
Now =restructure= takes as input symbols and hashes and reshapes
|
|
50
|
+
according to the specification.
|
|
51
|
+
|
|
52
|
+
See the README for an example.
|
|
53
|
+
|
|
54
|
+
|
|
3
55
|
* Version 1.0.0
|
|
4
56
|
** Changes the DSL to allow declaration in a class
|
|
5
57
|
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
dreader (
|
|
4
|
+
dreader (1.2.0)
|
|
5
5
|
fast_excel
|
|
6
6
|
roo
|
|
7
7
|
|
|
@@ -13,11 +13,11 @@ GEM
|
|
|
13
13
|
reline (>= 0.3.1)
|
|
14
14
|
fast_excel (0.4.1)
|
|
15
15
|
ffi (> 1.9, < 2)
|
|
16
|
-
ffi (1.
|
|
16
|
+
ffi (1.16.2)
|
|
17
17
|
io-console (0.6.0)
|
|
18
18
|
irb (1.7.2)
|
|
19
19
|
reline (>= 0.3.6)
|
|
20
|
-
nokogiri (1.15.
|
|
20
|
+
nokogiri (1.15.4-x86_64-linux)
|
|
21
21
|
racc (~> 1.4)
|
|
22
22
|
racc (1.7.1)
|
|
23
23
|
rake (10.5.0)
|
data/README.org
CHANGED
|
@@ -63,7 +63,9 @@ Print name and age of people from the following data:
|
|
|
63
63
|
#+BEGIN_EXAMPLE ruby
|
|
64
64
|
require 'dreader'
|
|
65
65
|
|
|
66
|
-
class Reader
|
|
66
|
+
class Reader
|
|
67
|
+
extend Dreader::Engine
|
|
68
|
+
|
|
67
69
|
options do
|
|
68
70
|
# we start reading from row 2
|
|
69
71
|
first_row 2
|
|
@@ -104,14 +106,14 @@ Print name and age of people from the following data:
|
|
|
104
106
|
end
|
|
105
107
|
end
|
|
106
108
|
|
|
107
|
-
reader = Reader
|
|
109
|
+
reader = Reader
|
|
108
110
|
|
|
109
111
|
# read the file
|
|
110
112
|
reader.read filename: "Birthdays.ods"
|
|
111
113
|
# compute the virtual columns
|
|
112
114
|
reader.virtual_columns
|
|
113
115
|
# run the mapping declaration
|
|
114
|
-
reader.
|
|
116
|
+
reader.mappings
|
|
115
117
|
|
|
116
118
|
#
|
|
117
119
|
# Here we can do further processing on the data
|
|
@@ -130,33 +132,36 @@ Print name and age of people from the following data:
|
|
|
130
132
|
To write an import function with Dreader:
|
|
131
133
|
|
|
132
134
|
- Declare which is the input file and where we can find data (Sheet
|
|
133
|
-
and first row)
|
|
134
|
-
- Declare the content of columns and how to check raw data, parse data,
|
|
135
|
+
and first row) (This can also be specified in each call.)
|
|
136
|
+
- Declare the content of columns and, then, how to check raw data, parse data,
|
|
135
137
|
and check parsed data
|
|
136
138
|
- Add virtual columns, that is, columns computed from other values
|
|
137
139
|
in the row
|
|
138
|
-
- Specify how to
|
|
140
|
+
- Specify how to map line. This is where you do the actual work
|
|
139
141
|
(for instance, if you process a file line by line) or put together data for
|
|
140
142
|
processing after the file has been fully read --- see the next step.
|
|
141
143
|
|
|
142
|
-
Dreader
|
|
143
|
-
|
|
144
|
-
|
|
144
|
+
Dreader now knows how to collect, shape, and transform (map) data according to
|
|
145
|
+
your instructions. We are now ready to do the actual work. This consists of
|
|
146
|
+
the following steps, various of which can be performed together:
|
|
145
147
|
|
|
146
|
-
-
|
|
148
|
+
- Read the file
|
|
149
|
+
- Do the parsing/transformations
|
|
150
|
+
- Compute the virtual columns
|
|
151
|
+
- Do the mappings
|
|
147
152
|
|
|
148
153
|
Each step is described in more details in the following sections.
|
|
149
154
|
|
|
150
155
|
*** Declare which is the input file and where we can find data
|
|
151
156
|
|
|
152
|
-
Require =dreader= and declare a class which
|
|
153
|
-
|
|
157
|
+
Require =dreader= and declare a class which extends =Dreader::Engine=:
|
|
154
158
|
|
|
155
159
|
#+BEGIN_EXAMPLE ruby
|
|
156
160
|
require 'dreader'
|
|
157
161
|
|
|
158
|
-
class Reader
|
|
159
|
-
|
|
162
|
+
class Reader
|
|
163
|
+
extend Dreader::Engine
|
|
164
|
+
[...]
|
|
160
165
|
end
|
|
161
166
|
#+END_EXAMPLE
|
|
162
167
|
|
|
@@ -165,6 +170,7 @@ In the class specify parsing option, using the following syntax:
|
|
|
165
170
|
#+BEGIN_EXAMPLE ruby
|
|
166
171
|
options do
|
|
167
172
|
filename 'example.ods'
|
|
173
|
+
extension ".ods"
|
|
168
174
|
|
|
169
175
|
sheet 'Sheet 1'
|
|
170
176
|
|
|
@@ -180,10 +186,17 @@ In the class specify parsing option, using the following syntax:
|
|
|
180
186
|
|
|
181
187
|
where:
|
|
182
188
|
|
|
183
|
-
- (optional) =filename= is the file to read. If not specified, you will
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
189
|
+
- (optional) =filename= is the file to read. If not specified, you will have
|
|
190
|
+
to supply a filename when loading the file (see =read=, below). *Use
|
|
191
|
+
=.tsv= for tab-separated files.*
|
|
192
|
+
- (optional) =extension= overrides or specifies the extension of =filename=.
|
|
193
|
+
Takes as input the extension preceded by a "." (e.g., ".xlsx"). Notice that
|
|
194
|
+
**value of this option is not appended to filename** (see =read= below).
|
|
195
|
+
Filename must thus be a valid reference to a file in the file system. This
|
|
196
|
+
option is useful in one of these two circumstances:
|
|
197
|
+
1. When =filename= has no extension
|
|
198
|
+
2. When you want to override the extension of the filename, e.g., to force
|
|
199
|
+
reading a "file.csv" as a tab separated file
|
|
187
200
|
- (optional) =first_row= is the first line to read (use =2= if your file
|
|
188
201
|
has a header)
|
|
189
202
|
- (optional) =last_row= is the last line to read. If not specified, we
|
|
@@ -192,19 +205,20 @@ where:
|
|
|
192
205
|
contain "garbage" after the records.
|
|
193
206
|
- (optional) =sheet= is the sheet name or number to read from. If not
|
|
194
207
|
specified, the first (default) sheet is used
|
|
208
|
+
- (optional) =debug= specifies that we are debugging
|
|
209
|
+
- (optional) =logger= specifies the logger
|
|
210
|
+
- (optional) =logger_level= specifies the logger level
|
|
195
211
|
|
|
196
|
-
#+BEGIN_NOTES
|
|
197
212
|
You can override some of the defaults by passing a hash as argument to
|
|
198
213
|
the =read= function. For instance:
|
|
199
214
|
|
|
200
215
|
#+BEGIN_EXAMPLE ruby
|
|
201
|
-
|
|
216
|
+
Reader.read filename: another_filepath
|
|
202
217
|
#+END_EXAMPLE
|
|
203
218
|
|
|
204
219
|
will read data from =another_filepath=, rather than from the filename
|
|
205
220
|
specified in the options. This might be useful, for instance, if the
|
|
206
221
|
same specification has to be used for different files.
|
|
207
|
-
#+END_NOTES
|
|
208
222
|
|
|
209
223
|
|
|
210
224
|
*** Declare the content of columns and how to parse them
|
|
@@ -216,12 +230,12 @@ There are two notations:
|
|
|
216
230
|
|
|
217
231
|
#+BEGIN_EXAMPLE ruby
|
|
218
232
|
# First notation, colref is put in the block
|
|
219
|
-
|
|
233
|
+
column :name do
|
|
220
234
|
colref 'A'
|
|
221
235
|
end
|
|
222
236
|
|
|
223
237
|
# Second notation, a hash is passed in the name
|
|
224
|
-
|
|
238
|
+
column({ name: 'A' }) do
|
|
225
239
|
end
|
|
226
240
|
#+END_EXAMPLE
|
|
227
241
|
|
|
@@ -242,7 +256,7 @@ The =column= declaration can contain Ruby blocks:
|
|
|
242
256
|
=process= is valid. *Check must return true if there are no errors.*
|
|
243
257
|
|
|
244
258
|
#+begin_example
|
|
245
|
-
|
|
259
|
+
column({ name: 'A' }) do
|
|
246
260
|
check_raw do |cell|
|
|
247
261
|
!cell.nil?
|
|
248
262
|
end
|
|
@@ -256,7 +270,7 @@ The =column= declaration can contain Ruby blocks:
|
|
|
256
270
|
#+end_quote
|
|
257
271
|
|
|
258
272
|
#+begin_example
|
|
259
|
-
|
|
273
|
+
column({ name: 'A' }) do
|
|
260
274
|
check_raw :must_be_non_nil do |cell|
|
|
261
275
|
!cell.nil?
|
|
262
276
|
end
|
|
@@ -278,7 +292,7 @@ The =column= declaration can contain Ruby blocks:
|
|
|
278
292
|
#+end_quote
|
|
279
293
|
|
|
280
294
|
#+begin_example
|
|
281
|
-
|
|
295
|
+
column({ name: 'A' }) do
|
|
282
296
|
check_raw do |cell|
|
|
283
297
|
# Here cell is like in the input file
|
|
284
298
|
end
|
|
@@ -435,17 +449,28 @@ key.
|
|
|
435
449
|
*** Process data
|
|
436
450
|
|
|
437
451
|
If =mapping= does not work for your data processing activities (e.g., you need
|
|
438
|
-
to make elaborations on data which span different rows), you can add your
|
|
439
|
-
|
|
452
|
+
to make elaborations on data which span different rows), you can perform
|
|
453
|
+
your elaborations on the data transformed by =mappings=.
|
|
440
454
|
|
|
441
455
|
A typical scenario works as follows:
|
|
442
456
|
|
|
443
|
-
1.
|
|
444
|
-
|
|
445
|
-
1. Use =i.read= or =i.load= (synonyms), to read all data.
|
|
457
|
+
1. Reference the class =i = Reader= and use =i.read= or =i.load=
|
|
458
|
+
(synonyms), to read all data.
|
|
446
459
|
|
|
447
460
|
#+BEGIN_EXAMPLE ruby
|
|
461
|
+
i = Reader
|
|
462
|
+
|
|
463
|
+
# read uses the options if defined and takes the same arguments as options
|
|
464
|
+
# examples:
|
|
465
|
+
# i.read
|
|
466
|
+
# i.read filename: "example.ods"
|
|
467
|
+
# i.read filename: "example.ods", extension: ".ods"
|
|
468
|
+
# i.read filename: "example", extension: ".ods"
|
|
469
|
+
# (the line above opens the file "example" as an Open Document Spreadsheet)
|
|
448
470
|
i.read
|
|
471
|
+
|
|
472
|
+
# alternately
|
|
473
|
+
Reader.read
|
|
449
474
|
#+END_EXAMPLE
|
|
450
475
|
|
|
451
476
|
2. Use =errors= to see whether any of the check functions failed:
|
|
@@ -465,20 +490,38 @@ A typical scenario works as follows:
|
|
|
465
490
|
|
|
466
491
|
(Optionally: check again for errors.)
|
|
467
492
|
|
|
468
|
-
4. Use the =
|
|
469
|
-
|
|
493
|
+
4. Use the =mappings= function to execute the =mapping= directive on each line
|
|
494
|
+
read from the file.
|
|
470
495
|
|
|
471
496
|
#+BEGIN_EXAMPLE ruby
|
|
472
|
-
i.
|
|
497
|
+
i.mappings
|
|
473
498
|
#+END_EXAMPLE
|
|
474
499
|
|
|
475
500
|
(Optionally: check again for errors.)
|
|
476
501
|
|
|
477
|
-
5. Add your own code to process data
|
|
502
|
+
5. Add your own code to process the data returned after =mappings=, which you
|
|
503
|
+
can access with =i.table= or =i.data= (synonyms).
|
|
478
504
|
|
|
479
|
-
Look in the examples directory for further details and a couple of
|
|
480
|
-
|
|
505
|
+
Look in the examples directory for further details and a couple of working
|
|
506
|
+
examples.
|
|
481
507
|
|
|
508
|
+
*** Improving performances
|
|
509
|
+
|
|
510
|
+
While debugging your specification executing =read=, =virtual_columns=, and
|
|
511
|
+
=mappings= in distinct steps is a good idea. When you go in production, you
|
|
512
|
+
might want to reduce the number of passes you perform on the data.
|
|
513
|
+
|
|
514
|
+
You can pass the option =virtual: true= to =read= to compute virtual
|
|
515
|
+
columns while you are reading data.
|
|
516
|
+
|
|
517
|
+
You can pass the option =mapping: true= to =read= to compute virtual
|
|
518
|
+
columns and perform the mapping while you are reading data. Notice that:
|
|
519
|
+
|
|
520
|
+
- =mapping= implies =virtual=, that is, if you pass =mapping: true= the read
|
|
521
|
+
function will also compute virtual columns
|
|
522
|
+
- =mapping= alters the content of =@table= and **subsequent calls to
|
|
523
|
+
=virtual_columns= and =mappings= will fail.** You have to reset by invoking
|
|
524
|
+
=read= again.
|
|
482
525
|
|
|
483
526
|
*** Managing Errors
|
|
484
527
|
|
|
@@ -529,22 +572,23 @@ end
|
|
|
529
572
|
|
|
530
573
|
You can check for errors in two different ways:
|
|
531
574
|
|
|
532
|
-
The first is in the =mapping= directive, where can check whether some checks
|
|
533
|
-
the =row= failed, by:
|
|
575
|
+
The first is in the =mapping= directive, where you can check whether some checks
|
|
576
|
+
for the =row= failed, by:
|
|
534
577
|
|
|
535
578
|
1. checking from the =:error= boolean key associated to each column, that is:
|
|
536
579
|
|
|
537
580
|
=row[<column_name>][:error]=
|
|
538
581
|
|
|
539
|
-
2. looking at the value of the =:row_errors= key, which contains all error
|
|
540
|
-
generated for the row:
|
|
582
|
+
2. looking at the value of the =:row_errors= key, which contains all error
|
|
583
|
+
messages generated for the row:
|
|
541
584
|
|
|
542
585
|
=row[:row_errors]=
|
|
543
586
|
|
|
544
|
-
3. After the processing, by using the method =errors=, which lists all the
|
|
587
|
+
3. After the processing, by using the method =errors=, which lists all the
|
|
588
|
+
errors.
|
|
545
589
|
|
|
546
|
-
The utility function =Dreader::Util.errors= takes as input the errors generated
|
|
547
|
-
Dreader and extract those of a specific row and, optionally column:
|
|
590
|
+
The utility function =Dreader::Util.errors= takes as input the errors generated
|
|
591
|
+
by Dreader and extracts those of a specific row and, optionally, column:
|
|
548
592
|
|
|
549
593
|
#+begin_example ruby
|
|
550
594
|
# get all the errors at line 2
|
|
@@ -644,39 +688,39 @@ Thus, for instance, given the example above returns:
|
|
|
644
688
|
|
|
645
689
|
* Simplifying the hash with the data read
|
|
646
690
|
|
|
647
|
-
The =Dreader::Util= class provides some functions to simplify the
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
ActiveRecord creators.
|
|
691
|
+
The =Dreader::Util= class provides some functions to simplify the hashes built
|
|
692
|
+
by =dreader=. This is useful to simplify the code you write and to generate
|
|
693
|
+
hashes you can pass, for instance, to ActiveRecord creators.
|
|
651
694
|
|
|
652
695
|
** Simplify removes everything but the values
|
|
653
696
|
|
|
654
|
-
=Dreader::Util.simplify
|
|
655
|
-
|
|
697
|
+
=Dreader::Util.simplify(hash)= removes all information but the value and makes
|
|
698
|
+
the value accessible directly from the name of the column.
|
|
656
699
|
|
|
657
700
|
#+BEGIN_EXAMPLE ruby
|
|
658
701
|
i.table[0]
|
|
659
|
-
{
|
|
660
|
-
|
|
702
|
+
{
|
|
703
|
+
name: { value: "John", row_number: 1, col_number: 1, errors: nil },
|
|
704
|
+
age: { value: 30, row_number: 1, col_number: 2, errors: nil }
|
|
705
|
+
}
|
|
661
706
|
|
|
662
707
|
Dreader::Util.simplify i.table[0]
|
|
663
708
|
{ name: "John", age: 30 }
|
|
664
709
|
#+END_EXAMPLE
|
|
665
710
|
|
|
666
|
-
*As an additional bonus, it removes the keys =row_number= and =row_errors=,
|
|
667
|
-
which are not part of the data read, in the first place.*
|
|
668
|
-
|
|
669
711
|
** Slice and Clean select columns
|
|
670
712
|
|
|
671
|
-
=Dreader::Util.slice
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
713
|
+
=Dreader::Util.slice(hash, keys)= and =Dreader::Util.clean(hash, keys)=, where
|
|
714
|
+
=keys= is an array of keys, are respectively used to select or remove some
|
|
715
|
+
keys from the hash returned by Dreader. (Notice that the Ruby Hash class
|
|
716
|
+
already provides similar methods.)
|
|
675
717
|
|
|
676
718
|
#+BEGIN_EXAMPLE ruby
|
|
677
719
|
i.table[0]
|
|
678
|
-
{
|
|
679
|
-
|
|
720
|
+
{
|
|
721
|
+
name: { value: "John", row_number: 1, col_number: 1, errors: nil },
|
|
722
|
+
age: { value: 30, row_number: 1, col_number: 2, errors: nil }
|
|
723
|
+
}
|
|
680
724
|
|
|
681
725
|
Dreader::Util.slice i.table[0], :name
|
|
682
726
|
{ name: { value: "John", row_number: 1, col_number: 1, errors: nil}
|
|
@@ -685,8 +729,8 @@ Ruby Hash class already provides similar methods.)
|
|
|
685
729
|
{ age: { value: 30, row_number: 1, col_number: 2, errors: nil }
|
|
686
730
|
#+END_EXAMPLE
|
|
687
731
|
|
|
688
|
-
The methods =slice= and =clean= are more useful when used in
|
|
689
|
-
|
|
732
|
+
The methods =slice= and =clean= are more useful when used in conjunction with
|
|
733
|
+
=simplify=:
|
|
690
734
|
|
|
691
735
|
#+BEGIN_EXAMPLE ruby
|
|
692
736
|
hash = Dreader::Util.simplify i.table[0]
|
|
@@ -704,21 +748,23 @@ create an =ActiveRecord= object.
|
|
|
704
748
|
|
|
705
749
|
** Better Integration with ActiveRecord
|
|
706
750
|
|
|
707
|
-
Finally, the =Dreader::Util.restructure= method helps building hashes
|
|
708
|
-
|
|
751
|
+
Finally, the =Dreader::Util.restructure= method helps building hashes to create
|
|
752
|
+
[[http://api.rubyonrails.org/classes/ActiveModel/Model.html][ActiveModel]] objects with nested attributes.
|
|
753
|
+
|
|
754
|
+
**The starting point is a simplified row.**
|
|
709
755
|
|
|
710
756
|
#+BEGIN_EXAMPLE ruby
|
|
711
|
-
hash = {name: "John", surname: "Doe", address: "Unknown", city: "NY"
|
|
757
|
+
hash = { name: "John", surname: "Doe", address: "Unknown", city: "NY" }
|
|
712
758
|
|
|
713
|
-
Dreader::Util.restructure hash, [:name, :surname
|
|
714
|
-
{name: "John", surname: "Doe", address_attributes: {address: "
|
|
759
|
+
Dreader::Util.restructure hash, [:name, :surname, :address_attributes, [:address, :city]]
|
|
760
|
+
{ name: "John", surname: "Doe", address_attributes: { address: "Unknown", city: "NY" } }
|
|
715
761
|
#+END_EXAMPLE
|
|
716
762
|
|
|
717
763
|
|
|
718
764
|
* Debugging your specification
|
|
719
765
|
|
|
720
|
-
The =debug= function prints the current configuration, reads some
|
|
721
|
-
|
|
766
|
+
The =debug= function prints the current configuration, reads some records from
|
|
767
|
+
the input file(s), and shows the records read:
|
|
722
768
|
|
|
723
769
|
#+BEGIN_EXAMPLE ruby
|
|
724
770
|
i.debug
|
|
@@ -735,8 +781,8 @@ read:
|
|
|
735
781
|
i.debug process: false, check: false
|
|
736
782
|
#+END_EXAMPLE
|
|
737
783
|
|
|
738
|
-
Notice that =check= implies =process=, since =check= is invoked on the
|
|
739
|
-
|
|
784
|
+
Notice that =check= implies =process=, since =check= is invoked on the output
|
|
785
|
+
of the =process= directive.
|
|
740
786
|
|
|
741
787
|
If you prefer, in alternative to =debug= you can also use configuration
|
|
742
788
|
variables (but then you need to change the configuration according to the
|
|
@@ -751,7 +797,7 @@ environment):
|
|
|
751
797
|
|
|
752
798
|
* Changelog
|
|
753
799
|
|
|
754
|
-
See [[file:CHANGELOG.
|
|
800
|
+
See [[file:CHANGELOG.org][CHANGELOG]].
|
|
755
801
|
|
|
756
802
|
* Known Limitations
|
|
757
803
|
|
|
@@ -759,9 +805,6 @@ At the moment:
|
|
|
759
805
|
|
|
760
806
|
- it is not possible to specify column references using header names
|
|
761
807
|
(like Roo does).
|
|
762
|
-
- it is not possible to pass options to the file readers. As a
|
|
763
|
-
consequence tab-separated files must have the =.tsv= extension or
|
|
764
|
-
they will not be parsed correctly
|
|
765
808
|
- some more testing wouldn't hurt.
|
|
766
809
|
|
|
767
810
|
* Known Bugs
|
data/examples/age/age.rb
CHANGED
|
@@ -1,47 +1,47 @@
|
|
|
1
|
-
require
|
|
1
|
+
require "dreader"
|
|
2
2
|
|
|
3
|
-
class Reader
|
|
3
|
+
class Reader
|
|
4
|
+
extend Dreader::Engine
|
|
4
5
|
|
|
5
|
-
options do
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
end
|
|
6
|
+
options do
|
|
7
|
+
first_row 2
|
|
8
|
+
debug true
|
|
9
|
+
end
|
|
9
10
|
|
|
10
|
-
column :name do
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
end
|
|
11
|
+
column :name do
|
|
12
|
+
doc "A is the name string"
|
|
13
|
+
colref 'A'
|
|
14
|
+
end
|
|
14
15
|
|
|
15
|
-
column :birthdate do
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
column :birthdate do
|
|
17
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
|
18
|
+
colref 'B'
|
|
18
19
|
|
|
19
|
-
|
|
20
|
-
|
|
20
|
+
process do |c|
|
|
21
|
+
Date.parse(c)
|
|
22
|
+
end
|
|
21
23
|
end
|
|
22
|
-
end
|
|
23
24
|
|
|
24
|
-
virtual_column :age do
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
25
|
+
virtual_column :age do
|
|
26
|
+
process do |row|
|
|
27
|
+
birthdate = row[:birthdate][:value]
|
|
28
|
+
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
|
29
|
+
today = Date.today
|
|
29
30
|
|
|
30
|
-
|
|
31
|
+
[0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
|
|
32
|
+
end
|
|
31
33
|
end
|
|
32
|
-
end
|
|
33
34
|
|
|
34
|
-
mapping do |row|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
end
|
|
35
|
+
mapping do |row|
|
|
36
|
+
r = Dreader::Util.simplify(row)
|
|
37
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
|
38
|
+
end
|
|
38
39
|
end
|
|
39
40
|
|
|
40
|
-
i = Reader
|
|
41
|
-
|
|
41
|
+
i = Reader
|
|
42
42
|
i.read filename: "Birthdays.ods"
|
|
43
43
|
i.virtual_columns
|
|
44
|
-
i.
|
|
44
|
+
i.mappings
|
|
45
45
|
|
|
46
46
|
#
|
|
47
47
|
# Here we can do further processing on the data
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
require 'dreader'
|
|
2
2
|
|
|
3
|
-
class Reader
|
|
3
|
+
class Reader
|
|
4
|
+
extend Dreader::Engine
|
|
5
|
+
|
|
4
6
|
options { first_row 2; debug true }
|
|
5
7
|
|
|
6
8
|
#
|
|
@@ -54,9 +56,9 @@ class Reader < Dreader::Engine
|
|
|
54
56
|
end
|
|
55
57
|
end
|
|
56
58
|
|
|
57
|
-
i = Reader
|
|
59
|
+
i = Reader
|
|
58
60
|
|
|
59
61
|
i.read filename: "Birthdays.ods"
|
|
60
62
|
i.virtual_columns
|
|
61
|
-
i.
|
|
63
|
+
i.mappings
|
|
62
64
|
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This demonstrates that variables are local
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
require "dreader"
|
|
6
|
+
|
|
7
|
+
class OneReader
|
|
8
|
+
extend Dreader::Engine
|
|
9
|
+
|
|
10
|
+
options do
|
|
11
|
+
first_row 2
|
|
12
|
+
debug true
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
class AnotherReader
|
|
17
|
+
extend Dreader::Engine
|
|
18
|
+
|
|
19
|
+
options do
|
|
20
|
+
filename "filename"
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
r1 = OneReader
|
|
25
|
+
r2 = AnotherReader
|
|
26
|
+
|
|
27
|
+
puts r1.declared_options
|
|
28
|
+
puts r2.declared_options
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
require 'dreader'
|
|
4
4
|
|
|
5
|
-
class Processor
|
|
5
|
+
class Processor
|
|
6
|
+
extend Dreader::Engine
|
|
7
|
+
|
|
6
8
|
options do
|
|
7
9
|
first_row 2
|
|
8
10
|
filename "cities_by_state.ods"
|
|
@@ -25,7 +27,7 @@ class Processor < Dreader::Engine
|
|
|
25
27
|
end
|
|
26
28
|
end
|
|
27
29
|
|
|
28
|
-
processor = Processor
|
|
30
|
+
processor = Processor
|
|
29
31
|
|
|
30
32
|
printf "Loading the spreadsheet..."
|
|
31
33
|
processor.load
|
|
@@ -43,8 +45,8 @@ else
|
|
|
43
45
|
end
|
|
44
46
|
puts "done!"
|
|
45
47
|
|
|
46
|
-
puts "
|
|
47
|
-
processor.
|
|
48
|
+
puts "Applying mapping rules to the spreadsheet..."
|
|
49
|
+
processor.mappings
|
|
48
50
|
puts "... done"
|
|
49
51
|
|
|
50
52
|
|
|
@@ -13,7 +13,9 @@ class City
|
|
|
13
13
|
end
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
class Importer
|
|
16
|
+
class Importer
|
|
17
|
+
extend Dreader::Engine
|
|
18
|
+
|
|
17
19
|
# read from us_cities.tsv, lines from 2 to 10 (included)
|
|
18
20
|
options do
|
|
19
21
|
filename "us_cities.tsv"
|
|
@@ -50,7 +52,7 @@ class Importer < Dreader::Engine
|
|
|
50
52
|
end
|
|
51
53
|
|
|
52
54
|
cities = []
|
|
53
|
-
importer = Importer
|
|
55
|
+
importer = Importer
|
|
54
56
|
|
|
55
57
|
importer.mapping do |row|
|
|
56
58
|
# remove all additional information stored in each cell
|
|
@@ -81,7 +83,7 @@ importer.debug process: false, check: false
|
|
|
81
83
|
# load and process
|
|
82
84
|
importer.load
|
|
83
85
|
cities = []
|
|
84
|
-
importer.
|
|
86
|
+
importer.mappings
|
|
85
87
|
|
|
86
88
|
# output everything to see whether it works
|
|
87
89
|
puts "First ten cities in the US (source Wikipedia)"
|
|
@@ -13,7 +13,9 @@ class City
|
|
|
13
13
|
end
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
class Importer
|
|
16
|
+
class Importer
|
|
17
|
+
extend Dreader::Engine
|
|
18
|
+
|
|
17
19
|
# read from us_cities.tsv, lines from 2 to 10 (included)
|
|
18
20
|
options do
|
|
19
21
|
filename "us_cities.tsv"
|
|
@@ -42,7 +44,7 @@ class Importer < Dreader::Engine
|
|
|
42
44
|
end
|
|
43
45
|
|
|
44
46
|
cities = []
|
|
45
|
-
importer = Importer
|
|
47
|
+
importer = Importer
|
|
46
48
|
|
|
47
49
|
importer.mapping do |row|
|
|
48
50
|
# remove all additional information stored in each cell
|
|
@@ -73,7 +75,7 @@ importer.debug process: false, check: false
|
|
|
73
75
|
# load and process
|
|
74
76
|
importer.load
|
|
75
77
|
cities = []
|
|
76
|
-
importer.
|
|
78
|
+
importer.mappings
|
|
77
79
|
|
|
78
80
|
# output everything to see whether it works
|
|
79
81
|
puts "First ten cities in the US (source Wikipedia)"
|
data/lib/dreader/engine.rb
CHANGED
|
@@ -10,47 +10,29 @@ module Dreader
|
|
|
10
10
|
#
|
|
11
11
|
# This is where the real stuff begins
|
|
12
12
|
#
|
|
13
|
-
|
|
14
|
-
# TODO: make the writer into private methods (need to be accessed only
|
|
15
|
-
# in the initializer) and demote to attr_reader
|
|
16
|
-
|
|
13
|
+
module Engine
|
|
17
14
|
# the options we passed
|
|
18
|
-
attr_accessor :
|
|
15
|
+
attr_accessor :declared_options
|
|
19
16
|
# the specification of the columns to process
|
|
20
|
-
attr_accessor :
|
|
17
|
+
attr_accessor :declared_columns
|
|
21
18
|
# some example lines
|
|
22
|
-
attr_accessor :
|
|
19
|
+
attr_accessor :declared_examples
|
|
23
20
|
# the specification of the virtual columns
|
|
24
|
-
attr_accessor :
|
|
21
|
+
attr_accessor :declared_virtual_columns
|
|
25
22
|
# the mapping rules
|
|
26
|
-
attr_accessor :
|
|
23
|
+
attr_accessor :declared_mapping
|
|
27
24
|
|
|
28
25
|
# the data we read
|
|
29
26
|
attr_reader :table
|
|
30
27
|
|
|
31
|
-
# variables declared in the class which need to be propagated in
|
|
32
|
-
# the instance
|
|
33
|
-
INSTANTIATE = %i[options colspec examples virtualcols]
|
|
34
|
-
|
|
35
|
-
def initialize
|
|
36
|
-
@logger = Logger.new($stdout)
|
|
37
|
-
@logger.level = Logger::WARN
|
|
38
|
-
|
|
39
|
-
# populate the instance with the variables defined in the class
|
|
40
|
-
@options = defined?(@@options) ? @@options : {}
|
|
41
|
-
@colspec = defined?(@@colspec) ? @@colspec : []
|
|
42
|
-
@examples = defined?(@@examples) ? @@examples : []
|
|
43
|
-
@virtualcols = defined?(@@virtualcols) ? @@virtualcols : []
|
|
44
|
-
end
|
|
45
|
-
|
|
46
28
|
# define a DSL for options
|
|
47
29
|
# any string is processed as an option and it ends up in the
|
|
48
30
|
# @options hash
|
|
49
|
-
def
|
|
31
|
+
def options(&block)
|
|
50
32
|
options = Options.new
|
|
51
33
|
options.instance_eval(&block)
|
|
52
34
|
|
|
53
|
-
|
|
35
|
+
@declared_options = options.to_hash
|
|
54
36
|
end
|
|
55
37
|
|
|
56
38
|
# define a DSL for column specification
|
|
@@ -58,18 +40,18 @@ module Dreader
|
|
|
58
40
|
# - `block` contains two declarations, `process` and `check`, which are
|
|
59
41
|
# used, respectively, to make a cell into the desired data and to check
|
|
60
42
|
# whether the desired data is ok
|
|
61
|
-
def
|
|
43
|
+
def column(name, &block)
|
|
62
44
|
column = Column.new
|
|
63
45
|
column.instance_eval(&block)
|
|
64
46
|
|
|
65
|
-
|
|
47
|
+
@declared_columns ||= []
|
|
66
48
|
|
|
67
49
|
if name.instance_of?(Hash)
|
|
68
|
-
|
|
50
|
+
@declared_columns << column.to_hash.merge(
|
|
69
51
|
{ name: name.keys.first, colref: name.values.first }
|
|
70
52
|
)
|
|
71
53
|
else
|
|
72
|
-
|
|
54
|
+
@declared_columns << column.to_hash.merge({ name: name })
|
|
73
55
|
end
|
|
74
56
|
end
|
|
75
57
|
|
|
@@ -106,20 +88,20 @@ module Dreader
|
|
|
106
88
|
# cell.strip
|
|
107
89
|
# end
|
|
108
90
|
# end
|
|
109
|
-
def
|
|
91
|
+
def columns(hash, &block)
|
|
110
92
|
hash.each_key do |key|
|
|
111
93
|
column = Column.new
|
|
112
94
|
column.colref hash[key]
|
|
113
95
|
column.instance_eval(&block) if block
|
|
114
96
|
|
|
115
|
-
|
|
116
|
-
|
|
97
|
+
@declared_columns ||= []
|
|
98
|
+
@declared_columns << column.to_hash.merge({ name: key })
|
|
117
99
|
end
|
|
118
100
|
end
|
|
119
101
|
|
|
120
|
-
def
|
|
121
|
-
|
|
122
|
-
|
|
102
|
+
def example(hash)
|
|
103
|
+
@declared_examples ||= []
|
|
104
|
+
@declared_examples << hash
|
|
123
105
|
end
|
|
124
106
|
|
|
125
107
|
# virtual columns define derived attributes
|
|
@@ -128,12 +110,12 @@ module Dreader
|
|
|
128
110
|
#
|
|
129
111
|
# virtual colum declarations are executed in the order in which
|
|
130
112
|
# they are defined
|
|
131
|
-
def
|
|
113
|
+
def virtual_column(name, &block)
|
|
132
114
|
column = Column.new
|
|
133
115
|
column.instance_eval &block
|
|
134
116
|
|
|
135
|
-
|
|
136
|
-
|
|
117
|
+
@declared_virtual_columns ||= []
|
|
118
|
+
@declared_virtual_columns << column.to_hash.merge({ name: name })
|
|
137
119
|
end
|
|
138
120
|
|
|
139
121
|
# define what we do with each line we read
|
|
@@ -141,8 +123,8 @@ module Dreader
|
|
|
141
123
|
# `row` is a hash in which each spreadsheet cell is accessible under
|
|
142
124
|
# the column names. Each cell has the following values:
|
|
143
125
|
# :value, :error, :row_number, :col_number
|
|
144
|
-
def
|
|
145
|
-
|
|
126
|
+
def mapping(&block)
|
|
127
|
+
@declared_mapping = block
|
|
146
128
|
end
|
|
147
129
|
|
|
148
130
|
# read a file and store it internally
|
|
@@ -155,18 +137,20 @@ module Dreader
|
|
|
155
137
|
# @return the data read from filename, in the form of an array of
|
|
156
138
|
# hashes
|
|
157
139
|
def read(args = {})
|
|
140
|
+
# args override values in options (if defined)
|
|
141
|
+
# the initializer guarantees @options is at least {}
|
|
142
|
+
options = (@declared_options || {}).merge(args)
|
|
143
|
+
|
|
144
|
+
@logger = options[:logger] || Logger.new($stdout)
|
|
145
|
+
@logger.level = options[:logger_level] || Logger::WARN
|
|
146
|
+
@debug = options[:debug] == true
|
|
147
|
+
|
|
158
148
|
if !args.instance_of?(Hash)
|
|
159
149
|
@logger.error "#{__callee__}: this function takes a Hash as input"
|
|
160
150
|
raise Exception
|
|
161
151
|
end
|
|
162
152
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
@logger = options[:logger] if options[:logger]
|
|
166
|
-
@logger.level = options[:logger_level] if options[:logger_level]
|
|
167
|
-
@debug = options[:debug] == true
|
|
168
|
-
|
|
169
|
-
spreadsheet = Dreader::Engine.open_spreadsheet (options[:filename])
|
|
153
|
+
spreadsheet = open_spreadsheet(options)
|
|
170
154
|
sheet = spreadsheet.sheet(options[:sheet] || 0)
|
|
171
155
|
first_row = options[:first_row] || 1
|
|
172
156
|
last_row = options[:last_row] || sheet.last_row
|
|
@@ -176,64 +160,74 @@ module Dreader
|
|
|
176
160
|
@logger.level = Logger::DEBUG
|
|
177
161
|
|
|
178
162
|
# override the number of lines read
|
|
179
|
-
options[:n]
|
|
180
|
-
last_row =
|
|
181
|
-
|
|
163
|
+
n_lines = options[:n] ? options[:n].to_i : 10
|
|
164
|
+
last_row = first_row + n_lines - 1
|
|
165
|
+
|
|
182
166
|
# apply some defaults for debugging, if not defined in the options
|
|
183
167
|
[:check_raw, :process, :check].map do |key|
|
|
184
168
|
options[key] = true unless options.key?(key)
|
|
185
169
|
end
|
|
186
170
|
end
|
|
187
171
|
|
|
188
|
-
{ current: @
|
|
189
|
-
@logger.debug "#{k.capitalize} configuration:"
|
|
172
|
+
{ current: @declared_options, debug: options }.each do |k, v|
|
|
173
|
+
@logger.debug "[dreader] #{k.capitalize} configuration:"
|
|
190
174
|
v.each do |key, value|
|
|
191
175
|
@logger.debug " #{key}: #{value}"
|
|
192
176
|
end
|
|
193
177
|
end
|
|
194
178
|
|
|
195
|
-
@table = []
|
|
196
179
|
@errors = []
|
|
197
180
|
|
|
198
|
-
(first_row..last_row).
|
|
181
|
+
@table = (first_row..last_row).map do |row_number|
|
|
199
182
|
r = { row_number: row_number, row_errors: [] }
|
|
200
183
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
184
|
+
# this has side-effects on r
|
|
185
|
+
columns_on(r, row_number, sheet)
|
|
186
|
+
|
|
187
|
+
# this has side-effects on r
|
|
188
|
+
virtual_columns_on(r) if options[:virtual] || options[:mapping]
|
|
189
|
+
|
|
190
|
+
options[:mapping] ? mappings_on(r) : r
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# TODO: PASS A ROW (and not row_number and sheet)
|
|
195
|
+
def columns_on(r, row_number, sheet)
|
|
196
|
+
@declared_columns.each_with_index do |colspec, index|
|
|
197
|
+
colname = colspec[:name]
|
|
198
|
+
colref = colspec[:colref]
|
|
199
|
+
cell = sheet.cell(row_number, colref)
|
|
200
|
+
|
|
201
|
+
r[colname] = {
|
|
202
|
+
row: row_number,
|
|
203
|
+
col: colspec[:colref],
|
|
204
|
+
value: cell,
|
|
205
|
+
error: false
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
# Repeated below
|
|
209
|
+
# @logger.debug "[dreader] Processing #{coord(row_number, colref)}"
|
|
227
210
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
211
|
+
# check raw data
|
|
212
|
+
check_data(colspec[:checks_raw], r, colname)
|
|
213
|
+
|
|
214
|
+
# process data
|
|
215
|
+
coord = coord(row_number, colspec[:colref], cell)
|
|
216
|
+
begin
|
|
217
|
+
processed = colspec[:process] ? colspec[:process].call(cell) : cell
|
|
218
|
+
@logger.debug "[dreader] #{colname} process #{coord} yields '#{processed}' (#{processed.class})"
|
|
219
|
+
r[colname][:value] = processed
|
|
220
|
+
rescue => e
|
|
221
|
+
@logger.error "[dreader] #{colname} process #{coord} raises an exception"
|
|
222
|
+
raise e
|
|
231
223
|
end
|
|
232
224
|
|
|
233
|
-
|
|
225
|
+
# check data after process - notice that now r contains the value
|
|
226
|
+
# processed by process
|
|
227
|
+
check_data(colspec[:checks], r, colname)
|
|
234
228
|
end
|
|
235
229
|
|
|
236
|
-
|
|
230
|
+
r
|
|
237
231
|
end
|
|
238
232
|
|
|
239
233
|
alias load read
|
|
@@ -254,51 +248,65 @@ module Dreader
|
|
|
254
248
|
# You need to invoke read first
|
|
255
249
|
def get_row(row_number)
|
|
256
250
|
if row_number > @table.size
|
|
257
|
-
@logger.error "
|
|
251
|
+
@logger.error "[dreader] 'row_number' is out of range (did you invoke read?)"
|
|
258
252
|
exit
|
|
259
253
|
elsif row_number <= 0
|
|
260
|
-
@logger.error "
|
|
254
|
+
@logger.error "[dreader] 'row_number' is zero or negative (first row is 1)."
|
|
261
255
|
else
|
|
262
256
|
@table[row_number - 1]
|
|
263
257
|
end
|
|
264
258
|
end
|
|
265
259
|
|
|
266
|
-
# return an array of hashes with all the errors we have
|
|
260
|
+
# return an array of hashes with all the errors we have encountered
|
|
267
261
|
# an empty array is a good news
|
|
268
262
|
attr_reader :errors
|
|
269
263
|
|
|
270
|
-
def virtual_columns
|
|
264
|
+
def virtual_columns
|
|
271
265
|
# execute the virtual column specification
|
|
272
|
-
@table.each
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
r[colname][:value] = virtualcol[:process].call(r)
|
|
283
|
-
end
|
|
284
|
-
rescue => e
|
|
285
|
-
row = r[:row_number]
|
|
286
|
-
@logger.error "#{__callee__}: process for virtual column :#{colname} raised an exception at row #{row}"
|
|
287
|
-
raise e
|
|
288
|
-
end
|
|
266
|
+
@table.each { |row| virtual_columns_on(row) }
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
# Compute virtual columns for, with side effect on row
|
|
270
|
+
def virtual_columns_on(row)
|
|
271
|
+
@declared_virtual_columns.each do |virtualcol|
|
|
272
|
+
colname = virtualcol[:name]
|
|
273
|
+
row[colname] = { virtual: true }
|
|
274
|
+
|
|
275
|
+
check_data(virtualcol[:checks_raw], row, colname, full_row: true)
|
|
289
276
|
|
|
290
|
-
|
|
291
|
-
|
|
277
|
+
begin
|
|
278
|
+
# add the cell to the table
|
|
279
|
+
if virtualcol[:process]
|
|
280
|
+
row[colname][:value] = virtualcol[:process].call(row)
|
|
281
|
+
end
|
|
282
|
+
rescue => e
|
|
283
|
+
r = row[:row_number]
|
|
284
|
+
@logger.error "[dreader] #{colname} process raises an exception at row #{r}"
|
|
285
|
+
raise e
|
|
292
286
|
end
|
|
287
|
+
|
|
288
|
+
# check data after process -- we also have the processed value of
|
|
289
|
+
# the virtual column
|
|
290
|
+
check_data(virtualcol[:checks], row, colname, full_row: true)
|
|
293
291
|
end
|
|
294
292
|
end
|
|
295
293
|
|
|
296
|
-
# apply the mapping code to the array
|
|
297
|
-
#
|
|
294
|
+
# apply the mapping code to the array it makes sense to invoke it only
|
|
295
|
+
# once.
|
|
298
296
|
#
|
|
299
|
-
# the mapping is applied only if it defined
|
|
300
|
-
|
|
301
|
-
|
|
297
|
+
# the mapping is applied only if it defined and it uses map, so that
|
|
298
|
+
# it can be used functionally
|
|
299
|
+
def mappings
|
|
300
|
+
@table.map { |row| mappings_on(row) }
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def mappings_on(row)
|
|
304
|
+
@declared_mapping&.call(row)
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
# an alias
|
|
308
|
+
def data
|
|
309
|
+
@table
|
|
302
310
|
end
|
|
303
311
|
|
|
304
312
|
def to_s
|
|
@@ -314,14 +322,14 @@ module Dreader
|
|
|
314
322
|
end
|
|
315
323
|
|
|
316
324
|
def compare_headers(hash = {})
|
|
317
|
-
options = @
|
|
325
|
+
options = @declared_options.merge(hash)
|
|
318
326
|
|
|
319
|
-
spreadsheet =
|
|
327
|
+
spreadsheet = open_spreadsheet(options)
|
|
320
328
|
sheet = spreadsheet.sheet(options[:sheet] || 0)
|
|
321
329
|
header_row_number = options[:first_row] - 1 || 1
|
|
322
330
|
|
|
323
331
|
output_hash = {}
|
|
324
|
-
@
|
|
332
|
+
@declared_columns.map do |colspec|
|
|
325
333
|
cell = sheet.cell(row_number, colspec[:colref])
|
|
326
334
|
human_readable = colspec[:name].to_s.split("_").map(&:capitalize).join(" ")
|
|
327
335
|
|
|
@@ -341,7 +349,7 @@ module Dreader
|
|
|
341
349
|
# second row includes the documentation string, to document values in
|
|
342
350
|
# the columns
|
|
343
351
|
def template(hash = {})
|
|
344
|
-
options = @
|
|
352
|
+
options = @declared_options.merge(hash)
|
|
345
353
|
filename = options[:template_filename]
|
|
346
354
|
|
|
347
355
|
workbook = FastExcel.open(filename, constant_memory: true)
|
|
@@ -357,22 +365,22 @@ module Dreader
|
|
|
357
365
|
#
|
|
358
366
|
|
|
359
367
|
# here we write the first row
|
|
360
|
-
@
|
|
368
|
+
@declared_columns.each do |colspec|
|
|
361
369
|
human_readable = colspec[:name].to_s.split("_").map(&:capitalize).join(" ")
|
|
362
370
|
colref = colref_to_i(colspec[:colref])
|
|
363
371
|
worksheet.write_string(first_row, colref, human_readable, bold)
|
|
364
372
|
end
|
|
365
373
|
|
|
366
374
|
# here we create a note with the legenda
|
|
367
|
-
@
|
|
375
|
+
@declared_columns.each do |colspec|
|
|
368
376
|
colref = colref_to_i(colspec[:colref])
|
|
369
377
|
worksheet.write_string(first_row + 1, colref, colspec[:doc], nil)
|
|
370
378
|
end
|
|
371
379
|
|
|
372
380
|
# here we write some example records
|
|
373
|
-
@
|
|
381
|
+
@declared_examples.each_with_index do |example_hash, index|
|
|
374
382
|
example_hash.each do |colname, value|
|
|
375
|
-
colspec = @
|
|
383
|
+
colspec = @declared_columns.select { |x| x[:name] == colname }.first
|
|
376
384
|
if colspec
|
|
377
385
|
colref = colref_to_i(colspec[:colref])
|
|
378
386
|
worksheet.write_string(index + 3, colref, value, nil)
|
|
@@ -387,16 +395,31 @@ module Dreader
|
|
|
387
395
|
|
|
388
396
|
private
|
|
389
397
|
|
|
390
|
-
|
|
391
|
-
|
|
398
|
+
# list of keys we support in options. We remove them when reading
|
|
399
|
+
# the CSV file
|
|
400
|
+
OPTION_KEYS = %i[
|
|
401
|
+
filename sheet first_row last_row logger logger_level
|
|
402
|
+
]
|
|
403
|
+
|
|
404
|
+
def open_spreadsheet(options)
|
|
405
|
+
filename = options[:filename]
|
|
406
|
+
ext = options[:extension] || File.extname(filename)
|
|
392
407
|
|
|
393
408
|
case ext
|
|
394
|
-
when ".csv"
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
when ".
|
|
398
|
-
|
|
399
|
-
|
|
409
|
+
when ".csv"
|
|
410
|
+
csv_options = @declared_options.except(*OPTION_KEYS)
|
|
411
|
+
Roo::CSV.new(filename, csv_options:)
|
|
412
|
+
when ".tsv"
|
|
413
|
+
csv_options = @declared_options.except(*OPTION_KEYS).merge({ col_sep: "\t" })
|
|
414
|
+
Roo::CSV.new(filename, csv_options:)
|
|
415
|
+
when ".ods"
|
|
416
|
+
Roo::OpenOffice.new(filename)
|
|
417
|
+
when ".xls"
|
|
418
|
+
Roo::Excel.new(filename)
|
|
419
|
+
when ".xlsx"
|
|
420
|
+
Roo::Excelx.new(filename)
|
|
421
|
+
else
|
|
422
|
+
raise "Unknown extension: #{ext}"
|
|
400
423
|
end
|
|
401
424
|
end
|
|
402
425
|
|
|
@@ -445,7 +468,7 @@ module Dreader
|
|
|
445
468
|
|
|
446
469
|
begin
|
|
447
470
|
pass = check_function.call(value)
|
|
448
|
-
@logger.debug "check
|
|
471
|
+
@logger.debug "[dreader] check #{colname}/#{error_message} at #{coord} yields: '#{pass}'"
|
|
449
472
|
|
|
450
473
|
if pass != true
|
|
451
474
|
hash[colname][:error] = true
|
|
@@ -460,14 +483,14 @@ module Dreader
|
|
|
460
483
|
hash[:row_errors] << error
|
|
461
484
|
end
|
|
462
485
|
rescue => e
|
|
463
|
-
@logger.error "
|
|
486
|
+
@logger.error "[dreader] check #{colname}/#{error_message} raises an exception at #{coord}"
|
|
464
487
|
raise e
|
|
465
488
|
end
|
|
466
489
|
end
|
|
467
490
|
end
|
|
468
491
|
|
|
469
|
-
def coord(row, col,
|
|
470
|
-
"
|
|
492
|
+
def coord(row, col, value = nil)
|
|
493
|
+
["#{row}#{col}", (value ? "(#{value})" : nil)].join(" ")
|
|
471
494
|
end
|
|
472
495
|
end
|
|
473
496
|
end
|
data/lib/dreader/util.rb
CHANGED
|
@@ -19,20 +19,35 @@ module Dreader
|
|
|
19
19
|
end.to_h
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
22
|
+
# Given a "simplified" hash restructure it according to the second
|
|
23
|
+
# argument.
|
|
24
|
+
#
|
|
25
|
+
# Use it for generating hashes with nested attributes, which
|
|
26
|
+
# follows Rails conventions.
|
|
27
|
+
#
|
|
28
|
+
# @params:
|
|
29
|
+
# - hash the hash to restructure
|
|
30
|
+
# - args splat arguments which specify how to (re)structure the
|
|
31
|
+
# values in Hash. Each element is either a symbol or a Hash
|
|
25
32
|
#
|
|
26
33
|
# Example
|
|
27
34
|
#
|
|
28
|
-
# hash = { name: "A", surname: "B", address: "via
|
|
29
|
-
#
|
|
30
|
-
#
|
|
35
|
+
# hash = { name: "A", surname: "B", address: "via Piave", city: "Genoa" }
|
|
36
|
+
#
|
|
37
|
+
# restructure(hash, :name, :surname)
|
|
38
|
+
# { name: "A", surname: "B" }
|
|
31
39
|
#
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
40
|
+
# restructure(hash, :name, address_attributes: [:address, :city])
|
|
41
|
+
# {name: "A", address_attributes: { address: "via Piave", city: "Genoa" }
|
|
42
|
+
#
|
|
43
|
+
def self.restructure(hash, *new_structure)
|
|
44
|
+
new_structure.to_h do |value|
|
|
45
|
+
if value.instance_of?(Hash)
|
|
46
|
+
[value.keys.first, self.restructure(hash, *value.values.first)]
|
|
47
|
+
else
|
|
48
|
+
[value, hash[value]]
|
|
49
|
+
end
|
|
50
|
+
end
|
|
36
51
|
end
|
|
37
52
|
|
|
38
53
|
# an alias for Hash.slice
|
data/lib/dreader/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dreader
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Adolfo Villafiorita
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-10-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: roo
|
|
@@ -97,7 +97,7 @@ extensions: []
|
|
|
97
97
|
extra_rdoc_files: []
|
|
98
98
|
files:
|
|
99
99
|
- ".gitignore"
|
|
100
|
-
- CHANGELOG.
|
|
100
|
+
- CHANGELOG.org
|
|
101
101
|
- Gemfile
|
|
102
102
|
- Gemfile.lock
|
|
103
103
|
- LICENSE.txt
|
|
@@ -110,6 +110,7 @@ files:
|
|
|
110
110
|
- examples/age/age.rb
|
|
111
111
|
- examples/age_with_multiple_checks/Birthdays.ods
|
|
112
112
|
- examples/age_with_multiple_checks/age_with_multiple_checks.rb
|
|
113
|
+
- examples/local_vars/local_vars.rb
|
|
113
114
|
- examples/template/template_generation.rb
|
|
114
115
|
- examples/wikipedia_big_us_cities/big_us_cities.rb
|
|
115
116
|
- examples/wikipedia_big_us_cities/cities_by_state.ods
|
|
@@ -141,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
141
142
|
- !ruby/object:Gem::Version
|
|
142
143
|
version: '0'
|
|
143
144
|
requirements: []
|
|
144
|
-
rubygems_version: 3.
|
|
145
|
+
rubygems_version: 3.4.10
|
|
145
146
|
signing_key:
|
|
146
147
|
specification_version: 4
|
|
147
148
|
summary: Process and import data from cvs and spreadsheets
|