dreader 1.0.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{CHANGELOG.ORG → CHANGELOG.org} +52 -0
- data/Gemfile.lock +3 -3
- data/README.org +119 -76
- data/examples/age/age.rb +30 -30
- data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +5 -3
- data/examples/local_vars/local_vars.rb +28 -0
- data/examples/wikipedia_big_us_cities/big_us_cities.rb +6 -4
- data/examples/wikipedia_us_cities/us_cities.rb +5 -3
- data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +5 -3
- data/lib/dreader/engine.rb +157 -134
- data/lib/dreader/util.rb +25 -10
- data/lib/dreader/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 524e55af5bb94cae3f407a1602069549783e935798a638361f3c98e922ffc54d
|
4
|
+
data.tar.gz: 2599e048324ccd233e3fa4a0261e134ced0a3347d27af1e978e635639b6284a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8a78531d96ef35f9272a38daa5327620026dd66c98529710901d4c5994b9e5369308612cc79d1e0998d46dbb9dd6d26c448ddc4265dd536f1e61a4fc50fb885
|
7
|
+
data.tar.gz: de71caed5d3df79d0d456b080a72a0edc035ef4e46906aac3f94295b07d57b956554a9d9e07b4b6195c5c09df0badfa9202122d7b6e0568cf89569b6a2277d28
|
@@ -1,5 +1,57 @@
|
|
1
1
|
#+TITLE: Changelog
|
2
2
|
|
3
|
+
* Version 1.1.1 - <2023-10-16 Mon>
|
4
|
+
** Adds option :extension
|
5
|
+
|
6
|
+
- Adds option =extension= to the class options and to the =open_spreadsheet=
|
7
|
+
function, to be able to determine the type of a file with no extension
|
8
|
+
|
9
|
+
* Version 1.1.0
|
10
|
+
** Fixes an issue with visibility of variables
|
11
|
+
|
12
|
+
Version 1.1.0 makes Engine a module and requires to use extend
|
13
|
+
|
14
|
+
This allows to isolate declarations in different variables.
|
15
|
+
|
16
|
+
|
17
|
+
** Renames process to mappings
|
18
|
+
|
19
|
+
** Renames the variables in a more consistent way
|
20
|
+
|
21
|
+
#+begin_example ruby
|
22
|
+
attr_accessor :declared_options
|
23
|
+
# the specification of the columns to process
|
24
|
+
attr_accessor :declared_columns
|
25
|
+
# some example lines
|
26
|
+
attr_accessor :declared_examples
|
27
|
+
# the specification of the virtual columns
|
28
|
+
attr_accessor :declared_virtual_columns
|
29
|
+
# the mapping rules
|
30
|
+
attr_accessor :declared_mapping
|
31
|
+
#+end_example
|
32
|
+
|
33
|
+
** Declares =data= as a synonym of =table=
|
34
|
+
** Adds options to do everything in one pass
|
35
|
+
|
36
|
+
By passing the options
|
37
|
+
|
38
|
+
- =virtual=
|
39
|
+
- =mapping=
|
40
|
+
|
41
|
+
to =read= you can read and process the data in one step.
|
42
|
+
|
43
|
+
See the README for more details.
|
44
|
+
|
45
|
+
** Revises the logging messages
|
46
|
+
** Refactors some code to make it more readable
|
47
|
+
** Refactors the restructure function to make it more flexible
|
48
|
+
|
49
|
+
Now =restructure= takes as input symbols and hashes and reshapes
|
50
|
+
according to the specification.
|
51
|
+
|
52
|
+
See the README for an example.
|
53
|
+
|
54
|
+
|
3
55
|
* Version 1.0.0
|
4
56
|
** Changes the DSL to allow declaration in a class
|
5
57
|
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dreader (
|
4
|
+
dreader (1.2.0)
|
5
5
|
fast_excel
|
6
6
|
roo
|
7
7
|
|
@@ -13,11 +13,11 @@ GEM
|
|
13
13
|
reline (>= 0.3.1)
|
14
14
|
fast_excel (0.4.1)
|
15
15
|
ffi (> 1.9, < 2)
|
16
|
-
ffi (1.
|
16
|
+
ffi (1.16.2)
|
17
17
|
io-console (0.6.0)
|
18
18
|
irb (1.7.2)
|
19
19
|
reline (>= 0.3.6)
|
20
|
-
nokogiri (1.15.
|
20
|
+
nokogiri (1.15.4-x86_64-linux)
|
21
21
|
racc (~> 1.4)
|
22
22
|
racc (1.7.1)
|
23
23
|
rake (10.5.0)
|
data/README.org
CHANGED
@@ -63,7 +63,9 @@ Print name and age of people from the following data:
|
|
63
63
|
#+BEGIN_EXAMPLE ruby
|
64
64
|
require 'dreader'
|
65
65
|
|
66
|
-
class Reader
|
66
|
+
class Reader
|
67
|
+
extend Dreader::Engine
|
68
|
+
|
67
69
|
options do
|
68
70
|
# we start reading from row 2
|
69
71
|
first_row 2
|
@@ -104,14 +106,14 @@ Print name and age of people from the following data:
|
|
104
106
|
end
|
105
107
|
end
|
106
108
|
|
107
|
-
reader = Reader
|
109
|
+
reader = Reader
|
108
110
|
|
109
111
|
# read the file
|
110
112
|
reader.read filename: "Birthdays.ods"
|
111
113
|
# compute the virtual columns
|
112
114
|
reader.virtual_columns
|
113
115
|
# run the mapping declaration
|
114
|
-
reader.
|
116
|
+
reader.mappings
|
115
117
|
|
116
118
|
#
|
117
119
|
# Here we can do further processing on the data
|
@@ -130,33 +132,36 @@ Print name and age of people from the following data:
|
|
130
132
|
To write an import function with Dreader:
|
131
133
|
|
132
134
|
- Declare which is the input file and where we can find data (Sheet
|
133
|
-
and first row)
|
134
|
-
- Declare the content of columns and how to check raw data, parse data,
|
135
|
+
and first row) (This can also be specified in each call.)
|
136
|
+
- Declare the content of columns and, then, how to check raw data, parse data,
|
135
137
|
and check parsed data
|
136
138
|
- Add virtual columns, that is, columns computed from other values
|
137
139
|
in the row
|
138
|
-
- Specify how to
|
140
|
+
- Specify how to map each line. This is where you do the actual work
|
139
141
|
(for instance, if you process a file line by line) or put together data for
|
140
142
|
processing after the file has been fully read --- see the next step.
|
141
143
|
|
142
|
-
Dreader
|
143
|
-
|
144
|
-
|
144
|
+
Dreader now knows how to collect, shape, and transform (map) data according to
|
145
|
+
your instructions. We are now ready to do the actual work. This consists of
|
146
|
+
the following steps, various of which can be performed together:
|
145
147
|
|
146
|
-
-
|
148
|
+
- Read the file
|
149
|
+
- Do the parsing/transformations
|
150
|
+
- Compute the virtual columns
|
151
|
+
- Do the mappings
|
147
152
|
|
148
153
|
Each step is described in more detail in the following sections.
|
149
154
|
|
150
155
|
*** Declare which is the input file and where we can find data
|
151
156
|
|
152
|
-
Require =dreader= and declare a class which
|
153
|
-
|
157
|
+
Require =dreader= and declare a class which extends =Dreader::Engine=:
|
154
158
|
|
155
159
|
#+BEGIN_EXAMPLE ruby
|
156
160
|
require 'dreader'
|
157
161
|
|
158
|
-
class Reader
|
159
|
-
|
162
|
+
class Reader
|
163
|
+
extend Dreader::Engine
|
164
|
+
[...]
|
160
165
|
end
|
161
166
|
#+END_EXAMPLE
|
162
167
|
|
@@ -165,6 +170,7 @@ In the class specify parsing option, using the following syntax:
|
|
165
170
|
#+BEGIN_EXAMPLE ruby
|
166
171
|
options do
|
167
172
|
filename 'example.ods'
|
173
|
+
extension ".ods"
|
168
174
|
|
169
175
|
sheet 'Sheet 1'
|
170
176
|
|
@@ -180,10 +186,17 @@ In the class specify parsing option, using the following syntax:
|
|
180
186
|
|
181
187
|
where:
|
182
188
|
|
183
|
-
- (optional) =filename= is the file to read. If not specified, you will
|
184
|
-
|
185
|
-
|
186
|
-
|
189
|
+
- (optional) =filename= is the file to read. If not specified, you will have
|
190
|
+
to supply a filename when loading the file (see =read=, below). *Use
|
191
|
+
=.tsv= for tab-separated files.*
|
192
|
+
- (optional) =extension= overrides or specifies the extension of =filename=.
|
193
|
+
Takes as input the extension preceded by a "." (e.g., ".xlsx"). Notice that
|
194
|
+
**the value of this option is not appended to filename** (see =read= below).
|
195
|
+
Filename must thus be a valid reference to a file in the file system. This
|
196
|
+
option is useful in one of these two circumstances:
|
197
|
+
1. When =filename= has no extension
|
198
|
+
2. When you want to override the extension of the filename, e.g., to force
|
199
|
+
reading a "file.csv" as a tab separated file
|
187
200
|
- (optional) =first_row= is the first line to read (use =2= if your file
|
188
201
|
has a header)
|
189
202
|
- (optional) =last_row= is the last line to read. If not specified, we
|
@@ -192,19 +205,20 @@ where:
|
|
192
205
|
contain "garbage" after the records.
|
193
206
|
- (optional) =sheet= is the sheet name or number to read from. If not
|
194
207
|
specified, the first (default) sheet is used
|
208
|
+
- (optional) =debug= specifies that we are debugging
|
209
|
+
- (optional) =logger= specifies the logger
|
210
|
+
- (optional) =logger_level= specifies the logger level
|
195
211
|
|
196
|
-
#+BEGIN_NOTES
|
197
212
|
You can override some of the defaults by passing a hash as argument to
|
198
213
|
the =read= function. For instance:
|
199
214
|
|
200
215
|
#+BEGIN_EXAMPLE ruby
|
201
|
-
|
216
|
+
Reader.read filename: another_filepath
|
202
217
|
#+END_EXAMPLE
|
203
218
|
|
204
219
|
will read data from =another_filepath=, rather than from the filename
|
205
220
|
specified in the options. This might be useful, for instance, if the
|
206
221
|
same specification has to be used for different files.
|
207
|
-
#+END_NOTES
|
208
222
|
|
209
223
|
|
210
224
|
*** Declare the content of columns and how to parse them
|
@@ -216,12 +230,12 @@ There are two notations:
|
|
216
230
|
|
217
231
|
#+BEGIN_EXAMPLE ruby
|
218
232
|
# First notation, colref is put in the block
|
219
|
-
|
233
|
+
column :name do
|
220
234
|
colref 'A'
|
221
235
|
end
|
222
236
|
|
223
237
|
# Second notation, a hash is passed in the name
|
224
|
-
|
238
|
+
column({ name: 'A' }) do
|
225
239
|
end
|
226
240
|
#+END_EXAMPLE
|
227
241
|
|
@@ -242,7 +256,7 @@ The =column= declaration can contain Ruby blocks:
|
|
242
256
|
=process= is valid. *Check must return true if there are no errors.*
|
243
257
|
|
244
258
|
#+begin_example
|
245
|
-
|
259
|
+
column({ name: 'A' }) do
|
246
260
|
check_raw do |cell|
|
247
261
|
!cell.nil?
|
248
262
|
end
|
@@ -256,7 +270,7 @@ The =column= declaration can contain Ruby blocks:
|
|
256
270
|
#+end_quote
|
257
271
|
|
258
272
|
#+begin_example
|
259
|
-
|
273
|
+
column({ name: 'A' }) do
|
260
274
|
check_raw :must_be_non_nil do |cell|
|
261
275
|
!cell.nil?
|
262
276
|
end
|
@@ -278,7 +292,7 @@ The =column= declaration can contain Ruby blocks:
|
|
278
292
|
#+end_quote
|
279
293
|
|
280
294
|
#+begin_example
|
281
|
-
|
295
|
+
column({ name: 'A' }) do
|
282
296
|
check_raw do |cell|
|
283
297
|
# Here cell is like in the input file
|
284
298
|
end
|
@@ -435,17 +449,28 @@ key.
|
|
435
449
|
*** Process data
|
436
450
|
|
437
451
|
If =mapping= does not work for your data processing activities (e.g., you need
|
438
|
-
to make elaborations on data which span different rows), you can add your
|
439
|
-
|
452
|
+
to make elaborations on data which span different rows), you can perform
|
453
|
+
your elaborations on the data transformed by =mappings=.
|
440
454
|
|
441
455
|
A typical scenario works as follows:
|
442
456
|
|
443
|
-
1.
|
444
|
-
|
445
|
-
1. Use =i.read= or =i.load= (synonyms), to read all data.
|
457
|
+
1. Reference the class =i = Reader= and use =i.read= or =i.load=
|
458
|
+
(synonyms), to read all data.
|
446
459
|
|
447
460
|
#+BEGIN_EXAMPLE ruby
|
461
|
+
i = Reader
|
462
|
+
|
463
|
+
# read uses the options if defined and takes the same arguments as options
|
464
|
+
# examples:
|
465
|
+
# i.read
|
466
|
+
# i.read filename: "example.ods"
|
467
|
+
# i.read filename: "example.ods", extension: ".ods"
|
468
|
+
# i.read filename: "example", extension: ".ods"
|
469
|
+
# (the line above opens the file "example" as an Open Document Spreadsheet)
|
448
470
|
i.read
|
471
|
+
|
472
|
+
# alternately
|
473
|
+
Reader.read
|
449
474
|
#+END_EXAMPLE
|
450
475
|
|
451
476
|
2. Use =errors= to see whether any of the check functions failed:
|
@@ -465,20 +490,38 @@ A typical scenario works as follows:
|
|
465
490
|
|
466
491
|
(Optionally: check again for errors.)
|
467
492
|
|
468
|
-
4. Use the =
|
469
|
-
|
493
|
+
4. Use the =mappings= function to execute the =mapping= directive on each line
|
494
|
+
read from the file.
|
470
495
|
|
471
496
|
#+BEGIN_EXAMPLE ruby
|
472
|
-
i.
|
497
|
+
i.mappings
|
473
498
|
#+END_EXAMPLE
|
474
499
|
|
475
500
|
(Optionally: check again for errors.)
|
476
501
|
|
477
|
-
5. Add your own code to process data
|
502
|
+
5. Add your own code to process the data returned after =mappings=, which you
|
503
|
+
can access with =i.table= or =i.data= (synonyms).
|
478
504
|
|
479
|
-
Look in the examples directory for further details and a couple of
|
480
|
-
|
505
|
+
Look in the examples directory for further details and a couple of working
|
506
|
+
examples.
|
481
507
|
|
508
|
+
*** Improving performance
|
509
|
+
|
510
|
+
While debugging your specification executing =read=, =virtual_columns=, and
|
511
|
+
=mappings= in distinct steps is a good idea. When you go in production, you
|
512
|
+
might want to reduce the number of passes you perform on the data.
|
513
|
+
|
514
|
+
You can pass the option =virtual: true= to =read= to compute virtual
|
515
|
+
columns while you are reading data.
|
516
|
+
|
517
|
+
You can pass the option =mapping: true= to =read= to compute virtual
|
518
|
+
columns and perform the mapping while you are reading data. Notice that:
|
519
|
+
|
520
|
+
- =mapping= implies =virtual=, that is, if you pass =mapping: true= the read
|
521
|
+
function will also compute virtual columns
|
522
|
+
- =mapping= alters the content of =@table= and **subsequent calls to
|
523
|
+
=virtual_columns= and =mappings= will fail.** You have to reset by invoking
|
524
|
+
=read= again.
|
482
525
|
|
483
526
|
*** Managing Errors
|
484
527
|
|
@@ -529,22 +572,23 @@ end
|
|
529
572
|
|
530
573
|
You can check for errors in two different ways:
|
531
574
|
|
532
|
-
The first is in the =mapping= directive, where can check whether some checks
|
533
|
-
the =row= failed, by:
|
575
|
+
The first is in the =mapping= directive, where you can check whether some checks
|
576
|
+
for the =row= failed, by:
|
534
577
|
|
535
578
|
1. checking from the =:error= boolean key associated to each column, that is:
|
536
579
|
|
537
580
|
=row[<column_name>][:error]=
|
538
581
|
|
539
|
-
2. looking at the value of the =:row_errors= key, which contains all error
|
540
|
-
generated for the row:
|
582
|
+
2. looking at the value of the =:row_errors= key, which contains all error
|
583
|
+
messages generated for the row:
|
541
584
|
|
542
585
|
=row[:row_errors]=
|
543
586
|
|
544
|
-
3. After the processing, by using the method =errors=, which lists all the
|
587
|
+
3. After the processing, by using the method =errors=, which lists all the
|
588
|
+
errors.
|
545
589
|
|
546
|
-
The utility function =Dreader::Util.errors= takes as input the errors generated
|
547
|
-
Dreader and extract those of a specific row and, optionally column:
|
590
|
+
The utility function =Dreader::Util.errors= takes as input the errors generated
|
591
|
+
by Dreader and extracts those of a specific row and, optionally, column:
|
548
592
|
|
549
593
|
#+begin_example ruby
|
550
594
|
# get all the errors at line 2
|
@@ -644,39 +688,39 @@ Thus, for instance, given the example above returns:
|
|
644
688
|
|
645
689
|
* Simplifying the hash with the data read
|
646
690
|
|
647
|
-
The =Dreader::Util= class provides some functions to simplify the
|
648
|
-
|
649
|
-
|
650
|
-
ActiveRecord creators.
|
691
|
+
The =Dreader::Util= class provides some functions to simplify the hashes built
|
692
|
+
by =dreader=. This is useful to simplify the code you write and to generate
|
693
|
+
hashes you can pass, for instance, to ActiveRecord creators.
|
651
694
|
|
652
695
|
** Simplify removes everything but the values
|
653
696
|
|
654
|
-
=Dreader::Util.simplify
|
655
|
-
|
697
|
+
=Dreader::Util.simplify(hash)= removes all information but the value and makes
|
698
|
+
the value accessible directly from the name of the column.
|
656
699
|
|
657
700
|
#+BEGIN_EXAMPLE ruby
|
658
701
|
i.table[0]
|
659
|
-
{
|
660
|
-
|
702
|
+
{
|
703
|
+
name: { value: "John", row_number: 1, col_number: 1, errors: nil },
|
704
|
+
age: { value: 30, row_number: 1, col_number: 2, errors: nil }
|
705
|
+
}
|
661
706
|
|
662
707
|
Dreader::Util.simplify i.table[0]
|
663
708
|
{ name: "John", age: 30 }
|
664
709
|
#+END_EXAMPLE
|
665
710
|
|
666
|
-
*As an additional bonus, it removes the keys =row_number= and =row_errors=,
|
667
|
-
which are not part of the data read, in the first place.*
|
668
|
-
|
669
711
|
** Slice and Clean select columns
|
670
712
|
|
671
|
-
=Dreader::Util.slice
|
672
|
-
|
673
|
-
|
674
|
-
|
713
|
+
=Dreader::Util.slice(hash, keys)= and =Dreader::Util.clean(hash, keys)=, where
|
714
|
+
=keys= is an array of keys, are respectively used to select or remove some
|
715
|
+
keys from the hash returned by Dreader. (Notice that the Ruby Hash class
|
716
|
+
already provides similar methods.)
|
675
717
|
|
676
718
|
#+BEGIN_EXAMPLE ruby
|
677
719
|
i.table[0]
|
678
|
-
{
|
679
|
-
|
720
|
+
{
|
721
|
+
name: { value: "John", row_number: 1, col_number: 1, errors: nil },
|
722
|
+
age: { value: 30, row_number: 1, col_number: 2, errors: nil }
|
723
|
+
}
|
680
724
|
|
681
725
|
Dreader::Util.slice i.table[0], :name
|
682
726
|
{ name: { value: "John", row_number: 1, col_number: 1, errors: nil } }
|
@@ -685,8 +729,8 @@ Ruby Hash class already provides similar methods.)
|
|
685
729
|
{ age: { value: 30, row_number: 1, col_number: 2, errors: nil } }
|
686
730
|
#+END_EXAMPLE
|
687
731
|
|
688
|
-
The methods =slice= and =clean= are more useful when used in
|
689
|
-
|
732
|
+
The methods =slice= and =clean= are more useful when used in conjunction with
|
733
|
+
=simplify=:
|
690
734
|
|
691
735
|
#+BEGIN_EXAMPLE ruby
|
692
736
|
hash = Dreader::Util.simplify i.table[0]
|
@@ -704,21 +748,23 @@ create an =ActiveRecord= object.
|
|
704
748
|
|
705
749
|
** Better Integration with ActiveRecord
|
706
750
|
|
707
|
-
Finally, the =Dreader::Util.restructure= method helps building hashes
|
708
|
-
|
751
|
+
Finally, the =Dreader::Util.restructure= method helps building hashes to create
|
752
|
+
[[http://api.rubyonrails.org/classes/ActiveModel/Model.html][ActiveModel]] objects with nested attributes.
|
753
|
+
|
754
|
+
**The starting point is a simplified row.**
|
709
755
|
|
710
756
|
#+BEGIN_EXAMPLE ruby
|
711
|
-
hash = {name: "John", surname: "Doe", address: "Unknown", city: "NY"
|
757
|
+
hash = { name: "John", surname: "Doe", address: "Unknown", city: "NY" }
|
712
758
|
|
713
|
-
Dreader::Util.restructure hash, [:name, :surname
|
714
|
-
{name: "John", surname: "Doe", address_attributes: {address: "
|
759
|
+
Dreader::Util.restructure hash, [:name, :surname, :address_attributes, [:address, :city]]
|
760
|
+
{ name: "John", surname: "Doe", address_attributes: { address: "Unknown", city: "NY" } }
|
715
761
|
#+END_EXAMPLE
|
716
762
|
|
717
763
|
|
718
764
|
* Debugging your specification
|
719
765
|
|
720
|
-
The =debug= function prints the current configuration, reads some
|
721
|
-
|
766
|
+
The =debug= function prints the current configuration, reads some records from
|
767
|
+
the input file(s), and shows the records read:
|
722
768
|
|
723
769
|
#+BEGIN_EXAMPLE ruby
|
724
770
|
i.debug
|
@@ -735,8 +781,8 @@ read:
|
|
735
781
|
i.debug process: false, check: false
|
736
782
|
#+END_EXAMPLE
|
737
783
|
|
738
|
-
Notice that =check= implies =process=, since =check= is invoked on the
|
739
|
-
|
784
|
+
Notice that =check= implies =process=, since =check= is invoked on the output
|
785
|
+
of the =process= directive.
|
740
786
|
|
741
787
|
If you prefer, in alternative to =debug= you can also use configuration
|
742
788
|
variables (but then you need to change the configuration according to the
|
@@ -751,7 +797,7 @@ environment):
|
|
751
797
|
|
752
798
|
* Changelog
|
753
799
|
|
754
|
-
See [[file:CHANGELOG.
|
800
|
+
See [[file:CHANGELOG.org][CHANGELOG]].
|
755
801
|
|
756
802
|
* Known Limitations
|
757
803
|
|
@@ -759,9 +805,6 @@ At the moment:
|
|
759
805
|
|
760
806
|
- it is not possible to specify column references using header names
|
761
807
|
(like Roo does).
|
762
|
-
- it is not possible to pass options to the file readers. As a
|
763
|
-
consequence tab-separated files must have the =.tsv= extension or
|
764
|
-
they will not be parsed correctly
|
765
808
|
- some more testing wouldn't hurt.
|
766
809
|
|
767
810
|
* Known Bugs
|
data/examples/age/age.rb
CHANGED
@@ -1,47 +1,47 @@
|
|
1
|
-
require
|
1
|
+
require "dreader"
|
2
2
|
|
3
|
-
class Reader
|
3
|
+
class Reader
|
4
|
+
extend Dreader::Engine
|
4
5
|
|
5
|
-
options do
|
6
|
-
|
7
|
-
|
8
|
-
end
|
6
|
+
options do
|
7
|
+
first_row 2
|
8
|
+
debug true
|
9
|
+
end
|
9
10
|
|
10
|
-
column :name do
|
11
|
-
|
12
|
-
|
13
|
-
end
|
11
|
+
column :name do
|
12
|
+
doc "A is the name string"
|
13
|
+
colref 'A'
|
14
|
+
end
|
14
15
|
|
15
|
-
column :birthdate do
|
16
|
-
|
17
|
-
|
16
|
+
column :birthdate do
|
17
|
+
doc "Birthdate contains a full date (i.e., including the year)"
|
18
|
+
colref 'B'
|
18
19
|
|
19
|
-
|
20
|
-
|
20
|
+
process do |c|
|
21
|
+
Date.parse(c)
|
22
|
+
end
|
21
23
|
end
|
22
|
-
end
|
23
24
|
|
24
|
-
virtual_column :age do
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
virtual_column :age do
|
26
|
+
process do |row|
|
27
|
+
birthdate = row[:birthdate][:value]
|
28
|
+
birthday = Date.new(Date.today.year, birthdate.month, birthdate.day)
|
29
|
+
today = Date.today
|
29
30
|
|
30
|
-
|
31
|
+
[0, today.year - birthdate.year - (birthday < today ? 1 : 0)].max
|
32
|
+
end
|
31
33
|
end
|
32
|
-
end
|
33
34
|
|
34
|
-
mapping do |row|
|
35
|
-
|
36
|
-
|
37
|
-
end
|
35
|
+
mapping do |row|
|
36
|
+
r = Dreader::Util.simplify(row)
|
37
|
+
puts "#{r[:name]} is #{r[:age]} years old (born on #{r[:birthdate]})"
|
38
|
+
end
|
38
39
|
end
|
39
40
|
|
40
|
-
i = Reader
|
41
|
-
|
41
|
+
i = Reader
|
42
42
|
i.read filename: "Birthdays.ods"
|
43
43
|
i.virtual_columns
|
44
|
-
i.
|
44
|
+
i.mappings
|
45
45
|
|
46
46
|
#
|
47
47
|
# Here we can do further processing on the data
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'dreader'
|
2
2
|
|
3
|
-
class Reader
|
3
|
+
class Reader
|
4
|
+
extend Dreader::Engine
|
5
|
+
|
4
6
|
options { first_row 2; debug true }
|
5
7
|
|
6
8
|
#
|
@@ -54,9 +56,9 @@ class Reader < Dreader::Engine
|
|
54
56
|
end
|
55
57
|
end
|
56
58
|
|
57
|
-
i = Reader
|
59
|
+
i = Reader
|
58
60
|
|
59
61
|
i.read filename: "Birthdays.ods"
|
60
62
|
i.virtual_columns
|
61
|
-
i.
|
63
|
+
i.mappings
|
62
64
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#
|
2
|
+
# This demonstrates that variables are local
|
3
|
+
#
|
4
|
+
|
5
|
+
require "dreader"
|
6
|
+
|
7
|
+
class OneReader
|
8
|
+
extend Dreader::Engine
|
9
|
+
|
10
|
+
options do
|
11
|
+
first_row 2
|
12
|
+
debug true
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class AnotherReader
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
19
|
+
options do
|
20
|
+
filename "filename"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
r1 = OneReader
|
25
|
+
r2 = AnotherReader
|
26
|
+
|
27
|
+
puts r1.declared_options
|
28
|
+
puts r2.declared_options
|
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
require 'dreader'
|
4
4
|
|
5
|
-
class Processor
|
5
|
+
class Processor
|
6
|
+
extend Dreader::Engine
|
7
|
+
|
6
8
|
options do
|
7
9
|
first_row 2
|
8
10
|
filename "cities_by_state.ods"
|
@@ -25,7 +27,7 @@ class Processor < Dreader::Engine
|
|
25
27
|
end
|
26
28
|
end
|
27
29
|
|
28
|
-
processor = Processor
|
30
|
+
processor = Processor
|
29
31
|
|
30
32
|
printf "Loading the spreadsheet..."
|
31
33
|
processor.load
|
@@ -43,8 +45,8 @@ else
|
|
43
45
|
end
|
44
46
|
puts "done!"
|
45
47
|
|
46
|
-
puts "
|
47
|
-
processor.
|
48
|
+
puts "Applying mapping rules to the spreadsheet..."
|
49
|
+
processor.mappings
|
48
50
|
puts "... done"
|
49
51
|
|
50
52
|
|
@@ -13,7 +13,9 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
class Importer
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
17
19
|
# read from us_cities.tsv, lines from 2 to 10 (included)
|
18
20
|
options do
|
19
21
|
filename "us_cities.tsv"
|
@@ -50,7 +52,7 @@ class Importer < Dreader::Engine
|
|
50
52
|
end
|
51
53
|
|
52
54
|
cities = []
|
53
|
-
importer = Importer
|
55
|
+
importer = Importer
|
54
56
|
|
55
57
|
importer.mapping do |row|
|
56
58
|
# remove all additional information stored in each cell
|
@@ -81,7 +83,7 @@ importer.debug process: false, check: false
|
|
81
83
|
# load and process
|
82
84
|
importer.load
|
83
85
|
cities = []
|
84
|
-
importer.
|
86
|
+
importer.mappings
|
85
87
|
|
86
88
|
# output everything to see whether it works
|
87
89
|
puts "First ten cities in the US (source Wikipedia)"
|
@@ -13,7 +13,9 @@ class City
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
class Importer
|
16
|
+
class Importer
|
17
|
+
extend Dreader::Engine
|
18
|
+
|
17
19
|
# read from us_cities.tsv, lines from 2 to 10 (included)
|
18
20
|
options do
|
19
21
|
filename "us_cities.tsv"
|
@@ -42,7 +44,7 @@ class Importer < Dreader::Engine
|
|
42
44
|
end
|
43
45
|
|
44
46
|
cities = []
|
45
|
-
importer = Importer
|
47
|
+
importer = Importer
|
46
48
|
|
47
49
|
importer.mapping do |row|
|
48
50
|
# remove all additional information stored in each cell
|
@@ -73,7 +75,7 @@ importer.debug process: false, check: false
|
|
73
75
|
# load and process
|
74
76
|
importer.load
|
75
77
|
cities = []
|
76
|
-
importer.
|
78
|
+
importer.mappings
|
77
79
|
|
78
80
|
# output everything to see whether it works
|
79
81
|
puts "First ten cities in the US (source Wikipedia)"
|
data/lib/dreader/engine.rb
CHANGED
@@ -10,47 +10,29 @@ module Dreader
|
|
10
10
|
#
|
11
11
|
# This is where the real stuff begins
|
12
12
|
#
|
13
|
-
|
14
|
-
# TODO: make the writer into private methods (need to be accessed only
|
15
|
-
# in the initializer) and demote to attr_reader
|
16
|
-
|
13
|
+
module Engine
|
17
14
|
# the options we passed
|
18
|
-
attr_accessor :
|
15
|
+
attr_accessor :declared_options
|
19
16
|
# the specification of the columns to process
|
20
|
-
attr_accessor :
|
17
|
+
attr_accessor :declared_columns
|
21
18
|
# some example lines
|
22
|
-
attr_accessor :
|
19
|
+
attr_accessor :declared_examples
|
23
20
|
# the specification of the virtual columns
|
24
|
-
attr_accessor :
|
21
|
+
attr_accessor :declared_virtual_columns
|
25
22
|
# the mapping rules
|
26
|
-
attr_accessor :
|
23
|
+
attr_accessor :declared_mapping
|
27
24
|
|
28
25
|
# the data we read
|
29
26
|
attr_reader :table
|
30
27
|
|
31
|
-
# variables declared in the class which need to be propagated in
|
32
|
-
# the instance
|
33
|
-
INSTANTIATE = %i[options colspec examples virtualcols]
|
34
|
-
|
35
|
-
def initialize
|
36
|
-
@logger = Logger.new($stdout)
|
37
|
-
@logger.level = Logger::WARN
|
38
|
-
|
39
|
-
# populate the instance with the variables defined in the class
|
40
|
-
@options = defined?(@@options) ? @@options : {}
|
41
|
-
@colspec = defined?(@@colspec) ? @@colspec : []
|
42
|
-
@examples = defined?(@@examples) ? @@examples : []
|
43
|
-
@virtualcols = defined?(@@virtualcols) ? @@virtualcols : []
|
44
|
-
end
|
45
|
-
|
46
28
|
# define a DSL for options
|
47
29
|
# any string is processed as an option and it ends up in the
|
48
30
|
# @options hash
|
49
|
-
def
|
31
|
+
def options(&block)
|
50
32
|
options = Options.new
|
51
33
|
options.instance_eval(&block)
|
52
34
|
|
53
|
-
|
35
|
+
@declared_options = options.to_hash
|
54
36
|
end
|
55
37
|
|
56
38
|
# define a DSL for column specification
|
@@ -58,18 +40,18 @@ module Dreader
|
|
58
40
|
# - `block` contains two declarations, `process` and `check`, which are
|
59
41
|
# used, respectively, to make a cell into the desired data and to check
|
60
42
|
# whether the desired data is ok
|
61
|
-
def
|
43
|
+
def column(name, &block)
|
62
44
|
column = Column.new
|
63
45
|
column.instance_eval(&block)
|
64
46
|
|
65
|
-
|
47
|
+
@declared_columns ||= []
|
66
48
|
|
67
49
|
if name.instance_of?(Hash)
|
68
|
-
|
50
|
+
@declared_columns << column.to_hash.merge(
|
69
51
|
{ name: name.keys.first, colref: name.values.first }
|
70
52
|
)
|
71
53
|
else
|
72
|
-
|
54
|
+
@declared_columns << column.to_hash.merge({ name: name })
|
73
55
|
end
|
74
56
|
end
|
75
57
|
|
@@ -106,20 +88,20 @@ module Dreader
|
|
106
88
|
# cell.strip
|
107
89
|
# end
|
108
90
|
# end
|
109
|
-
def
|
91
|
+
def columns(hash, &block)
|
110
92
|
hash.each_key do |key|
|
111
93
|
column = Column.new
|
112
94
|
column.colref hash[key]
|
113
95
|
column.instance_eval(&block) if block
|
114
96
|
|
115
|
-
|
116
|
-
|
97
|
+
@declared_columns ||= []
|
98
|
+
@declared_columns << column.to_hash.merge({ name: key })
|
117
99
|
end
|
118
100
|
end
|
119
101
|
|
120
|
-
def
|
121
|
-
|
122
|
-
|
102
|
+
def example(hash)
|
103
|
+
@declared_examples ||= []
|
104
|
+
@declared_examples << hash
|
123
105
|
end
|
124
106
|
|
125
107
|
# virtual columns define derived attributes
|
@@ -128,12 +110,12 @@ module Dreader
|
|
128
110
|
#
|
129
111
|
# virtual colum declarations are executed in the order in which
|
130
112
|
# they are defined
|
131
|
-
def
|
113
|
+
def virtual_column(name, &block)
|
132
114
|
column = Column.new
|
133
115
|
column.instance_eval &block
|
134
116
|
|
135
|
-
|
136
|
-
|
117
|
+
@declared_virtual_columns ||= []
|
118
|
+
@declared_virtual_columns << column.to_hash.merge({ name: name })
|
137
119
|
end
|
138
120
|
|
139
121
|
# define what we do with each line we read
|
@@ -141,8 +123,8 @@ module Dreader
|
|
141
123
|
# `row` is a hash in which each spreadsheet cell is accessible under
|
142
124
|
# the column names. Each cell has the following values:
|
143
125
|
# :value, :error, :row_number, :col_number
|
144
|
-
def
|
145
|
-
|
126
|
+
def mapping(&block)
|
127
|
+
@declared_mapping = block
|
146
128
|
end
|
147
129
|
|
148
130
|
# read a file and store it internally
|
@@ -155,18 +137,20 @@ module Dreader
|
|
155
137
|
# @return the data read from filename, in the form of an array of
|
156
138
|
# hashes
|
157
139
|
def read(args = {})
|
140
|
+
# args override values in options (if defined)
|
141
|
+
# @declared_options is nil when no options block was declared, hence the {} fallback
|
142
|
+
options = (@declared_options || {}).merge(args)
|
143
|
+
|
144
|
+
@logger = options[:logger] || Logger.new($stdout)
|
145
|
+
@logger.level = options[:logger_level] || Logger::WARN
|
146
|
+
@debug = options[:debug] == true
|
147
|
+
|
158
148
|
if !args.instance_of?(Hash)
|
159
149
|
@logger.error "#{__callee__}: this function takes a Hash as input"
|
160
150
|
raise Exception
|
161
151
|
end
|
162
152
|
|
163
|
-
|
164
|
-
|
165
|
-
@logger = options[:logger] if options[:logger]
|
166
|
-
@logger.level = options[:logger_level] if options[:logger_level]
|
167
|
-
@debug = options[:debug] == true
|
168
|
-
|
169
|
-
spreadsheet = Dreader::Engine.open_spreadsheet (options[:filename])
|
153
|
+
spreadsheet = open_spreadsheet(options)
|
170
154
|
sheet = spreadsheet.sheet(options[:sheet] || 0)
|
171
155
|
first_row = options[:first_row] || 1
|
172
156
|
last_row = options[:last_row] || sheet.last_row
|
@@ -176,64 +160,74 @@ module Dreader
|
|
176
160
|
@logger.level = Logger::DEBUG
|
177
161
|
|
178
162
|
# override the number of lines read
|
179
|
-
options[:n]
|
180
|
-
last_row =
|
181
|
-
|
163
|
+
n_lines = options[:n] ? options[:n].to_i : 10
|
164
|
+
last_row = first_row + n_lines - 1
|
165
|
+
|
182
166
|
# apply some defaults for debugging, if not defined in the options
|
183
167
|
[:check_raw, :process, :check].map do |key|
|
184
168
|
options[key] = true unless options.key?(key)
|
185
169
|
end
|
186
170
|
end
|
187
171
|
|
188
|
-
{ current: @
|
189
|
-
@logger.debug "#{k.capitalize} configuration:"
|
172
|
+
{ current: @declared_options, debug: options }.each do |k, v|
|
173
|
+
@logger.debug "[dreader] #{k.capitalize} configuration:"
|
190
174
|
v.each do |key, value|
|
191
175
|
@logger.debug " #{key}: #{value}"
|
192
176
|
end
|
193
177
|
end
|
194
178
|
|
195
|
-
@table = []
|
196
179
|
@errors = []
|
197
180
|
|
198
|
-
(first_row..last_row).
|
181
|
+
@table = (first_row..last_row).map do |row_number|
|
199
182
|
r = { row_number: row_number, row_errors: [] }
|
200
183
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
184
|
+
# this has side-effects on r
|
185
|
+
columns_on(r, row_number, sheet)
|
186
|
+
|
187
|
+
# this has side-effects on r
|
188
|
+
virtual_columns_on(r) if options[:virtual] || options[:mapping]
|
189
|
+
|
190
|
+
options[:mapping] ? mappings_on(r) : r
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
# TODO: PASS A ROW (and not row_number and sheet)
|
195
|
+
def columns_on(r, row_number, sheet)
|
196
|
+
@declared_columns.each_with_index do |colspec, index|
|
197
|
+
colname = colspec[:name]
|
198
|
+
colref = colspec[:colref]
|
199
|
+
cell = sheet.cell(row_number, colref)
|
200
|
+
|
201
|
+
r[colname] = {
|
202
|
+
row: row_number,
|
203
|
+
col: colspec[:colref],
|
204
|
+
value: cell,
|
205
|
+
error: false
|
206
|
+
}
|
207
|
+
|
208
|
+
# Repeated below
|
209
|
+
# @logger.debug "[dreader] Processing #{coord(row_number, colref)}"
|
227
210
|
|
228
|
-
|
229
|
-
|
230
|
-
|
211
|
+
# check raw data
|
212
|
+
check_data(colspec[:checks_raw], r, colname)
|
213
|
+
|
214
|
+
# process data
|
215
|
+
coord = coord(row_number, colspec[:colref], cell)
|
216
|
+
begin
|
217
|
+
processed = colspec[:process] ? colspec[:process].call(cell) : cell
|
218
|
+
@logger.debug "[dreader] #{colname} process #{coord} yields '#{processed}' (#{processed.class})"
|
219
|
+
r[colname][:value] = processed
|
220
|
+
rescue => e
|
221
|
+
@logger.error "[dreader] #{colname} process #{coord} raises an exception"
|
222
|
+
raise e
|
231
223
|
end
|
232
224
|
|
233
|
-
|
225
|
+
# check data after process - notice that now r contains the value
|
226
|
+
# processed by process
|
227
|
+
check_data(colspec[:checks], r, colname)
|
234
228
|
end
|
235
229
|
|
236
|
-
|
230
|
+
r
|
237
231
|
end
|
238
232
|
|
239
233
|
alias load read
|
@@ -254,51 +248,65 @@ module Dreader
|
|
254
248
|
# You need to invoke read first
|
255
249
|
def get_row(row_number)
|
256
250
|
if row_number > @table.size
|
257
|
-
@logger.error "
|
251
|
+
@logger.error "[dreader] 'row_number' is out of range (did you invoke read?)"
|
258
252
|
exit
|
259
253
|
elsif row_number <= 0
|
260
|
-
@logger.error "
|
254
|
+
@logger.error "[dreader] 'row_number' is zero or negative (first row is 1)."
|
261
255
|
else
|
262
256
|
@table[row_number - 1]
|
263
257
|
end
|
264
258
|
end
|
265
259
|
|
266
|
-
# return an array of hashes with all the errors we have
|
260
|
+
# return an array of hashes with all the errors we have encountered
|
267
261
|
# an empty array is good news
|
268
262
|
attr_reader :errors
|
269
263
|
|
270
|
-
def virtual_columns
|
264
|
+
def virtual_columns
|
271
265
|
# execute the virtual column specification
|
272
|
-
@table.each
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
r[colname][:value] = virtualcol[:process].call(r)
|
283
|
-
end
|
284
|
-
rescue => e
|
285
|
-
row = r[:row_number]
|
286
|
-
@logger.error "#{__callee__}: process for virtual column :#{colname} raised an exception at row #{row}"
|
287
|
-
raise e
|
288
|
-
end
|
266
|
+
@table.each { |row| virtual_columns_on(row) }
|
267
|
+
end
|
268
|
+
|
269
|
+
# Compute the virtual columns for a row, with side effects on row
|
270
|
+
def virtual_columns_on(row)
|
271
|
+
@declared_virtual_columns.each do |virtualcol|
|
272
|
+
colname = virtualcol[:name]
|
273
|
+
row[colname] = { virtual: true }
|
274
|
+
|
275
|
+
check_data(virtualcol[:checks_raw], row, colname, full_row: true)
|
289
276
|
|
290
|
-
|
291
|
-
|
277
|
+
begin
|
278
|
+
# add the cell to the table
|
279
|
+
if virtualcol[:process]
|
280
|
+
row[colname][:value] = virtualcol[:process].call(row)
|
281
|
+
end
|
282
|
+
rescue => e
|
283
|
+
r = row[:row_number]
|
284
|
+
@logger.error "[dreader] #{colname} process raises an exception at row #{r}"
|
285
|
+
raise e
|
292
286
|
end
|
287
|
+
|
288
|
+
# check data after process -- we also have the processed value of
|
289
|
+
# the virtual column
|
290
|
+
check_data(virtualcol[:checks], row, colname, full_row: true)
|
293
291
|
end
|
294
292
|
end
|
295
293
|
|
296
|
-
# apply the mapping code to the array
|
297
|
-
#
|
294
|
+
# apply the mapping code to the array; it makes sense to invoke it only
|
295
|
+
# once.
|
298
296
|
#
|
299
|
-
# the mapping is applied only if it defined
|
300
|
-
|
301
|
-
|
297
|
+
# the mapping is applied only if it is defined, and it uses map, so that
|
298
|
+
# it can be used functionally
|
299
|
+
def mappings
|
300
|
+
@table.map { |row| mappings_on(row) }
|
301
|
+
end
|
302
|
+
|
303
|
+
def mappings_on(row)
|
304
|
+
@declared_mapping&.call(row)
|
305
|
+
end
|
306
|
+
|
307
|
+
# an alias
|
308
|
+
def data
|
309
|
+
@table
|
302
310
|
end
|
303
311
|
|
304
312
|
def to_s
|
@@ -314,14 +322,14 @@ module Dreader
|
|
314
322
|
end
|
315
323
|
|
316
324
|
def compare_headers(hash = {})
|
317
|
-
options = @
|
325
|
+
options = @declared_options.merge(hash)
|
318
326
|
|
319
|
-
spreadsheet =
|
327
|
+
spreadsheet = open_spreadsheet(options)
|
320
328
|
sheet = spreadsheet.sheet(options[:sheet] || 0)
|
321
329
|
header_row_number = options[:first_row] - 1 || 1
|
322
330
|
|
323
331
|
output_hash = {}
|
324
|
-
@
|
332
|
+
@declared_columns.map do |colspec|
|
325
333
|
cell = sheet.cell(row_number, colspec[:colref])
|
326
334
|
human_readable = colspec[:name].to_s.split("_").map(&:capitalize).join(" ")
|
327
335
|
|
@@ -341,7 +349,7 @@ module Dreader
|
|
341
349
|
# second row includes the documentation string, to document values in
|
342
350
|
# the columns
|
343
351
|
def template(hash = {})
|
344
|
-
options = @
|
352
|
+
options = @declared_options.merge(hash)
|
345
353
|
filename = options[:template_filename]
|
346
354
|
|
347
355
|
workbook = FastExcel.open(filename, constant_memory: true)
|
@@ -357,22 +365,22 @@ module Dreader
|
|
357
365
|
#
|
358
366
|
|
359
367
|
# here we write the first row
|
360
|
-
@
|
368
|
+
@declared_columns.each do |colspec|
|
361
369
|
human_readable = colspec[:name].to_s.split("_").map(&:capitalize).join(" ")
|
362
370
|
colref = colref_to_i(colspec[:colref])
|
363
371
|
worksheet.write_string(first_row, colref, human_readable, bold)
|
364
372
|
end
|
365
373
|
|
366
374
|
# here we create a note with the legenda
|
367
|
-
@
|
375
|
+
@declared_columns.each do |colspec|
|
368
376
|
colref = colref_to_i(colspec[:colref])
|
369
377
|
worksheet.write_string(first_row + 1, colref, colspec[:doc], nil)
|
370
378
|
end
|
371
379
|
|
372
380
|
# here we write some example records
|
373
|
-
@
|
381
|
+
@declared_examples.each_with_index do |example_hash, index|
|
374
382
|
example_hash.each do |colname, value|
|
375
|
-
colspec = @
|
383
|
+
colspec = @declared_columns.select { |x| x[:name] == colname }.first
|
376
384
|
if colspec
|
377
385
|
colref = colref_to_i(colspec[:colref])
|
378
386
|
worksheet.write_string(index + 3, colref, value, nil)
|
@@ -387,16 +395,31 @@ module Dreader
|
|
387
395
|
|
388
396
|
private
|
389
397
|
|
390
|
-
|
391
|
-
|
398
|
+
# list of keys we support in options. We remove them when reading
|
399
|
+
# the CSV file
|
400
|
+
OPTION_KEYS = %i[
|
401
|
+
filename sheet first_row last_row logger logger_level
|
402
|
+
]
|
403
|
+
|
404
|
+
def open_spreadsheet(options)
|
405
|
+
filename = options[:filename]
|
406
|
+
ext = options[:extension] || File.extname(filename)
|
392
407
|
|
393
408
|
case ext
|
394
|
-
when ".csv"
|
395
|
-
|
396
|
-
|
397
|
-
when ".
|
398
|
-
|
399
|
-
|
409
|
+
when ".csv"
|
410
|
+
csv_options = @declared_options.except(*OPTION_KEYS)
|
411
|
+
Roo::CSV.new(filename, csv_options:)
|
412
|
+
when ".tsv"
|
413
|
+
csv_options = @declared_options.except(*OPTION_KEYS).merge({ col_sep: "\t" })
|
414
|
+
Roo::CSV.new(filename, csv_options:)
|
415
|
+
when ".ods"
|
416
|
+
Roo::OpenOffice.new(filename)
|
417
|
+
when ".xls"
|
418
|
+
Roo::Excel.new(filename)
|
419
|
+
when ".xlsx"
|
420
|
+
Roo::Excelx.new(filename)
|
421
|
+
else
|
422
|
+
raise "Unknown extension: #{ext}"
|
400
423
|
end
|
401
424
|
end
|
402
425
|
|
@@ -445,7 +468,7 @@ module Dreader
|
|
445
468
|
|
446
469
|
begin
|
447
470
|
pass = check_function.call(value)
|
448
|
-
@logger.debug "check
|
471
|
+
@logger.debug "[dreader] check #{colname}/#{error_message} at #{coord} yields: '#{pass}'"
|
449
472
|
|
450
473
|
if pass != true
|
451
474
|
hash[colname][:error] = true
|
@@ -460,14 +483,14 @@ module Dreader
|
|
460
483
|
hash[:row_errors] << error
|
461
484
|
end
|
462
485
|
rescue => e
|
463
|
-
@logger.error "
|
486
|
+
@logger.error "[dreader] check #{colname}/#{error_message} raises an exception at #{coord}"
|
464
487
|
raise e
|
465
488
|
end
|
466
489
|
end
|
467
490
|
end
|
468
491
|
|
469
|
-
def coord(row, col,
|
470
|
-
"
|
492
|
+
def coord(row, col, value = nil)
|
493
|
+
["#{row}#{col}", (value ? "(#{value})" : nil)].join(" ")
|
471
494
|
end
|
472
495
|
end
|
473
496
|
end
|
data/lib/dreader/util.rb
CHANGED
@@ -19,20 +19,35 @@ module Dreader
|
|
19
19
|
end.to_h
|
20
20
|
end
|
21
21
|
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
22
|
+
# Given a "simplified" hash restructure it according to the second
|
23
|
+
# argument.
|
24
|
+
#
|
25
|
+
# Use it for generating hashes with nested attributes, which
|
26
|
+
# follows Rails conventions.
|
27
|
+
#
|
28
|
+
# @params:
|
29
|
+
# - hash the hash to restructure
|
30
|
+
# - args splat arguments which specify how to (re)structure the
|
31
|
+
# values in Hash. Each element is either a symbol or a Hash
|
25
32
|
#
|
26
33
|
# Example
|
27
34
|
#
|
28
|
-
# hash = { name: "A", surname: "B", address: "via
|
29
|
-
#
|
30
|
-
#
|
35
|
+
# hash = { name: "A", surname: "B", address: "via Piave", city: "Genoa" }
|
36
|
+
#
|
37
|
+
# restructure(hash, :name, :surname)
|
38
|
+
# { name: "A", surname: "B" }
|
31
39
|
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
40
|
+
# restructure(hash, :name, address_attributes: [:address, :city])
|
41
|
+
# { name: "A", address_attributes: { address: "via Piave", city: "Genoa" } }
|
42
|
+
#
|
43
|
+
def self.restructure(hash, *new_structure)
|
44
|
+
new_structure.to_h do |value|
|
45
|
+
if value.instance_of?(Hash)
|
46
|
+
[value.keys.first, self.restructure(hash, *value.values.first)]
|
47
|
+
else
|
48
|
+
[value, hash[value]]
|
49
|
+
end
|
50
|
+
end
|
36
51
|
end
|
37
52
|
|
38
53
|
# an alias for Hash.slice
|
data/lib/dreader/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adolfo Villafiorita
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -97,7 +97,7 @@ extensions: []
|
|
97
97
|
extra_rdoc_files: []
|
98
98
|
files:
|
99
99
|
- ".gitignore"
|
100
|
-
- CHANGELOG.
|
100
|
+
- CHANGELOG.org
|
101
101
|
- Gemfile
|
102
102
|
- Gemfile.lock
|
103
103
|
- LICENSE.txt
|
@@ -110,6 +110,7 @@ files:
|
|
110
110
|
- examples/age/age.rb
|
111
111
|
- examples/age_with_multiple_checks/Birthdays.ods
|
112
112
|
- examples/age_with_multiple_checks/age_with_multiple_checks.rb
|
113
|
+
- examples/local_vars/local_vars.rb
|
113
114
|
- examples/template/template_generation.rb
|
114
115
|
- examples/wikipedia_big_us_cities/big_us_cities.rb
|
115
116
|
- examples/wikipedia_big_us_cities/cities_by_state.ods
|
@@ -141,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
141
142
|
- !ruby/object:Gem::Version
|
142
143
|
version: '0'
|
143
144
|
requirements: []
|
144
|
-
rubygems_version: 3.
|
145
|
+
rubygems_version: 3.4.10
|
145
146
|
signing_key:
|
146
147
|
specification_version: 4
|
147
148
|
summary: Process and import data from CSV and spreadsheets
|