sycsvpro 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +113 -21
- data/bin/sycsvpro +98 -25
- data/lib/sycsvpro/calculator.rb +50 -10
- data/lib/sycsvpro/dsl.rb +12 -0
- data/lib/sycsvpro/header.rb +24 -8
- data/lib/sycsvpro/join.rb +159 -0
- data/lib/sycsvpro/table.rb +83 -5
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +1 -0
- data/spec/sycsvpro/calculator_spec.rb +31 -1
- data/spec/sycsvpro/header_spec.rb +7 -1
- data/spec/sycsvpro/join_spec.rb +178 -0
- data/spec/sycsvpro/table_spec.rb +153 -2
- data/sycsvpro.rdoc +9 -4
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -16,21 +16,32 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
16
16
|
* create or edit a Ruby script
|
17
17
|
* list scripts available optionally with methods (since version 0.0.7)
|
18
18
|
* execute a Ruby script file that operates a csv file
|
19
|
-
* create a table from a source file with dynamically create columns (since
|
19
|
+
* create a table from a source file with dynamically created columns (since
|
20
|
+
version 0.1.4)
|
21
|
+
* join two files based on a joint column value (since version 0.1.7)
|
20
22
|
|
21
23
|
To get help type
|
22
24
|
|
23
25
|
$ sycsvpro -h
|
24
26
|
|
25
|
-
In the following examples we assume the following
|
27
|
+
In the following examples we assume the following files 'machines.csv' and
|
28
|
+
'region.csv'
|
26
29
|
|
27
30
|
```
|
28
|
-
customer;machine;control;drive;motor;date;contract
|
29
|
-
hello;h1;con123;dri120;mot100;1.01.3013;1
|
30
|
-
hello;h2;con123;dri130;mot110;1.02.3012;1
|
31
|
-
indix;i1;con456;dri130;mot090;5.11.3013;1
|
32
|
-
chiro;c1;con333;dri110;mot100;1.10.3011;1
|
33
|
-
chiro;c2;con331;dri100;mot130;3.05.3010;1
|
31
|
+
customer;machine;control;drive;motor;date;contract;price;c-id
|
32
|
+
hello;h1;con123;dri120;mot100;1.01.3013;1;2.5;123
|
33
|
+
hello;h2;con123;dri130;mot110;1.02.3012;1;12.1;123
|
34
|
+
indix;i1;con456;dri130;mot090;5.11.3013;1;23.24;345
|
35
|
+
chiro;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
|
36
|
+
chiro;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
|
37
|
+
```
|
38
|
+
|
39
|
+
```
|
40
|
+
region;country;c-id
|
41
|
+
R1;DE;123
|
42
|
+
R2;AT;234
|
43
|
+
R3;US;345
|
44
|
+
R4;CA;456
|
34
45
|
```
|
35
46
|
|
36
47
|
Analyze
|
@@ -114,7 +125,8 @@ Count all customers (key column) in rows 2 to 20 that have machines that start
|
|
114
125
|
with *h* and have a contract valid beginning after 1.1.2000. Add a sum row with
|
115
126
|
title Total at column 1
|
116
127
|
|
117
|
-
$ sycsvpro -f in.csv -o out.csv count -r 2-20 -k 0:customer
|
128
|
+
$ sycsvpro -f in.csv -o out.csv count -r 2-20 -k 0:customer
|
129
|
+
-c 1:/^h/,5:">1.1.2000" --df "%d.%m.%Y" -s "Total:1"
|
118
130
|
|
119
131
|
The result in file out.csv is
|
120
132
|
|
@@ -143,12 +155,30 @@ The aggregation result in out.csv is
|
|
143
155
|
indix;1
|
144
156
|
chiro;2
|
145
157
|
|
158
|
+
Table
|
159
|
+
-----
|
160
|
+
Analyze the contract revenue per customer and per year
|
161
|
+
|
162
|
+
$ sycsvpro -f in.csv -o out.csv table
|
163
|
+
-h "Customer,c5=~/\\.(\\d{4})/"
|
164
|
+
-k c1
|
165
|
+
-c "c5=~/\\.(\\d{4})/:+n1"
|
166
|
+
|
167
|
+
The table result will be in out.csv
|
168
|
+
|
169
|
+
$ cat out.csv
|
170
|
+
Customer;3013;3012;3011;3010
|
171
|
+
hello;2.5;12.1;0;0
|
172
|
+
indix;23.24;0;0;0
|
173
|
+
chiro;0;0;122.15;25.3
|
174
|
+
|
146
175
|
Calc
|
147
176
|
----
|
148
177
|
Process arithmetic operations on the contract count and create a target column
|
149
178
|
and a sum which is added at the end of the result file
|
150
179
|
|
151
|
-
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
180
|
+
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
181
|
+
-c 6:*2,7:target=c6*10
|
152
182
|
|
153
183
|
$ cat out.csv
|
154
184
|
customer;machine;control;drive;motor;date;contract;target
|
@@ -162,6 +192,26 @@ and a sum which is added at the end of the result file
|
|
162
192
|
In the sum row non-numbers in the columns are converted to 0. Therefore column 0
|
163
193
|
is summed up to 0 as all strings are converted to 0.
|
164
194
|
|
195
|
+
Join
|
196
|
+
----
|
197
|
+
Join the machine and contract file with columns from the customer address file
|
198
|
+
|
199
|
+
$ sycsvpro -f in.csv -o out.csv join address.csv -c 0,1
|
200
|
+
-p 2,1
|
201
|
+
-i "COUNTRY,REGION"
|
202
|
+
-j "3=8"
|
203
|
+
|
204
|
+
This will create the result
|
205
|
+
|
206
|
+
```
|
207
|
+
customer;COUNTRY;REGION;machine;control;drive;motor;date;contract;price;c-id
|
208
|
+
hello;DE;R1;h1;con123;dri120;mot100;1.01.3013;1;2.5;123
|
209
|
+
hello;DE;R1;h2;con123;dri130;mot110;1.02.3012;1;12.1;123
|
210
|
+
indix;US;R3;i1;con456;dri130;mot090;5.11.3013;1;23.24;345
|
211
|
+
chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
|
212
|
+
chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
|
213
|
+
```
|
214
|
+
|
165
215
|
Sort
|
166
216
|
----
|
167
217
|
Sort rows on specified columns as an example sort rows based on customer
|
@@ -198,7 +248,7 @@ the name script.rb and a method call_me
|
|
198
248
|
List
|
199
249
|
----
|
200
250
|
List the scripts, insert-file or all scripts available in the scripts directory
|
201
|
-
which is also displayed
|
251
|
+
which is also displayed. Comments before methods are also displayed
|
202
252
|
|
203
253
|
script directory: ~/.syc/sycsvpro/scripts
|
204
254
|
$ sycsvpro list -m
|
@@ -257,6 +307,9 @@ end with _column_ or _columns_ dependent if a value or an array should be
|
|
257
307
|
returned. You can find the *rows* and *write_to* methods at
|
258
308
|
_lib/sycsvpro/dsl.rb_.
|
259
309
|
|
310
|
+
Examples for scripts using sycsvpro can be found at
|
311
|
+
[sugaryourcoffee/sycsvpro-scripts](https://github.com/sugaryourcoffee/sycsvpro-scripts)
|
312
|
+
|
260
313
|
Working with sycsvpro
|
261
314
|
=====================
|
262
315
|
|
@@ -316,19 +369,58 @@ Version 0.1.4
|
|
316
369
|
* Associate values to multi keys
|
317
370
|
* Create values based on arithmetic operations of source table data
|
318
371
|
Example
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
h
|
323
|
-
Another header column is created dynamicall based on the year part of
|
324
|
-
date in column 0
|
325
|
-
k
|
326
|
-
c
|
327
|
-
HeaderName is dynamically determined based on column 0 and added
|
328
|
-
of column 1 to this column that is associated to the key
|
372
|
+
`sycsvpro -f in.csv -o out.csv table -h "c4,c5,c0=~/\\.(\\d{4})/"
|
373
|
+
-k "c4,c5"
|
374
|
+
-c "c0=~/\\.(\\d{4})/:+n1"`
|
375
|
+
+ h the header is created from the source table header of column 4 and 5.
|
376
|
+
Another header column is created dynamically based on the year part of
|
377
|
+
a date in column 0
|
378
|
+
+ k the key is based on source table of column 4 and 5
|
379
|
+
+ c the column operation is in the form HeaderName:Operation. In this case
|
380
|
+
the HeaderName is dynamically determined based on column 0 and added
|
381
|
+
the value of column 1 to this column that is associated to the key
|
329
382
|
|
330
383
|
c4, n4, d4 are string, number and date values respectively
|
331
384
|
|
385
|
+
Version 0.1.5
|
386
|
+
-------------
|
387
|
+
* Add a sum row after the heading or at the end of file like so
|
388
|
+
`sycsvpro -f in.csv -o out.csv table -h "c4,c5,c0=~/\\.(\\d{4})/"
|
389
|
+
-k "c4,c5"
|
390
|
+
-c "c0=~/\\.(\\d{4})/:+n1"
|
391
|
+
-s "c0=~/\\.(\\d{4})/"`
|
392
|
+
This will sum up the dynamically created column.
|
393
|
+
|
394
|
+
Version 0.1.6
|
395
|
+
-------------
|
396
|
+
* Commas within columns expression are now ignored while splitting columns of
|
397
|
+
table columns
|
398
|
+
* Table takes a number format now with `--nf DE` which will convert numbers
|
399
|
+
from DE locale like 1.000,00 to 1000.00
|
400
|
+
* Table uses a precision for numbers. Default is 2. Can be assigned with `pr: 2`
|
401
|
+
|
402
|
+
Version 0.1.7
|
403
|
+
-------------
|
404
|
+
* Calc can now be used not only to do arithmetic operations on columns but also
|
405
|
+
string operations. Ultimately any valid Ruby command can be used to process a
|
406
|
+
column value
|
407
|
+
`sycsvpro -f customer.csv -o customer-number.csv calc
|
408
|
+
-h "Customer_ID,Customer,Country"
|
409
|
+
-r "1-eof"
|
410
|
+
-c "2:s0.scan(/^([A-Z]+)\\//).flatten[0],
|
411
|
+
0:s0.scan(/(?<=\\/)(.*)$/).flatten[0],1:s1"
|
412
|
+
* Join is a new class that joins two tables based on a joint column value
|
413
|
+
`sycsvpro -f infile.csv -o outfile.csv join source.csv -c "2,4"
|
414
|
+
-j "1=3"
|
415
|
+
-p "1,3"
|
416
|
+
-h "*"
|
417
|
+
-i "A,B"`
|
418
|
+
This will join infile.csv with source.csv based on the join columns (-j "1=3").
|
419
|
+
From source.csv columns 2 and 4 (-c "2,4") will be inserted at column
|
420
|
+
positions 1 and 3 (-p "1,3"). The header will be used from the infile.csv
|
421
|
+
(-h "*") supplemented by the columns A and B (-i "A,B") that will also be
|
422
|
+
positioned at column 1 and 3 (-p "1,3").
|
423
|
+
|
332
424
|
Installation
|
333
425
|
============
|
334
426
|
[](http://badge.fury.io/rb/sycsvpro)
|
data/bin/sycsvpro
CHANGED
@@ -89,12 +89,13 @@ command :extract do |c|
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
|
-
desc 'Collect values of specified rows and columns from the file and group
|
92
|
+
desc 'Collect values of specified rows and columns from the file and group '+
|
93
|
+
'them in categories'
|
93
94
|
command :collect do |c|
|
94
95
|
|
95
96
|
c.desc 'Rows to consider for collection'
|
96
97
|
c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
|
97
|
-
c.flag [:r, :row], :must_match => row_regex
|
98
|
+
c.flag [:r, :row], :must_match => row_regex
|
98
99
|
|
99
100
|
c.desc 'Columns to collect values from'
|
100
101
|
c.arg_name 'CATEGORY1:COL1,COL2,COL10-COL30+CATEGORY2:COL3-COL9'
|
@@ -120,7 +121,7 @@ desc 'Allocate specified columns from the file to a key value'
|
|
120
121
|
command :allocate do |c|
|
121
122
|
c.desc 'Rows to consider'
|
122
123
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
123
|
-
c.flag [:r, :row], :must_match => row_regex
|
124
|
+
c.flag [:r, :row], :must_match => row_regex
|
124
125
|
|
125
126
|
c.desc 'Key to allocate columns to'
|
126
127
|
c.arg_name '0'
|
@@ -147,7 +148,8 @@ command :allocate do |c|
|
|
147
148
|
end
|
148
149
|
end
|
149
150
|
|
150
|
-
desc 'Creates a script/insert file or opens a script/insert file for editing
|
151
|
+
desc 'Creates a script/insert file or opens a script/insert file for editing '+
|
152
|
+
'if it exists'
|
151
153
|
command :edit do |c|
|
152
154
|
c.desc 'Name of the script/insert file'
|
153
155
|
c.arg_name 'SCRIPT_NAME.rb|INSERT_NAME.ins'
|
@@ -159,12 +161,14 @@ command :edit do |c|
|
|
159
161
|
|
160
162
|
c.action do |global_options,options,args|
|
161
163
|
script_creator = Sycsvpro::ScriptCreator.new(dir: sycsvpro_directory,
|
162
|
-
script: options[:s],
|
164
|
+
script: options[:s],
|
165
|
+
method: options[:m])
|
163
166
|
system "vi #{script_creator.script_file}"
|
164
167
|
end
|
165
168
|
end
|
166
169
|
|
167
|
-
desc 'Lists script or insert files in the scripts directory with optionally
|
170
|
+
desc 'Lists script or insert files in the scripts directory with optionally '+
|
171
|
+
'listing methods of script files'
|
168
172
|
command :list do |c|
|
169
173
|
c.desc 'Type of script (Ruby, insert or all files)'
|
170
174
|
c.default_value 'script'
|
@@ -235,7 +239,7 @@ command :count do |c|
|
|
235
239
|
|
236
240
|
c.desc 'Rows to consider'
|
237
241
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
238
|
-
c.flag [:r, :row], :must_match => row_regex
|
242
|
+
c.flag [:r, :row], :must_match => row_regex
|
239
243
|
|
240
244
|
c.desc 'Columns to count where columns 2 and 3 are counted conditionally'
|
241
245
|
c.arg_name '1,2:<14.2.2014,10-30,3:>10'
|
@@ -274,7 +278,7 @@ command :aggregate do |c|
|
|
274
278
|
|
275
279
|
c.desc 'Rows to consider'
|
276
280
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
277
|
-
c.flag [:r, :row], :must_match => row_regex
|
281
|
+
c.flag [:r, :row], :must_match => row_regex
|
278
282
|
|
279
283
|
c.desc 'Columns to count'
|
280
284
|
c.arg_name '1,2-4'
|
@@ -311,18 +315,27 @@ command :table do |c|
|
|
311
315
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
312
316
|
c.flag [:r, :row], :must_match => row_regex
|
313
317
|
|
314
|
-
c.desc 'Header can be defined by Words (Year), references to source header
|
318
|
+
c.desc 'Header can be defined by Words (Year), references to source header '+
|
319
|
+
'(c1) and dynamically created header values (c1+c2,c0=~/\\.(\\d{4})/)'
|
315
320
|
c.arg_name "COL_A,c6,c2+c4,c0=~/\\.(\\d{4})/"
|
316
321
|
c.flag [:h, :header]
|
317
322
|
|
318
|
-
c.desc 'Key to that the other columns are associated to. A key can be
|
323
|
+
c.desc 'Key to that the other columns are associated to. A key can be '+
|
324
|
+
'created dynamically'
|
319
325
|
c.arg_name "c0=~/\\.(\\d{4})/,c6"
|
320
326
|
c.flag [:k, :key]
|
321
327
|
|
322
|
-
c.desc 'Columns to be associated to the key. Columns are identified by the
|
328
|
+
c.desc 'Columns to be associated to the key. Columns are identified by the '+
|
329
|
+
'column name. The operation to create the column value is separated '+
|
330
|
+
'by a colon (:) from the column name'
|
323
331
|
c.arg_name "c0=~/\\.(\\d{4})/:+n1,Value:+n2"
|
324
332
|
c.flag [:c, :col]
|
325
333
|
|
334
|
+
c.desc 'Adds a sum row after the heading or at the end of the file for col '+
|
335
|
+
'values'
|
336
|
+
c.arg_name "TOP|EOF:c0=~/\\.(\\d{4})/,Value"
|
337
|
+
c.flag [:s, :sum]
|
338
|
+
|
326
339
|
c.desc 'Format of date values'
|
327
340
|
c.arg_name '%d.%m.%Y|%m/%d/%Y|...'
|
328
341
|
c.flag [:df]
|
@@ -341,20 +354,76 @@ command :table do |c|
|
|
341
354
|
rows: options[:r],
|
342
355
|
header: options[:h],
|
343
356
|
key: options[:k],
|
344
|
-
cols: options[:c]
|
357
|
+
cols: options[:c],
|
358
|
+
sum: options[:s])
|
345
359
|
table.execute
|
346
360
|
puts "done"
|
347
361
|
end
|
348
362
|
|
349
363
|
end
|
350
364
|
|
365
|
+
desc 'Join two files based on a joint column value'
|
366
|
+
arg_name 'SOURCE_FILE'
|
367
|
+
command :join do |c|
|
368
|
+
c.desc 'Rows to consider'
|
369
|
+
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
370
|
+
c.flag [:r, :row], :must_match => row_regex
|
371
|
+
|
372
|
+
c.desc 'Columns to merge into the infile'
|
373
|
+
c.arg_name '1,5,7'
|
374
|
+
c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
|
375
|
+
|
376
|
+
c.desc 'The position at which column position to insert the columns within '+
|
377
|
+
'the infile. The sequence of the position is assigned to the columns '+
|
378
|
+
'to be inserted'
|
379
|
+
c.arg_name '5,1'
|
380
|
+
c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
|
381
|
+
|
382
|
+
c.desc 'The join columns in the source file, which contains the columns to '+
|
383
|
+
'be inserted into the infile'
|
384
|
+
c.arg_name '2=1'
|
385
|
+
c.flag [:j, :join], :must_match => /^\d+=\d+$/
|
386
|
+
|
387
|
+
c.desc 'Indicates whether the infile headerless'
|
388
|
+
c.default_value false
|
389
|
+
c.switch [:headerless]
|
390
|
+
|
391
|
+
c.desc 'Header columns of the infile'
|
392
|
+
c.arg_name '*,COL1,COL2'
|
393
|
+
c.default_value '*'
|
394
|
+
c.flag [:h, :header]
|
395
|
+
|
396
|
+
c.desc 'Header columns to be used for the inserted columns from the source '+
|
397
|
+
'file. The position (-p 5,1) determines where to insert the header '+
|
398
|
+
'columns'
|
399
|
+
c.arg_name 'INS_COL1,INS_COL2'
|
400
|
+
c.flag [:i, :insert]
|
401
|
+
|
402
|
+
c.action do |global_options,options,args|
|
403
|
+
join = Sycsvpro::Join.new(infile: global_options[:f],
|
404
|
+
outfile: global_options[:o],
|
405
|
+
source: args[0],
|
406
|
+
rows: options[:r],
|
407
|
+
cols: options[:c],
|
408
|
+
pos: options[:p],
|
409
|
+
joins: options[:j],
|
410
|
+
headerless: options[:headerless],
|
411
|
+
header: options[:h],
|
412
|
+
insert_header: options[:i])
|
413
|
+
print 'Joining...'
|
414
|
+
join.execute
|
415
|
+
print 'done'
|
416
|
+
end
|
417
|
+
end
|
418
|
+
|
351
419
|
desc 'Sort rows based on column values'
|
352
420
|
command :sort do |c|
|
353
421
|
c.desc 'Rows to consider'
|
354
422
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
355
|
-
c.flag [:r, :row], :must_match => row_regex
|
423
|
+
c.flag [:r, :row], :must_match => row_regex
|
356
424
|
|
357
|
-
c.desc 'Columns to sort based on a type (n = number, s = string, d = date)
|
425
|
+
c.desc 'Columns to sort based on a type (n = number, s = string, d = date) '+
|
426
|
+
'and its value'
|
358
427
|
c.arg_name 'n:1,s:2-5,d:7'
|
359
428
|
c.flag [:c, :col], :must_match => /[d|n|s]:\d+(?:-\d+|,[d|n|s]:\d+)*/
|
360
429
|
|
@@ -443,29 +512,33 @@ command :map do |c|
|
|
443
512
|
end
|
444
513
|
end
|
445
514
|
|
446
|
-
desc 'Process
|
515
|
+
desc 'Process operations on columns. Optionally add a sum row for columns with'+
|
516
|
+
'number values'
|
447
517
|
command :calc do |c|
|
448
518
|
c.desc 'The first non-empty column is considered the header. '+
|
449
|
-
'If additional columns are created then *,COL1,COL2 will create
|
450
|
-
'columns COL1 and COL2'
|
451
|
-
|
519
|
+
'If additional columns are created then *,COL1,COL2 will create '+
|
520
|
+
'the additional header columns COL1 and COL2. It is also possible '+
|
521
|
+
'to specify different header columns like COL1,COL2,COL3'
|
522
|
+
c.arg_name '*,COL2,COL2|COL1,COL2,COL3'
|
452
523
|
default_value '*'
|
453
|
-
c.flag [:h, :header], :must_match =>
|
524
|
+
c.flag [:h, :header], :must_match => /^[*|\w ]+(?:,[\w ]+)*/
|
454
525
|
|
455
526
|
c.desc 'Rows to consider for calculations'
|
456
527
|
c.arg_name 'ROW1,ROW2-ROW10,45-EOF,REGEXP'
|
457
|
-
c.flag [:r, :row], :must_match => row_regex
|
528
|
+
c.flag [:r, :row], :must_match => row_regex
|
458
529
|
|
459
|
-
c.desc 'Column to do
|
460
|
-
|
461
|
-
|
462
|
-
c.
|
530
|
+
c.desc 'Column to do operations on. s0 = String in column 0, c1 = number '+
|
531
|
+
'in column 1 and d2 = date in column 2. Examples: 2:c1+1,3:s0,'+
|
532
|
+
'4:s0.scan(/(\\d+)\//).flatten[0]'
|
533
|
+
c.arg_name "COL1:*2,COL2:-C3,COL3:*2+(4+C5)"
|
534
|
+
c.flag [:c, :col], :must_match => /^\d+:.+/
|
463
535
|
|
464
536
|
c.desc 'Date format of date columns'
|
465
537
|
c.arg_name '%d.%m.%Y|%Y-%m-%d|...'
|
466
538
|
c.flag [:df]
|
467
539
|
|
468
|
-
c.desc 'Indicate to add a sum row'
|
540
|
+
c.desc 'Indicate to add a sum row at end of file. Will sum up values with '+
|
541
|
+
'numbers. Columns with non-number values will be set to 0.'
|
469
542
|
c.switch [:s, :sum]
|
470
543
|
|
471
544
|
c.action do |global_options,options,args|
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -6,9 +6,42 @@ require 'date'
|
|
6
6
|
# Operating csv files
|
7
7
|
module Sycsvpro
|
8
8
|
|
9
|
-
# Processes
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Processes operations on columns of a csv file.
|
10
|
+
#
|
11
|
+
# A column value has to be a number in case of arithmetical operations.
|
12
|
+
#
|
13
|
+
# Possible operations are +, -, *, /, % and **.
|
14
|
+
#
|
15
|
+
# It is possible to use values of columns as an operand like multiply
|
16
|
+
# column 1 of the csv file with 2 and assign it to column 4 of the result
|
17
|
+
# file: c1*2
|
18
|
+
#
|
19
|
+
# Other values might be dates or strings.
|
20
|
+
#
|
21
|
+
# d1:: date value in column 1
|
22
|
+
# s2:: string value in column 2
|
23
|
+
# c3:: number value in column 3
|
24
|
+
#
|
25
|
+
# To assign a string from column 1 of the csv file to column 3 of the
|
26
|
+
# resulting file you can do like so: 3:s1
|
27
|
+
#
|
28
|
+
# You can also use Ruby expressions to assign values: 0:[d1,d2,d3].min - This
|
29
|
+
# will assign the least date value from columns 1, 2 and 3 to column 0.
|
30
|
+
#
|
31
|
+
# Note: If you assign a value to column 1 and subsequently are using column 1
|
32
|
+
# in other assignments then column 1 will have the result of a previous
|
33
|
+
# operation.
|
34
|
+
#
|
35
|
+
# Example:
|
36
|
+
# Having a row "CA/123456" and you want to have 123456 in column 0
|
37
|
+
# of the resulting csv file and CA in column 2. If you conduct following
|
38
|
+
# operations it will fail
|
39
|
+
# 1:s0.scan(/\/(.+)/).flatten[0] -> 123456
|
40
|
+
# 2:s0.scan(/([A-Z]+)/).flatten[0] -> nil
|
41
|
+
# To achieve the required result you have to change the operational sequence
|
42
|
+
# like so
|
43
|
+
# 2:s0.scan(/([A-Z]+)/).flatten[0] -> CA
|
44
|
+
# 1:s0.scan(/\/(.+)/).flatten[0] -> 123456
|
12
45
|
class Calculator
|
13
46
|
|
14
47
|
include Dsl
|
@@ -30,18 +63,24 @@ module Sycsvpro
|
|
30
63
|
# if true add a sum row at the bottom of the out file
|
31
64
|
attr_reader :add_sum_row
|
32
65
|
|
33
|
-
# Creates a new Calculator.
|
34
|
-
#
|
35
|
-
#
|
36
|
-
# arithmetic operation that creates new columns
|
66
|
+
# Creates a new Calculator. Optionally a header can be provided. The header
|
67
|
+
# can be supplemented with additional column names that are generated due
|
68
|
+
# to an arithmetic operation that creates new columns
|
37
69
|
# :call-seq:
|
38
70
|
# Sycsvpro::Calculator.new(infile: "in.csv",
|
39
71
|
# outfile: "out.csv",
|
40
72
|
# df: "%d.%m.%Y",
|
41
73
|
# rows: "1,2,BEGINn3>20END",
|
42
74
|
# header: "*,Count",
|
43
|
-
# cols: "4:
|
75
|
+
# cols: "4:c1+c2*2",
|
44
76
|
# sum: true).execute
|
77
|
+
# infile:: File that contains the rows to be operated on
|
78
|
+
# outfile:: Result of the operations
|
79
|
+
# df:: Date format
|
80
|
+
# rows:: Row filter that indicates which rows to consider
|
81
|
+
# header:: Header of the columns
|
82
|
+
# cols:: Operations on the column values
|
83
|
+
# sum:: Indicate whether to add a sum row
|
45
84
|
def initialize(options={})
|
46
85
|
@infile = options[:infile]
|
47
86
|
@outfile = options[:outfile]
|
@@ -59,6 +98,7 @@ module Sycsvpro
|
|
59
98
|
def method_missing(id, *args, &block)
|
60
99
|
return to_number(columns[$1.to_i]) if id =~ /c(\d+)/
|
61
100
|
return to_date(columns[$1.to_i]) if id =~ /d(\d+)/
|
101
|
+
return columns[$1.to_i] if id =~ /s(\d+)/
|
62
102
|
super
|
63
103
|
end
|
64
104
|
|
@@ -68,7 +108,7 @@ module Sycsvpro
|
|
68
108
|
|
69
109
|
File.open(outfile, 'w') do |out|
|
70
110
|
File.open(infile).each_with_index do |line, index|
|
71
|
-
next if line.chomp.empty?
|
111
|
+
next if line.chomp.empty? || unstring(line).chomp.split(';').empty?
|
72
112
|
|
73
113
|
unless processed_header
|
74
114
|
header_row = header.process(line.chomp)
|
@@ -115,7 +155,7 @@ module Sycsvpro
|
|
115
155
|
def create_calculator(code)
|
116
156
|
code.split(/,(?=\d+:)/).each do |operation|
|
117
157
|
col, term = operation.split(':')
|
118
|
-
term = "c#{col}#{term}"
|
158
|
+
term = "c#{col}#{term}" if term =~ /^[+\-*\/%]/
|
119
159
|
formulae[col] = term
|
120
160
|
end
|
121
161
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -2,6 +2,12 @@ require_relative 'row_filter'
|
|
2
2
|
|
3
3
|
# Methods to be used in customer specific script files
|
4
4
|
module Dsl
|
5
|
+
|
6
|
+
# Splits comma separated strings that contain commas within the value. Such
|
7
|
+
# values have to be enclosed between BEGIN and END
|
8
|
+
# Example:
|
9
|
+
# Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
|
10
|
+
COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
|
5
11
|
|
6
12
|
# read arguments provided at invocation
|
7
13
|
# :call-seq:
|
@@ -85,6 +91,12 @@ module Dsl
|
|
85
91
|
str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
86
92
|
end
|
87
93
|
|
94
|
+
# Retrieves the values scanned by a COMMA_SPLITTER_REGEX
|
95
|
+
def split_by_comma_regex(values)
|
96
|
+
values.scan(COMMA_SPLITTER_REGEX).flatten.each.
|
97
|
+
collect { |h| h.gsub(/BEGIN|END/, "") }
|
98
|
+
end
|
99
|
+
|
88
100
|
private
|
89
101
|
|
90
102
|
# Assigns values to keys that are used in rows and yielded to the block
|
data/lib/sycsvpro/header.rb
CHANGED
@@ -11,14 +11,16 @@ module Sycsvpro
|
|
11
11
|
|
12
12
|
# Header columns
|
13
13
|
attr_reader :header_cols
|
14
|
+
# Columns that will be inserted into the header at the defined positions
|
15
|
+
attr_reader :insert_cols
|
16
|
+
# Positions where to insert the insert_cols
|
17
|
+
attr_reader :positions
|
14
18
|
|
15
19
|
# Create a new header
|
16
|
-
def initialize(header)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
@header_cols = []
|
21
|
-
end
|
20
|
+
def initialize(header, options = {})
|
21
|
+
@header_cols = split_by_comma_regex(header || "")
|
22
|
+
@insert_cols = (options[:insert] || "").split(',')
|
23
|
+
@positions = options[:pos] || []
|
22
24
|
end
|
23
25
|
|
24
26
|
def method_missing(id, *args, &block)
|
@@ -28,7 +30,7 @@ module Sycsvpro
|
|
28
30
|
|
29
31
|
# Returns the header
|
30
32
|
def process(line, values = true)
|
31
|
-
return "" if @header_cols.empty?
|
33
|
+
return "" if @header_cols.empty? && @insert_cols.empty?
|
32
34
|
header_patterns = {}
|
33
35
|
@row_cols = unstring(line).split(';')
|
34
36
|
if @header_cols[0] == '*'
|
@@ -52,13 +54,14 @@ module Sycsvpro
|
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
57
|
+
insert_header_cols
|
55
58
|
header_patterns.each { |i,h| @header_cols.insert(i,h) }
|
56
59
|
to_s
|
57
60
|
end
|
58
61
|
|
59
62
|
# Returns @header_cols without pattern
|
60
63
|
def clear_header_cols
|
61
|
-
@header_cols.
|
64
|
+
@header_cols.select { |col| col !~ /^c\d+[=~+]{1,2}/ }
|
62
65
|
end
|
63
66
|
|
64
67
|
# Returns the index of the column
|
@@ -66,11 +69,24 @@ module Sycsvpro
|
|
66
69
|
clear_header_cols.index(value)
|
67
70
|
end
|
68
71
|
|
72
|
+
# Returns the value of column number
|
73
|
+
def value_of(column)
|
74
|
+
clear_header_cols[column]
|
75
|
+
end
|
76
|
+
|
69
77
|
# Returns the header
|
70
78
|
def to_s
|
71
79
|
clear_header_cols.join(';')
|
72
80
|
end
|
73
81
|
|
82
|
+
private
|
83
|
+
|
84
|
+
def insert_header_cols
|
85
|
+
@header_cols.flatten!
|
86
|
+
positions.sort.each { |p| header_cols.insert(p, "") }
|
87
|
+
positions.each_with_index { |p,i| header_cols[p] = insert_cols[i] }
|
88
|
+
end
|
89
|
+
|
74
90
|
end
|
75
91
|
|
76
92
|
end
|