sycsvpro 0.1.4 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +113 -21
- data/bin/sycsvpro +98 -25
- data/lib/sycsvpro/calculator.rb +50 -10
- data/lib/sycsvpro/dsl.rb +12 -0
- data/lib/sycsvpro/header.rb +24 -8
- data/lib/sycsvpro/join.rb +159 -0
- data/lib/sycsvpro/table.rb +83 -5
- data/lib/sycsvpro/version.rb +1 -1
- data/lib/sycsvpro.rb +1 -0
- data/spec/sycsvpro/calculator_spec.rb +31 -1
- data/spec/sycsvpro/header_spec.rb +7 -1
- data/spec/sycsvpro/join_spec.rb +178 -0
- data/spec/sycsvpro/table_spec.rb +153 -2
- data/sycsvpro.rdoc +9 -4
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -16,21 +16,32 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
16
16
|
* create or edit a Ruby script
|
17
17
|
* list scripts available optionally with methods (since version 0.0.7)
|
18
18
|
* execute a Ruby script file that operates a csv file
|
19
|
-
* create a table from a source file with dynamically create columns (since
|
19
|
+
* create a table from a source file with dynamically created columns (since
|
20
|
+
version 0.1.4)
|
21
|
+
* join two files based on a joint column value (since version 0.1.7)
|
20
22
|
|
21
23
|
To get help type
|
22
24
|
|
23
25
|
$ sycsvpro -h
|
24
26
|
|
25
|
-
In the following examples we assume the following
|
27
|
+
In the following examples we assume the following files 'machines.csv' and
|
28
|
+
'region.csv'
|
26
29
|
|
27
30
|
```
|
28
|
-
customer;machine;control;drive;motor;date;contract
|
29
|
-
hello;h1;con123;dri120;mot100;1.01.3013;1
|
30
|
-
hello;h2;con123;dri130;mot110;1.02.3012;1
|
31
|
-
indix;i1;con456;dri130;mot090;5.11.3013;1
|
32
|
-
chiro;c1;con333;dri110;mot100;1.10.3011;1
|
33
|
-
chiro;c2;con331;dri100;mot130;3.05.3010;1
|
31
|
+
customer;machine;control;drive;motor;date;contract;price;c-id
|
32
|
+
hello;h1;con123;dri120;mot100;1.01.3013;1;2.5;123
|
33
|
+
hello;h2;con123;dri130;mot110;1.02.3012;1;12.1;123
|
34
|
+
indix;i1;con456;dri130;mot090;5.11.3013;1;23.24;345
|
35
|
+
chiro;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
|
36
|
+
chiro;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
|
37
|
+
```
|
38
|
+
|
39
|
+
```
|
40
|
+
region;country;c-id
|
41
|
+
R1;DE;123
|
42
|
+
R2;AT;234
|
43
|
+
R3;US;345
|
44
|
+
R4;CA;456
|
34
45
|
```
|
35
46
|
|
36
47
|
Analyze
|
@@ -114,7 +125,8 @@ Count all customers (key column) in rows 2 to 20 that have machines that start
|
|
114
125
|
with *h* and have a contract valid beginning after 1.1.2000. Add a sum row with
|
115
126
|
title Total at column 1
|
116
127
|
|
117
|
-
$ sycsvpro -f in.csv -o out.csv count -r 2-20 -k 0:customer
|
128
|
+
$ sycsvpro -f in.csv -o out.csv count -r 2-20 -k 0:customer
|
129
|
+
-c 1:/^h/,5:">1.1.2000" --df "%d.%m.%Y" -s "Total:1"
|
118
130
|
|
119
131
|
The result in file out.csv is
|
120
132
|
|
@@ -143,12 +155,30 @@ The aggregation result in out.csv is
|
|
143
155
|
indix;1
|
144
156
|
chiro;2
|
145
157
|
|
158
|
+
Table
|
159
|
+
-----
|
160
|
+
Analyze the contract revenue per customer and per year
|
161
|
+
|
162
|
+
$ sycsvpro -f in.csv -o out.csv table
|
163
|
+
-h "Customer,c5=~/\\.(\\d{4})/"
|
164
|
+
-k c1
|
165
|
+
-c "c5=~/\\.(\\d{4})/:+n1"
|
166
|
+
|
167
|
+
The table result will be in out.csv
|
168
|
+
|
169
|
+
$ cat out.csv
|
170
|
+
Customer;3013;3012;3011;3010
|
171
|
+
hello;2.5;12.1;0;0
|
172
|
+
indix;23.24;0;0;0
|
173
|
+
chiro;0;0;122.15;25.3
|
174
|
+
|
146
175
|
Calc
|
147
176
|
----
|
148
177
|
Process arithmetic operations on the contract count and create a target column
|
149
178
|
and a sum which is added at the end of the result file
|
150
179
|
|
151
|
-
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
180
|
+
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target
|
181
|
+
-c 6:*2,7:target=c6*10
|
152
182
|
|
153
183
|
$ cat out.csv
|
154
184
|
customer;machine;control;drive;motor;date;contract;target
|
@@ -162,6 +192,26 @@ and a sum which is added at the end of the result file
|
|
162
192
|
In the sum row non-numbers in the columns are converted to 0. Therefore column 0
|
163
193
|
is summed up to 0 as all strings are converted to 0.
|
164
194
|
|
195
|
+
Join
|
196
|
+
----
|
197
|
+
Join the machine and contract file with columns from the customer address file
|
198
|
+
|
199
|
+
$ sycsvpro -f in.csv -o out.csv join address.csv -c 0,1
|
200
|
+
-p 2,1
|
201
|
+
-i "COUNTRY,REGION"
|
202
|
+
-j "3=8"
|
203
|
+
|
204
|
+
This will create the result
|
205
|
+
|
206
|
+
```
|
207
|
+
customer;COUNTRY;REGION;machine;control;drive;motor;date;contract;price;c-id
|
208
|
+
hello;DE;R1;h1;con123;dri120;mot100;1.01.3013;1;2.5;123
|
209
|
+
hello;DE;R1;h2;con123;dri130;mot110;1.02.3012;1;12.1;123
|
210
|
+
indix;US;R3;i1;con456;dri130;mot090;5.11.3013;1;23.24;345
|
211
|
+
chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
|
212
|
+
chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
|
213
|
+
```
|
214
|
+
|
165
215
|
Sort
|
166
216
|
----
|
167
217
|
Sort rows on specified columns as an example sort rows based on customer
|
@@ -198,7 +248,7 @@ the name script.rb and a method call_me
|
|
198
248
|
List
|
199
249
|
----
|
200
250
|
List the scripts, insert-file or all scripts available in the scripts directory
|
201
|
-
which is also displayed
|
251
|
+
which is also displayed. Comments before methods are also displayed
|
202
252
|
|
203
253
|
script directory: ~/.syc/sycsvpro/scripts
|
204
254
|
$ sycsvpro list -m
|
@@ -257,6 +307,9 @@ end with _column_ or _columns_ dependent if a value or an array should be
|
|
257
307
|
returned. You can find the *rows* and *write_to* methods at
|
258
308
|
_lib/sycsvpro/dsl.rb_.
|
259
309
|
|
310
|
+
Examples for scripts using sycsvpro can be found at
|
311
|
+
[sugaryourcoffee/sycsvpro-scripts](https://github.com/sugaryourcoffee/sycsvpro-scripts)
|
312
|
+
|
260
313
|
Working with sycsvpro
|
261
314
|
=====================
|
262
315
|
|
@@ -316,19 +369,58 @@ Version 0.1.4
|
|
316
369
|
* Associate values to multi keys
|
317
370
|
* Create values based on arithmetic operations of source table data
|
318
371
|
Example
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
h
|
323
|
-
Another header column is created dynamicall based on the year part of
|
324
|
-
date in column 0
|
325
|
-
k
|
326
|
-
c
|
327
|
-
HeaderName is dynamically determined based on column 0 and added
|
328
|
-
of column 1 to this column that is associated to the key
|
372
|
+
`sycsvpro -f in.csv -o out.csv table -h "c4,c5,c0=~/\\.(\\d{4})/"
|
373
|
+
-k "c4,c5"
|
374
|
+
-c "c0=~/\\.(\\d{4})/:+n1"`
|
375
|
+
+ h the header is created from the source table header of column 4 and 5.
|
376
|
+
Another header column is created dynamically based on the year part of
|
377
|
+
a date in column 0
|
378
|
+
+ k the key is based on source table of column 4 and 5
|
379
|
+
+ c the column operation is in the form HeaderName:Operation. In this case
|
380
|
+
the HeaderName is dynamically determined based on column 0 and added
|
381
|
+
the value of column 1 to this column that is associated to the key
|
329
382
|
|
330
383
|
c4, n4, d4 are string, number and date values respectively
|
331
384
|
|
385
|
+
Version 0.1.5
|
386
|
+
-------------
|
387
|
+
* Add a sum row after the heading or at the end of file like so
|
388
|
+
`sycsvpro -f in.csv -o out.csv table -h "c4,c5,c0=~/\\.(\\d{4})/"
|
389
|
+
-k "c4,c5"
|
390
|
+
-c "c0=~/\\.(\\d{4})/:+n1"
|
391
|
+
-s "c0=~/\\.(\\d{4})/"`
|
392
|
+
This will sum up the dynamically created column.
|
393
|
+
|
394
|
+
Version 0.1.6
|
395
|
+
-------------
|
396
|
+
* Commas within columns expression are now ignored while splitting columns of
|
397
|
+
table columns
|
398
|
+
* Table takes a number format now with `--nf DE` which will convert numbers
|
399
|
+
from DE locale like 1.000,00 to 1000.00
|
400
|
+
* Table uses a precision for numbers. Default is 2. Can be assigned with `pr: 2`
|
401
|
+
|
402
|
+
Version 0.1.7
|
403
|
+
-------------
|
404
|
+
* Calc can now be used not only to do arithmetic operations on columns but also
|
405
|
+
string operations. Ultimately any valid Ruby command can be used to process a
|
406
|
+
column value
|
407
|
+
`sycsvpro -f customer.csv -o customer-number.csv calc
|
408
|
+
-h "Customer_ID,Customer,Country"
|
409
|
+
-r "1-eof"
|
410
|
+
-c "2:s0.scan(/^([A-Z]+)\\//).flatten[0],
|
411
|
+
0:s0.scan(/(?<=\\/)(.*)$/).flatten[0],1:s1"
|
412
|
+
* Join is a new class that joins two tables based on a joint column value
|
413
|
+
`sycsvpro -f infile.csv -o outfile.csv join source.csv -c "2,4"
|
414
|
+
-j "1=3"
|
415
|
+
-p "1,3"
|
416
|
+
-h "*"
|
417
|
+
-i "A,B"`
|
418
|
+
This will join infile.csv with source.csv based on the join columns (-j "1=3").
|
419
|
+
From source.csv columns 2 and 4 (-c "2,4") will be inserted at column
|
420
|
+
positions 1 and 3 (-p "1,3"). The header will be used from the infile.csv
|
421
|
+
(-h "*") supplemented by the columns A and B (-i "A,B") that will also be
|
422
|
+
positioned at column 1 and 3 (-p "1,3").
|
423
|
+
|
332
424
|
Installation
|
333
425
|
============
|
334
426
|
[![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
|
data/bin/sycsvpro
CHANGED
@@ -89,12 +89,13 @@ command :extract do |c|
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
|
-
desc 'Collect values of specified rows and columns from the file and group
|
92
|
+
desc 'Collect values of specified rows and columns from the file and group '+
|
93
|
+
'them in categories'
|
93
94
|
command :collect do |c|
|
94
95
|
|
95
96
|
c.desc 'Rows to consider for collection'
|
96
97
|
c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
|
97
|
-
c.flag [:r, :row], :must_match => row_regex
|
98
|
+
c.flag [:r, :row], :must_match => row_regex
|
98
99
|
|
99
100
|
c.desc 'Columns to collect values from'
|
100
101
|
c.arg_name 'CATEGORY1:COL1,COL2,COL10-COL30+CATEGORY2:COL3-COL9'
|
@@ -120,7 +121,7 @@ desc 'Allocate specified columns from the file to a key value'
|
|
120
121
|
command :allocate do |c|
|
121
122
|
c.desc 'Rows to consider'
|
122
123
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
123
|
-
c.flag [:r, :row], :must_match => row_regex
|
124
|
+
c.flag [:r, :row], :must_match => row_regex
|
124
125
|
|
125
126
|
c.desc 'Key to allocate columns to'
|
126
127
|
c.arg_name '0'
|
@@ -147,7 +148,8 @@ command :allocate do |c|
|
|
147
148
|
end
|
148
149
|
end
|
149
150
|
|
150
|
-
desc 'Creates a script/insert file or opens a script/insert file for editing
|
151
|
+
desc 'Creates a script/insert file or opens a script/insert file for editing '+
|
152
|
+
'if it exists'
|
151
153
|
command :edit do |c|
|
152
154
|
c.desc 'Name of the script/insert file'
|
153
155
|
c.arg_name 'SCRIPT_NAME.rb|INSERT_NAME.ins'
|
@@ -159,12 +161,14 @@ command :edit do |c|
|
|
159
161
|
|
160
162
|
c.action do |global_options,options,args|
|
161
163
|
script_creator = Sycsvpro::ScriptCreator.new(dir: sycsvpro_directory,
|
162
|
-
script: options[:s],
|
164
|
+
script: options[:s],
|
165
|
+
method: options[:m])
|
163
166
|
system "vi #{script_creator.script_file}"
|
164
167
|
end
|
165
168
|
end
|
166
169
|
|
167
|
-
desc 'Lists script or insert files in the scripts directory with optionally
|
170
|
+
desc 'Lists script or insert files in the scripts directory with optionally '+
|
171
|
+
'listing methods of script files'
|
168
172
|
command :list do |c|
|
169
173
|
c.desc 'Type of script (Ruby, insert or all files)'
|
170
174
|
c.default_value 'script'
|
@@ -235,7 +239,7 @@ command :count do |c|
|
|
235
239
|
|
236
240
|
c.desc 'Rows to consider'
|
237
241
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
238
|
-
c.flag [:r, :row], :must_match => row_regex
|
242
|
+
c.flag [:r, :row], :must_match => row_regex
|
239
243
|
|
240
244
|
c.desc 'Columns to count where columns 2 and 3 are counted conditionally'
|
241
245
|
c.arg_name '1,2:<14.2.2014,10-30,3:>10'
|
@@ -274,7 +278,7 @@ command :aggregate do |c|
|
|
274
278
|
|
275
279
|
c.desc 'Rows to consider'
|
276
280
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
277
|
-
c.flag [:r, :row], :must_match => row_regex
|
281
|
+
c.flag [:r, :row], :must_match => row_regex
|
278
282
|
|
279
283
|
c.desc 'Columns to count'
|
280
284
|
c.arg_name '1,2-4'
|
@@ -311,18 +315,27 @@ command :table do |c|
|
|
311
315
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
312
316
|
c.flag [:r, :row], :must_match => row_regex
|
313
317
|
|
314
|
-
c.desc 'Header can be defined by Words (Year), references to source header
|
318
|
+
c.desc 'Header can be defined by Words (Year), references to source header '+
|
319
|
+
'(c1) and dynamically created header values (c1+c2,c0=~/\\.(\\d{4})/)'
|
315
320
|
c.arg_name "COL_A,c6,c2+c4,c0=~/\\.(\\d{4})/"
|
316
321
|
c.flag [:h, :header]
|
317
322
|
|
318
|
-
c.desc 'Key to that the other columns are associated to. A key can be
|
323
|
+
c.desc 'Key to that the other columns are associated to. A key can be '+
|
324
|
+
'created dynamically'
|
319
325
|
c.arg_name "c0=~/\\.(\\d{4})/,c6"
|
320
326
|
c.flag [:k, :key]
|
321
327
|
|
322
|
-
c.desc 'Columns to be associated to the key. Columns are identified by the
|
328
|
+
c.desc 'Columns to be associated to the key. Columns are identified by the '+
|
329
|
+
'column name. The operation to create the column value is separated '+
|
330
|
+
'by a colon (:) from the column name'
|
323
331
|
c.arg_name "c0=~/\\.(\\d{4})/:+n1,Value:+n2"
|
324
332
|
c.flag [:c, :col]
|
325
333
|
|
334
|
+
c.desc 'Adds a sum row after the heading or at the end of the file for col '+
|
335
|
+
'values'
|
336
|
+
c.arg_name "TOP|EOF:c0=~/\\.(\\d{4})/,Value"
|
337
|
+
c.flag [:s, :sum]
|
338
|
+
|
326
339
|
c.desc 'Format of date values'
|
327
340
|
c.arg_name '%d.%m.%Y|%m/%d/%Y|...'
|
328
341
|
c.flag [:df]
|
@@ -341,20 +354,76 @@ command :table do |c|
|
|
341
354
|
rows: options[:r],
|
342
355
|
header: options[:h],
|
343
356
|
key: options[:k],
|
344
|
-
cols: options[:c]
|
357
|
+
cols: options[:c],
|
358
|
+
sum: options[:s])
|
345
359
|
table.execute
|
346
360
|
puts "done"
|
347
361
|
end
|
348
362
|
|
349
363
|
end
|
350
364
|
|
365
|
+
desc 'Join two files based on a joint column value'
|
366
|
+
arg_name 'SOURCE_FILE'
|
367
|
+
command :join do |c|
|
368
|
+
c.desc 'Rows to consider'
|
369
|
+
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
370
|
+
c.flag [:r, :row], :must_match => row_regex
|
371
|
+
|
372
|
+
c.desc 'Columns to merge into the infile'
|
373
|
+
c.arg_name '1,5,7'
|
374
|
+
c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
|
375
|
+
|
376
|
+
c.desc 'The position at which column position to insert the columns within '+
|
377
|
+
'the infile. The sequence of the position is assigned to the columns '+
|
378
|
+
'to be inserted'
|
379
|
+
c.arg_name '5,1'
|
380
|
+
c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
|
381
|
+
|
382
|
+
c.desc 'The join columns in the source file, which contains the columns to '+
|
383
|
+
'be inserted into the infile'
|
384
|
+
c.arg_name '2=1'
|
385
|
+
c.flag [:j, :join], :must_match => /^\d+=\d+$/
|
386
|
+
|
387
|
+
c.desc 'Indicates whether the infile headerless'
|
388
|
+
c.default_value false
|
389
|
+
c.switch [:headerless]
|
390
|
+
|
391
|
+
c.desc 'Header columns of the infile'
|
392
|
+
c.arg_name '*,COL1,COL2'
|
393
|
+
c.default_value '*'
|
394
|
+
c.flag [:h, :header]
|
395
|
+
|
396
|
+
c.desc 'Header columns to be used for the inserted columns from the source '+
|
397
|
+
'file. The position (-p 5,1) determines where to insert the header '+
|
398
|
+
'columns'
|
399
|
+
c.arg_name 'INS_COL1,INS_COL2'
|
400
|
+
c.flag [:i, :insert]
|
401
|
+
|
402
|
+
c.action do |global_options,options,args|
|
403
|
+
join = Sycsvpro::Join.new(infile: global_options[:f],
|
404
|
+
outfile: global_options[:o],
|
405
|
+
source: args[0],
|
406
|
+
rows: options[:r],
|
407
|
+
cols: options[:c],
|
408
|
+
pos: options[:p],
|
409
|
+
joins: options[:j],
|
410
|
+
headerless: options[:headerless],
|
411
|
+
header: options[:h],
|
412
|
+
insert_header: options[:i])
|
413
|
+
print 'Joining...'
|
414
|
+
join.execute
|
415
|
+
print 'done'
|
416
|
+
end
|
417
|
+
end
|
418
|
+
|
351
419
|
desc 'Sort rows based on column values'
|
352
420
|
command :sort do |c|
|
353
421
|
c.desc 'Rows to consider'
|
354
422
|
c.arg_name '1,2,10-30,45-EOF,REGEXP'
|
355
|
-
c.flag [:r, :row], :must_match => row_regex
|
423
|
+
c.flag [:r, :row], :must_match => row_regex
|
356
424
|
|
357
|
-
c.desc 'Columns to sort based on a type (n = number, s = string, d = date)
|
425
|
+
c.desc 'Columns to sort based on a type (n = number, s = string, d = date) '+
|
426
|
+
'and its value'
|
358
427
|
c.arg_name 'n:1,s:2-5,d:7'
|
359
428
|
c.flag [:c, :col], :must_match => /[d|n|s]:\d+(?:-\d+|,[d|n|s]:\d+)*/
|
360
429
|
|
@@ -443,29 +512,33 @@ command :map do |c|
|
|
443
512
|
end
|
444
513
|
end
|
445
514
|
|
446
|
-
desc 'Process
|
515
|
+
desc 'Process operations on columns. Optionally add a sum row for columns with'+
|
516
|
+
'number values'
|
447
517
|
command :calc do |c|
|
448
518
|
c.desc 'The first non-empty column is considered the header. '+
|
449
|
-
'If additional columns are created then *,COL1,COL2 will create
|
450
|
-
'columns COL1 and COL2'
|
451
|
-
|
519
|
+
'If additional columns are created then *,COL1,COL2 will create '+
|
520
|
+
'the additional header columns COL1 and COL2. It is also possible '+
|
521
|
+
'to specify different header columns like COL1,COL2,COL3'
|
522
|
+
c.arg_name '*,COL2,COL2|COL1,COL2,COL3'
|
452
523
|
default_value '*'
|
453
|
-
c.flag [:h, :header], :must_match =>
|
524
|
+
c.flag [:h, :header], :must_match => /^[*|\w ]+(?:,[\w ]+)*/
|
454
525
|
|
455
526
|
c.desc 'Rows to consider for calculations'
|
456
527
|
c.arg_name 'ROW1,ROW2-ROW10,45-EOF,REGEXP'
|
457
|
-
c.flag [:r, :row], :must_match => row_regex
|
528
|
+
c.flag [:r, :row], :must_match => row_regex
|
458
529
|
|
459
|
-
c.desc 'Column to do
|
460
|
-
|
461
|
-
|
462
|
-
c.
|
530
|
+
c.desc 'Column to do operations on. s0 = String in column 0, c1 = number '+
|
531
|
+
'in column 1 and d2 = date in column 2. Examples: 2:c1+1,3:s0,'+
|
532
|
+
'4:s0.scan(/(\\d+)\//).flatten[0]'
|
533
|
+
c.arg_name "COL1:*2,COL2:-C3,COL3:*2+(4+C5)"
|
534
|
+
c.flag [:c, :col], :must_match => /^\d+:.+/
|
463
535
|
|
464
536
|
c.desc 'Date format of date columns'
|
465
537
|
c.arg_name '%d.%m.%Y|%Y-%m-%d|...'
|
466
538
|
c.flag [:df]
|
467
539
|
|
468
|
-
c.desc 'Indicate to add a sum row'
|
540
|
+
c.desc 'Indicate to add a sum row at end of file. Will sum up values with '+
|
541
|
+
'numbers. Columns with non-number values will be set to 0.'
|
469
542
|
c.switch [:s, :sum]
|
470
543
|
|
471
544
|
c.action do |global_options,options,args|
|
data/lib/sycsvpro/calculator.rb
CHANGED
@@ -6,9 +6,42 @@ require 'date'
|
|
6
6
|
# Operating csv files
|
7
7
|
module Sycsvpro
|
8
8
|
|
9
|
-
# Processes
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Processes operations on columns of a csv file.
|
10
|
+
#
|
11
|
+
# A column value has to be a number in case of arithmetical operations.
|
12
|
+
#
|
13
|
+
# Possible operations are +, -, *, /, % and **.
|
14
|
+
#
|
15
|
+
# It is possible to use values of columns as an operator like multiply
|
16
|
+
# column 1 of the csv file with 2 and assign it to column 4 of the result
|
17
|
+
# file: c1*2
|
18
|
+
#
|
19
|
+
# Other values might be dates or strings.
|
20
|
+
#
|
21
|
+
# d1:: date value in column 1
|
22
|
+
# s2:: string value in column 2
|
23
|
+
# c3:: number value in column 3
|
24
|
+
#
|
25
|
+
# To assign a string from column 1 of the csv file to column 3 of the
|
26
|
+
# resulting file you can do like so: 3:s1
|
27
|
+
#
|
28
|
+
# You can also use Ruby expressions to assign values: 0:[d1,d2,d3].min - This
|
29
|
+
# will assign the least date value from columns 1, 2 and 3 to column 0.
|
30
|
+
#
|
31
|
+
# Note: If you assign a value to column 1 and subsequently are using column 1
|
32
|
+
# in other assignments then column 1 will have the result of a previous
|
33
|
+
# operation.
|
34
|
+
#
|
35
|
+
# Example:
|
36
|
+
# Having a row "CA/123456" and you want to have 123456 in column 0
|
37
|
+
# of the resulting csv file and CA in column 2. If you conduct following
|
38
|
+
# operations it will fail
|
39
|
+
# 1:s0.scan(/\/(.+)/).flatten[0] -> 123456
|
40
|
+
# 2:s0.scan(/([A-Z]+)/).flatten[0] -> nil
|
41
|
+
# To achieve the required result you have to change the operational sequence
|
42
|
+
# like so
|
43
|
+
# 2:s0.scan(/([A-Z]+)/).flatten[0] -> CA
|
44
|
+
# 1:s0.scan(/\/(.+)/).flatten[0] -> 123456
|
12
45
|
class Calculator
|
13
46
|
|
14
47
|
include Dsl
|
@@ -30,18 +63,24 @@ module Sycsvpro
|
|
30
63
|
# if true add a sum row at the bottom of the out file
|
31
64
|
attr_reader :add_sum_row
|
32
65
|
|
33
|
-
# Creates a new Calculator.
|
34
|
-
#
|
35
|
-
#
|
36
|
-
# arithmetic operation that creates new columns
|
66
|
+
# Creates a new Calculator. Optionally a header can be provided. The header
|
67
|
+
# can be supplemented with additional column names that are generated due
|
68
|
+
# to an arithmetic operation that creates new columns
|
37
69
|
# :call-seq:
|
38
70
|
# Sycsvpro::Calculator.new(infile: "in.csv",
|
39
71
|
# outfile: "out.csv",
|
40
72
|
# df: "%d.%m.%Y",
|
41
73
|
# rows: "1,2,BEGINn3>20END",
|
42
74
|
# header: "*,Count",
|
43
|
-
# cols: "4:
|
75
|
+
# cols: "4:c1+c2*2",
|
44
76
|
# sum: true).execute
|
77
|
+
# infile:: File that contains the rows to be operated on
|
78
|
+
# outfile:: Result of the operations
|
79
|
+
# df:: Date format
|
80
|
+
# rows:: Row filter that indicates which rows to consider
|
81
|
+
# header:: Header of the columns
|
82
|
+
# cols:: Operations on the column values
|
83
|
+
# sum:: Indicate whether to add a sum row
|
45
84
|
def initialize(options={})
|
46
85
|
@infile = options[:infile]
|
47
86
|
@outfile = options[:outfile]
|
@@ -59,6 +98,7 @@ module Sycsvpro
|
|
59
98
|
def method_missing(id, *args, &block)
|
60
99
|
return to_number(columns[$1.to_i]) if id =~ /c(\d+)/
|
61
100
|
return to_date(columns[$1.to_i]) if id =~ /d(\d+)/
|
101
|
+
return columns[$1.to_i] if id =~ /s(\d+)/
|
62
102
|
super
|
63
103
|
end
|
64
104
|
|
@@ -68,7 +108,7 @@ module Sycsvpro
|
|
68
108
|
|
69
109
|
File.open(outfile, 'w') do |out|
|
70
110
|
File.open(infile).each_with_index do |line, index|
|
71
|
-
next if line.chomp.empty?
|
111
|
+
next if line.chomp.empty? || unstring(line).chomp.split(';').empty?
|
72
112
|
|
73
113
|
unless processed_header
|
74
114
|
header_row = header.process(line.chomp)
|
@@ -115,7 +155,7 @@ module Sycsvpro
|
|
115
155
|
def create_calculator(code)
|
116
156
|
code.split(/,(?=\d+:)/).each do |operation|
|
117
157
|
col, term = operation.split(':')
|
118
|
-
term = "c#{col}#{term}"
|
158
|
+
term = "c#{col}#{term}" if term =~ /^[+\-*\/%]/
|
119
159
|
formulae[col] = term
|
120
160
|
end
|
121
161
|
end
|
data/lib/sycsvpro/dsl.rb
CHANGED
@@ -2,6 +2,12 @@ require_relative 'row_filter'
|
|
2
2
|
|
3
3
|
# Methods to be used in customer specific script files
|
4
4
|
module Dsl
|
5
|
+
|
6
|
+
# Splits comma separated strings that contain commas within the value. Such
|
7
|
+
# values have to be enclosed between BEGIN and END
|
8
|
+
# Example:
|
9
|
+
# Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
|
10
|
+
COMMA_SPLITTER_REGEX = /(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
|
5
11
|
|
6
12
|
# read arguments provided at invocation
|
7
13
|
# :call-seq:
|
@@ -85,6 +91,12 @@ module Dsl
|
|
85
91
|
str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
86
92
|
end
|
87
93
|
|
94
|
+
# Retrieves the values scanned by a COMMA_SPLITTER_REGEX
|
95
|
+
def split_by_comma_regex(values)
|
96
|
+
values.scan(COMMA_SPLITTER_REGEX).flatten.each.
|
97
|
+
collect { |h| h.gsub(/BEGIN|END/, "") }
|
98
|
+
end
|
99
|
+
|
88
100
|
private
|
89
101
|
|
90
102
|
# Assigns values to keys that are used in rows and yielded to the block
|
data/lib/sycsvpro/header.rb
CHANGED
@@ -11,14 +11,16 @@ module Sycsvpro
|
|
11
11
|
|
12
12
|
# Header columns
|
13
13
|
attr_reader :header_cols
|
14
|
+
# Columns that will be inserted into the header at the defined positions
|
15
|
+
attr_reader :insert_cols
|
16
|
+
# Positions where to insert the insert_cols
|
17
|
+
attr_reader :positions
|
14
18
|
|
15
19
|
# Create a new header
|
16
|
-
def initialize(header)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
@header_cols = []
|
21
|
-
end
|
20
|
+
def initialize(header, options = {})
|
21
|
+
@header_cols = split_by_comma_regex(header || "")
|
22
|
+
@insert_cols = (options[:insert] || "").split(',')
|
23
|
+
@positions = options[:pos] || []
|
22
24
|
end
|
23
25
|
|
24
26
|
def method_missing(id, *args, &block)
|
@@ -28,7 +30,7 @@ module Sycsvpro
|
|
28
30
|
|
29
31
|
# Returns the header
|
30
32
|
def process(line, values = true)
|
31
|
-
return "" if @header_cols.empty?
|
33
|
+
return "" if @header_cols.empty? && @insert_cols.empty?
|
32
34
|
header_patterns = {}
|
33
35
|
@row_cols = unstring(line).split(';')
|
34
36
|
if @header_cols[0] == '*'
|
@@ -52,13 +54,14 @@ module Sycsvpro
|
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
57
|
+
insert_header_cols
|
55
58
|
header_patterns.each { |i,h| @header_cols.insert(i,h) }
|
56
59
|
to_s
|
57
60
|
end
|
58
61
|
|
59
62
|
# Returns @header_cols without pattern
|
60
63
|
def clear_header_cols
|
61
|
-
@header_cols.
|
64
|
+
@header_cols.select { |col| col !~ /^c\d+[=~+]{1,2}/ }
|
62
65
|
end
|
63
66
|
|
64
67
|
# Returns the index of the column
|
@@ -66,11 +69,24 @@ module Sycsvpro
|
|
66
69
|
clear_header_cols.index(value)
|
67
70
|
end
|
68
71
|
|
72
|
+
# Returns the value of column number
|
73
|
+
def value_of(column)
|
74
|
+
clear_header_cols[column]
|
75
|
+
end
|
76
|
+
|
69
77
|
# Returns the header
|
70
78
|
def to_s
|
71
79
|
clear_header_cols.join(';')
|
72
80
|
end
|
73
81
|
|
82
|
+
private
|
83
|
+
|
84
|
+
def insert_header_cols
|
85
|
+
@header_cols.flatten!
|
86
|
+
positions.sort.each { |p| header_cols.insert(p, "") }
|
87
|
+
positions.each_with_index { |p,i| header_cols[p] = insert_cols[i] }
|
88
|
+
end
|
89
|
+
|
74
90
|
end
|
75
91
|
|
76
92
|
end
|