sycsvpro 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.7)
4
+ sycsvpro (0.1.8)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -212,6 +212,15 @@ chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
212
212
  chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
213
213
  ```
214
214
 
215
+ If you have multiple IDs in a row then you can also conduct multiple joins in
216
+ one streak.
217
+
218
+ $ sycsvpro -f in.csv -o out.csv join address.csv -c "0,1;0,3"
219
+ -p "2,1;4,5"
220
+ -i "COUNTRY,REGION"
221
+ -j "3=8;3=10"
222
+
223
+
215
224
  Sort
216
225
  ----
217
226
  Sort rows on specified columns as an example sort rows based on customer
@@ -421,6 +430,10 @@ Version 0.1.7
421
430
  (-h "*") supplemented by the columns A and B (-i "A,B") that will also be
422
431
  positioned at column 1 and 3 (-p "1,3").
423
432
 
433
+ Version 0.1.8
434
+ -------------
435
+ * Join now can join multiple key values in 1 streak
436
+
424
437
  Installation
425
438
  ============
426
439
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
data/bin/sycsvpro CHANGED
@@ -227,9 +227,10 @@ command :execute do |c|
227
227
  end
228
228
  end
229
229
 
230
- desc 'Counts the occurences of column values. Uses column values as headings with count as ' +
231
- 'values. Columns with a condition will be added as new columns and the condition will ' +
232
- 'be set as column name. Optionally adds a sum row'
230
+ desc 'Counts the occurences of column values. Uses column values as headings '+
231
+ 'with count as values. Columns with a condition will be added as new '+
232
+ 'columns and the condition will be set as column name. Optionally adds a '+
233
+ 'sum row'
233
234
 
234
235
  command :count do |c|
235
236
 
@@ -284,8 +285,9 @@ command :aggregate do |c|
284
285
  c.arg_name '1,2-4'
285
286
  c.flag [:c, :col], :must_match => /^\d+(?:,\d+|-\d+)*/
286
287
 
287
- c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. The sum row is ' +
288
- 'specified by the row position. The sum column is the last column in the row'
288
+ c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. '+
289
+ 'The sum row is specified by the row position. The sum column is the '+
290
+ 'last column in the row'
289
291
  c.arg_name 'SUM_ROW_TITLE:ROW,SUM_COL_TITLE'
290
292
  c.flag [:s, :sum], :must_match => /^\w+:\d+(?:,\w+)?|^\w+/
291
293
 
@@ -307,7 +309,10 @@ command :aggregate do |c|
307
309
 
308
310
  end
309
311
 
310
- desc 'Creates a table from a source file'
312
+ desc 'Associates columns to a key value. A key value can be a combination of '+
313
+ 'multiple column values. Values associated can be generated from an '+
314
+ 'arithmetic or string operation. Header columns can be generated '+
315
+ 'dynamically based on column values'
311
316
 
312
317
  command :table do |c|
313
318
 
@@ -371,18 +376,18 @@ command :join do |c|
371
376
 
372
377
  c.desc 'Columns to merge into the infile'
373
378
  c.arg_name '1,5,7'
374
- c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
379
+ c.flag [:c, :cols], :must_match => /^\d+(?:[,;]\d+)*/
375
380
 
376
381
  c.desc 'The position at which column position to insert the columns within '+
377
382
  'the infile. The sequence of the position is assigned to the columns '+
378
383
  'to be inserted'
379
384
  c.arg_name '5,1'
380
- c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
385
+ c.flag [:p, :pos], :must_match => /^\d+(?:[,;]\d+)*/
381
386
 
382
387
  c.desc 'The join columns in the source file, which contains the columns to '+
383
388
  'be inserted into the infile'
384
389
  c.arg_name '2=1'
385
- c.flag [:j, :join], :must_match => /^\d+=\d+$/
390
+ c.flag [:j, :join], :must_match => /^\d+(?:[=;]\d+)*/
386
391
 
387
392
  c.desc 'Indicates whether the infile headerless'
388
393
  c.default_value false
@@ -416,7 +421,8 @@ command :join do |c|
416
421
  end
417
422
  end
418
423
 
419
- desc 'Sort rows based on column values'
424
+ desc 'Sort rows based on column values. It is possible to sort on multiple '+
425
+ 'columns'
420
426
  command :sort do |c|
421
427
  c.desc 'Rows to consider'
422
428
  c.arg_name '1,2,10-30,45-EOF,REGEXP'
@@ -457,7 +463,9 @@ command :sort do |c|
457
463
  end
458
464
  end
459
465
 
460
- desc 'Inserts rows from a file to a csv-file'
466
+ desc 'Inserts rows from a file to a csv-file. You can for instance add sum '+
467
+ 'operations in Excel or LibreOffice style if you want to process the '+
468
+ 'resulting file in Excel or LibreOffice'
461
469
  arg_name 'INSERT_FILE'
462
470
  command :insert do |c|
463
471
 
@@ -486,7 +494,7 @@ arg_name 'MAPPINGS-FILE'
486
494
  command :map do |c|
487
495
  c.desc 'Rows to consider'
488
496
  c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
489
- c.flag [:r, :row], :must_match => row_regex #/\d+(?:,\d+|-\d+|-eof|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/i
497
+ c.flag [:r, :row], :must_match => row_regex
490
498
 
491
499
  c.desc 'Columns to consider for mapping'
492
500
  c.arg_name 'COL1,COL2,COL10-COL30'
@@ -648,7 +656,8 @@ on_error do |exception|
648
656
  true
649
657
  end
650
658
 
651
- # the -r flag can take a EOF value which is replaced by the actual row value of the input file
659
+ # the -r flag can take an EOF value which is replaced by the actual row value
660
+ # of the input file
652
661
  def set_max_row(options, max_row)
653
662
  options.each do |option, value|
654
663
  case option
@@ -19,7 +19,7 @@ module Sycsvpro
19
19
  # Create a new header
20
20
  def initialize(header, options = {})
21
21
  @header_cols = split_by_comma_regex(header || "")
22
- @insert_cols = (options[:insert] || "").split(',')
22
+ @insert_cols = (options[:insert] || "").split(/,|;/)
23
23
  @positions = options[:pos] || []
24
24
  end
25
25
 
data/lib/sycsvpro/join.rb CHANGED
@@ -1,23 +1,33 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
 
4
+ # Joiner holds all join data as join columns, positions where to insert the
5
+ # columns from the source file, cols which are the cols inserted from the
6
+ # source file and the lookup table with keys and associated column values.
7
+ # :call-seq:
8
+ # Sycsvpro::Joiner.new([1,2], [3,4], [4,5,6], { rows: {} })
9
+ Joiner = Struct.new(:join, :pos, :cols, :lookup)
10
+
4
11
  # Join joins two files based on a join key value.
5
12
  # Example
6
13
  # File 1 (infile)
7
14
  # |Name |ID |
15
+ # |-----|---|
8
16
  # |Hank |123|
9
17
  # |Frank|234|
10
18
  # |Mia |345|
11
19
  # |Moira|234|
12
20
  #
13
21
  # File 2 (source)
14
- # |Company|Phone|ID|
22
+ # |Company|Phone|ID |
23
+ # |-------|-----|---|
15
24
  # |Siem |4848 |123|
16
25
  # |Helo |993 |345|
17
26
  # |Wara |3333 |234|
18
27
  #
19
28
  # File 3 (outfile)
20
29
  # |Name |ID |Company|Phone|
30
+ # |-----|---|-------|-----|
21
31
  # |Hank |123|Siem |4848 |
22
32
  # |Frank|234|Wara |3333 |
23
33
  # |Mia |345|Helo |993 |
@@ -34,16 +44,12 @@ module Sycsvpro
34
44
  attr_reader :source
35
45
  # filter that is used for rows
36
46
  attr_reader :row_filter
37
- # columns to insert
38
- attr_reader :columns
39
47
  # position where to insert the columns into the infile
40
48
  attr_reader :positions
41
49
  # header of the outfile
42
50
  attr_reader :header
43
51
  # indicates whether the infile is headerless
44
52
  attr_reader :headerless
45
- # lookup table where the assigned values are stored at
46
- attr_reader :lookup_table
47
53
 
48
54
  # Creates a Join which can be invoked as follows
49
55
  # :call-seq:
@@ -61,8 +67,8 @@ module Sycsvpro
61
67
  # infile:: csv file to operate on
62
68
  # outfile:: csv file with the result
63
69
  # source:: csv file that contains the values to join to infile
64
- # rows: rows to consider for operation. Rows that don't match the pattern
65
- # will be skipped for operation
70
+ # rows:: rows to consider for operation. Rows that don't match the pattern
71
+ # will be skipped for operation
66
72
  # cols:: columns to insert from the source to the infile
67
73
  # pos:: column positions where to insert the values and the insert_header
68
74
  # columns
@@ -76,9 +82,9 @@ module Sycsvpro
76
82
  @outfile = options[:outfile]
77
83
  @source = options[:source]
78
84
  @row_filter = RowFilter.new(options[:rows], df: options[:df])
79
- @columns = options[:cols].split(',').collect { |c| c.to_i }
80
- @positions = col_positions(options[:pos], @columns)
81
- @joins = options[:joins].split('=').collect { |j| j.to_i }
85
+ @positions = create_joiners(options[:joins],
86
+ options[:cols],
87
+ options[:pos])
82
88
  @headerless = options[:headerless].nil? ? false : options[:headerless]
83
89
  @header = Header.new(options[:header] || '*',
84
90
  pos: @positions,
@@ -108,14 +114,17 @@ module Sycsvpro
108
114
  next if row_filter.process(line, row: index).nil?
109
115
 
110
116
  values = line.split(';')
117
+ target = values.dup
118
+
119
+ @positions.sort.each { |p| target.insert(p, "") }
120
+
121
+ @joiners.each do |joiner|
122
+ key = values[joiner.join[1]]
123
+ row = joiner.lookup[:rows][key] || []
124
+ joiner.pos.each_with_index { |p,i| target[p] = row[i] }
125
+ end
111
126
 
112
- key = values[@joins[1]]
113
- row = lookup_table[:rows][key] || []
114
-
115
- lookup_table[:pos].sort.each { |p| values.insert(p, "") }
116
- lookup_table[:pos].each_with_index { |p,i| values[p] = row[i] }
117
-
118
- out.puts values.join(';')
127
+ out.puts target.join(';')
119
128
  end
120
129
  end
121
130
  end
@@ -125,8 +134,6 @@ module Sycsvpro
125
134
  # Creates a lookup table from the source file values. The join column of
126
135
  # the source file is the key
127
136
  def create_lookup_table
128
- @lookup_table = { pos: positions, rows: {} }
129
-
130
137
  File.open(source).each_with_index do |line|
131
138
  next if line.chomp.empty?
132
139
 
@@ -134,12 +141,15 @@ module Sycsvpro
134
141
 
135
142
  next if values.empty?
136
143
 
137
- key = values[@joins[0]]
138
- lookup_table[:rows][key] = []
144
+ @joiners.each do |joiner|
145
+ key = values[joiner.join[0]]
146
+ joiner.lookup[:rows][key] = []
139
147
 
140
- columns.each do |i|
141
- lookup_table[:rows][key] << values[i]
148
+ joiner.cols.each do |i|
149
+ joiner.lookup[:rows][key] << values[i]
150
+ end
142
151
  end
152
+
143
153
  end
144
154
  end
145
155
 
@@ -148,12 +158,39 @@ module Sycsvpro
148
158
  # are put at the beginning of the row
149
159
  def col_positions(pos, cols)
150
160
  if pos.nil? || pos.empty?
151
- Array.new(cols.size) { |c| c }
161
+ pos = []
162
+ cols.each { |c| pos << Array.new(c.size) { |c| c } }
163
+ pos
152
164
  else
153
- pos.split(',').collect { |p| p.to_i }
165
+ pos.split(';').collect { |p| p.split(',').collect { |p| p.to_i } }
154
166
  end
155
167
  end
156
168
 
169
+ # Initializes joiners based on joins, positions and columns
170
+ #
171
+ # Possible input forms are:
172
+ # joins:: "4=0;4=1" or "4=1"
173
+ # positions:: "1,2;4,5" or "1,2"
174
+ # columns:: "1,2;3,4"
175
+ #
176
+ # This has the semantic of 'insert columns 1 and 2 at positions 1 and 2
177
+ # for key 0 and columns 3 and 4 at positions 4 and 5 for key 1'. Key 4 is
178
+ # the corresponding value in the source file
179
+ #
180
+ # Return value:: positions where to insert values from source file
181
+ def create_joiners(j, c, p)
182
+ js = j.split(';').collect { |j| j.split('=').collect { |j| j.to_i } }
183
+ cs = c.split(';').collect { |c| c.split(',').collect { |c| c.to_i } }
184
+ ps = col_positions(p, cs)
185
+
186
+ @joiners = []
187
+ (0...js.size).each do |i|
188
+ @joiners << Joiner.new(js[i], ps[i], cs[i], { rows: { } })
189
+ end
190
+
191
+ ps.flatten
192
+ end
193
+
157
194
  end
158
195
 
159
196
  end
@@ -24,7 +24,7 @@ module Sycsvpro
24
24
  include Dsl
25
25
 
26
26
  # Regex to split parameters
27
- COL_SPLITTER = /,(?=[\w +]*:)/
27
+ COL_SPLITTER = /,(?=['\w +-]*:)/
28
28
  # infile contains the data that is operated on
29
29
  attr_reader :infile
30
30
  # outfile is the file where the result is written to
@@ -59,9 +59,9 @@ module Sycsvpro
59
59
  # df:: date format
60
60
  # nf:: number format of number values. "DE" e.g. is 1.000,00 where as
61
61
  # US is 1,000.00
62
- # pr:: precision of number values. Default 2
63
- # rows: rows to consider for operation. Rows that don't match the pattern
64
- # will be skipped for operation
62
+ # pr:: precision of number values.
63
+ # rows:: rows to consider for operation. Rows that don't match the pattern
64
+ # will be skipped for operation
65
65
  # header:: Header of the csv file
66
66
  # key:: Values located at value 0 and subsequent columns
67
67
  # cols:: Values added to columns based on an operation or assignment
@@ -75,7 +75,7 @@ module Sycsvpro
75
75
  @keys = split_by_comma_regex(options[:key]) #options[:key].split(',')
76
76
  @cols = options[:cols].split(COL_SPLITTER)
77
77
  @number_format = options[:nf] || 'EN'
78
- @precision = options[:pr] || 2
78
+ @precision = options[:pr].to_i if options[:pr]
79
79
  prepare_sum_row options[:sum]
80
80
  @rows = {}
81
81
  end
@@ -162,7 +162,7 @@ module Sycsvpro
162
162
  column = evaluate(column) if column =~ /^c\d+[=~+]/
163
163
  previous_value = row[:cols][column]
164
164
  if value = eval("#{row[:cols][column]}#{formula}")
165
- row[:cols][column] = value.round(@precision)
165
+ row[:cols][column] = @precision ? value.round(@precision) : value
166
166
  add_to_sum_row(row[:cols][column] - previous_value, column)
167
167
  end
168
168
  end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.7'
4
+ VERSION = '0.1.8'
5
5
  end
@@ -39,7 +39,9 @@ module Sycsvpro
39
39
  end
40
40
 
41
41
  it "should extract rows base on regex including commas" do
42
- extractor = Extractor.new(infile: @in_file2, outfile: @out_file, rows: "/[56789]\\d+|\\d{3,}/")
42
+ extractor = Extractor.new(infile: @in_file2,
43
+ outfile: @out_file,
44
+ rows: "/[56789]\\d+|\\d{3,}/")
43
45
 
44
46
  extractor.execute
45
47
 
@@ -6,6 +6,7 @@ module Sycsvpro
6
6
 
7
7
  before do
8
8
  @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
9
+ @in_file_2 = File.join(File.dirname(__FILE__), "files/multiple-persons.csv")
9
10
  @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
10
11
  @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
11
12
  end
@@ -45,6 +46,36 @@ module Sycsvpro
45
46
 
46
47
  end
47
48
 
49
+ it "should join files inserting values on multiple positions" do
50
+ cols = "1,2;1,2"
51
+ insert_col_pos = "3,2;6,5"
52
+ insert_header = "A-COUNTRY,A-STATE;B-COUNTRY,B-STATE"
53
+ joins = "0=1;0=2"
54
+
55
+ Sycsvpro::Join.new(infile: @in_file_2,
56
+ outfile: @out_file,
57
+ source: @source_file,
58
+ cols: cols,
59
+ joins: joins,
60
+ insert_header: insert_header,
61
+ pos: insert_col_pos).execute
62
+
63
+ result = [ "Name;A_ID;A-STATE;A-COUNTRY;B_ID;B-STATE;B-COUNTRY",
64
+ "Hank;123;A4;AT;234;C3;CA",
65
+ "Frank;234;C3;CA;345;D1;DE",
66
+ "Mia;345;D1;DE;456;U2;US",
67
+ "Arwen;456;U2;US;123;A4;AT" ]
68
+
69
+ rows = 0
70
+
71
+ File.new(@out_file, 'r').each_with_index do |line, index|
72
+ expect(line.chomp).to eq result[index]
73
+ rows += 1
74
+ end
75
+
76
+ rows.should eq result.size
77
+ end
78
+
48
79
  it "should join files without explicit insert header" do
49
80
  cols = "1,2"
50
81
  insert_col_pos = "2,1"
@@ -142,6 +142,7 @@ module Sycsvpro
142
142
  "RP:+n2 if #{rp_order_type}.index(c1),"+
143
143
  "Total:+n2",
144
144
  nf: "DE",
145
+ pr: "2",
145
146
  sum: "top:SP,RP,Total").execute
146
147
 
147
148
  result = [ "Year;SP;RP;Total",
@@ -167,6 +168,7 @@ module Sycsvpro
167
168
  key: "c0=~/\\.(\\d{4})/",
168
169
  cols: "c1=~/^([A-Z]{1,2})/:+n2,Total:+n2",
169
170
  nf: "DE",
171
+ pr: 2,
170
172
  sum: "top:BEGINc1=~/^([A-Z]{1,2})/END,Total").execute
171
173
 
172
174
  result = [ "Year;ZE;ZR;Total",
@@ -210,6 +212,29 @@ module Sycsvpro
210
212
 
211
213
  end
212
214
 
215
+ it "should add a count column for the occurance of column values" do
216
+ Sycsvpro::Table.new(infile: @in_file,
217
+ outfile: @out_file,
218
+ header: "Year,c6,c1,c2+c3,c2+c3+'-Count'",
219
+ key: "c0=~/\\.(\\d{4})/,c6",
220
+ cols: "Value:+n1,c2+c3:+n1,c2+c3+'-Count':+1",
221
+ sum: "top:Value,c2+c3").execute
222
+
223
+ result = [ "Year;Country;Value;A1;B2;B4;B4-Count;B2-Count;A1-Count",
224
+ ";;95.2;41.0;21.0;33.2;;;",
225
+ "2013;AT;53.7;20.5;0;33.2;1;0;1",
226
+ "2014;DE;21.0;0;21.0;0;0;1;0",
227
+ "2014;AT;20.5;20.5;0;0;0;0;1" ]
228
+
229
+ rows = 0
230
+
231
+ File.open(@out_file).each_with_index do |line, index|
232
+ line.chomp.should eq result[index]
233
+ rows += 1
234
+ end
235
+
236
+ rows.should eq result.size
237
+ end
213
238
 
214
239
  end
215
240
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-27 00:00:00.000000000 Z
12
+ date: 2014-06-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake