sycsvpro 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.7)
4
+ sycsvpro (0.1.8)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -212,6 +212,15 @@ chiro;CA;R4;c1;con333;dri110;mot100;1.10.3011;1;122.15;456
212
212
  chiro;CA;R4;c2;con331;dri100;mot130;3.05.3010;1;25.3;456
213
213
  ```
214
214
 
215
+ If you have multiple IDs in a row then you can also conduct multiple joins in
216
+ one streak.
217
+
218
+ $ sycsvpro -f in.csv -o out.csv join address.csv -c 0,1;0,3
219
+ -p 2,1;4,5
220
+ -i "COUNTRY,REGION"
221
+ -j "3=8;3=10"
222
+
223
+
215
224
  Sort
216
225
  ----
217
226
  Sort rows on specified columns as an example sort rows based on customer
@@ -421,6 +430,10 @@ Version 0.1.7
421
430
  (-h "*") supplemented by the columns A and B (-i "A,B") that will also be
422
431
  positioned at column 1 and 3 (-p "1,3").
423
432
 
433
+ Version 0.1.8
434
+ -------------
435
+ * Join can now join on multiple key values in one streak
436
+
424
437
  Installation
425
438
  ============
426
439
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
data/bin/sycsvpro CHANGED
@@ -227,9 +227,10 @@ command :execute do |c|
227
227
  end
228
228
  end
229
229
 
230
- desc 'Counts the occurences of column values. Uses column values as headings with count as ' +
231
- 'values. Columns with a condition will be added as new columns and the condition will ' +
232
- 'be set as column name. Optionally adds a sum row'
230
+ desc 'Counts the occurences of column values. Uses column values as headings '+
231
+ 'with count as values. Columns with a condition will be added as new '+
232
+ 'columns and the condition will be set as column name. Optionally adds a '+
233
+ 'sum row'
233
234
 
234
235
  command :count do |c|
235
236
 
@@ -284,8 +285,9 @@ command :aggregate do |c|
284
285
  c.arg_name '1,2-4'
285
286
  c.flag [:c, :col], :must_match => /^\d+(?:,\d+|-\d+)*/
286
287
 
287
- c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. The sum row is ' +
288
- 'specified by the row position. The sum column is the last column in the row'
288
+ c.desc 'Adds a sum row and a sum column with TITLE for the counted columns. '+
289
+ 'The sum row is specified by the row position. The sum column is the '+
290
+ 'last column in the row'
289
291
  c.arg_name 'SUM_ROW_TITLE:ROW,SUM_COL_TITLE'
290
292
  c.flag [:s, :sum], :must_match => /^\w+:\d+(?:,\w+)?|^\w+/
291
293
 
@@ -307,7 +309,10 @@ command :aggregate do |c|
307
309
 
308
310
  end
309
311
 
310
- desc 'Creates a table from a source file'
312
+ desc 'Associates columns to a key value. A key value can be a combination of '+
313
+ 'multiple column values. Values associated can be generated from an '+
314
+ 'arithmetic or string operation. Header columns can be generated '+
315
+ 'dynamically based on column values'
311
316
 
312
317
  command :table do |c|
313
318
 
@@ -371,18 +376,18 @@ command :join do |c|
371
376
 
372
377
  c.desc 'Columns to merge into the infile'
373
378
  c.arg_name '1,5,7'
374
- c.flag [:c, :cols], :must_match => /^\d+(?:,\d+)*/
379
+ c.flag [:c, :cols], :must_match => /^\d+(?:[,;]\d+)*/
375
380
 
376
381
  c.desc 'The position at which column position to insert the columns within '+
377
382
  'the infile. The sequence of the position is assigned to the columns '+
378
383
  'to be inserted'
379
384
  c.arg_name '5,1'
380
- c.flag [:p, :pos], :must_match => /^\d+(?:,\d+)*/
385
+ c.flag [:p, :pos], :must_match => /^\d+(?:[,;]\d+)*/
381
386
 
382
387
  c.desc 'The join columns in the source file, which contains the columns to '+
383
388
  'be inserted into the infile'
384
389
  c.arg_name '2=1'
385
- c.flag [:j, :join], :must_match => /^\d+=\d+$/
390
+ c.flag [:j, :join], :must_match => /^\d+(?:[=;]\d+)*/
386
391
 
387
392
  c.desc 'Indicates whether the infile headerless'
388
393
  c.default_value false
@@ -416,7 +421,8 @@ command :join do |c|
416
421
  end
417
422
  end
418
423
 
419
- desc 'Sort rows based on column values'
424
+ desc 'Sort rows based on column values. It is possible to sort on multiple '+
425
+ 'columns'
420
426
  command :sort do |c|
421
427
  c.desc 'Rows to consider'
422
428
  c.arg_name '1,2,10-30,45-EOF,REGEXP'
@@ -457,7 +463,9 @@ command :sort do |c|
457
463
  end
458
464
  end
459
465
 
460
- desc 'Inserts rows from a file to a csv-file'
466
+ desc 'Inserts rows from a file to a csv-file. You can for instance add sum '+
467
+ 'operations in Excel or LibreOffice style if you want to process the '+
468
+ 'resulting file in Excel or LibreOffice'
461
469
  arg_name 'INSERT_FILE'
462
470
  command :insert do |c|
463
471
 
@@ -486,7 +494,7 @@ arg_name 'MAPPINGS-FILE'
486
494
  command :map do |c|
487
495
  c.desc 'Rows to consider'
488
496
  c.arg_name 'ROW1,ROW2,ROW10-ROW30,45-EOF,REGEXP'
489
- c.flag [:r, :row], :must_match => row_regex #/\d+(?:,\d+|-\d+|-eof|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/i
497
+ c.flag [:r, :row], :must_match => row_regex
490
498
 
491
499
  c.desc 'Columns to consider for mapping'
492
500
  c.arg_name 'COL1,COL2,COL10-COL30'
@@ -648,7 +656,8 @@ on_error do |exception|
648
656
  true
649
657
  end
650
658
 
651
- # the -r flag can take a EOF value which is replaced by the actual row value of the input file
659
+ # the -r flag can take an EOF value which is replaced by the actual row value
660
+ # of the input file
652
661
  def set_max_row(options, max_row)
653
662
  options.each do |option, value|
654
663
  case option
@@ -19,7 +19,7 @@ module Sycsvpro
19
19
  # Create a new header
20
20
  def initialize(header, options = {})
21
21
  @header_cols = split_by_comma_regex(header || "")
22
- @insert_cols = (options[:insert] || "").split(',')
22
+ @insert_cols = (options[:insert] || "").split(/,|;/)
23
23
  @positions = options[:pos] || []
24
24
  end
25
25
 
data/lib/sycsvpro/join.rb CHANGED
@@ -1,23 +1,33 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
 
4
+ # Joiner holds all join data as join columns, positions where to insert the
5
+ # columns from the source file, cols which are the cols inserted from the
6
+ # source file and the lookup table with keys and associated column values.
7
+ # :call-seq:
8
+ # Sycsvpro::Joiner.new([1,2], [3,4], [4,5,6], { rows: {} })
9
+ Joiner = Struct.new(:join, :pos, :cols, :lookup)
10
+
4
11
  # Join joins two files based on a join key value.
5
12
  # Example
6
13
  # File 1 (infile)
7
14
  # |Name |ID |
15
+ # |-----|---|
8
16
  # |Hank |123|
9
17
  # |Frank|234|
10
18
  # |Mia |345|
11
19
  # |Moira|234|
12
20
  #
13
21
  # File 2 (source)
14
- # |Company|Phone|ID|
22
+ # |Company|Phone|ID |
23
+ # |-------|-----|---|
15
24
  # |Siem |4848 |123|
16
25
  # |Helo |993 |345|
17
26
  # |Wara |3333 |234|
18
27
  #
19
28
  # File 3 (outfile)
20
29
  # |Name |ID |Company|Phone|
30
+ # |-----|---|-------|-----|
21
31
  # |Hank |123|Siem |4848 |
22
32
  # |Frank|234|Wara |3333 |
23
33
  # |Mia |345|Helo |993 |
@@ -34,16 +44,12 @@ module Sycsvpro
34
44
  attr_reader :source
35
45
  # filter that is used for rows
36
46
  attr_reader :row_filter
37
- # columns to insert
38
- attr_reader :columns
39
47
  # posititon where to insert the columns into the infile
40
48
  attr_reader :positions
41
49
  # header of the outfile
42
50
  attr_reader :header
43
51
  # indicates whether the infile is headerless
44
52
  attr_reader :headerless
45
- # lookup table where the assigned values are stored at
46
- attr_reader :lookup_table
47
53
 
48
54
  # Creates a Join which can be invoked as follows
49
55
  # :call-seq:
@@ -61,8 +67,8 @@ module Sycsvpro
61
67
  # infile:: csv file to operate on
62
68
  # outfile:: csv file with the result
63
69
  # source:: csv file that contains the values to join to infile
64
- # rows: rows to consider for operation. Rows that don't match the pattern
65
- # will be skipped for operation
70
+ # rows:: rows to consider for operation. Rows that don't match the pattern
71
+ # will be skipped for operation
66
72
  # cols:: columns to insert from the source to the infile
67
73
  # pos:: column positions where to insert the values and the insert_header
68
74
  # columns
@@ -76,9 +82,9 @@ module Sycsvpro
76
82
  @outfile = options[:outfile]
77
83
  @source = options[:source]
78
84
  @row_filter = RowFilter.new(options[:rows], df: options[:df])
79
- @columns = options[:cols].split(',').collect { |c| c.to_i }
80
- @positions = col_positions(options[:pos], @columns)
81
- @joins = options[:joins].split('=').collect { |j| j.to_i }
85
+ @positions = create_joiners(options[:joins],
86
+ options[:cols],
87
+ options[:pos])
82
88
  @headerless = options[:headerless].nil? ? false : options[:headerless]
83
89
  @header = Header.new(options[:header] || '*',
84
90
  pos: @positions,
@@ -108,14 +114,17 @@ module Sycsvpro
108
114
  next if row_filter.process(line, row: index).nil?
109
115
 
110
116
  values = line.split(';')
117
+ target = values.dup
118
+
119
+ @positions.sort.each { |p| target.insert(p, "") }
120
+
121
+ @joiners.each do |joiner|
122
+ key = values[joiner.join[1]]
123
+ row = joiner.lookup[:rows][key] || []
124
+ joiner.pos.each_with_index { |p,i| target[p] = row[i] }
125
+ end
111
126
 
112
- key = values[@joins[1]]
113
- row = lookup_table[:rows][key] || []
114
-
115
- lookup_table[:pos].sort.each { |p| values.insert(p, "") }
116
- lookup_table[:pos].each_with_index { |p,i| values[p] = row[i] }
117
-
118
- out.puts values.join(';')
127
+ out.puts target.join(';')
119
128
  end
120
129
  end
121
130
  end
@@ -125,8 +134,6 @@ module Sycsvpro
125
134
  # Creates a lookup table from the source file values. The join column of
126
135
  # the source file is the key
127
136
  def create_lookup_table
128
- @lookup_table = { pos: positions, rows: {} }
129
-
130
137
  File.open(source).each_with_index do |line|
131
138
  next if line.chomp.empty?
132
139
 
@@ -134,12 +141,15 @@ module Sycsvpro
134
141
 
135
142
  next if values.empty?
136
143
 
137
- key = values[@joins[0]]
138
- lookup_table[:rows][key] = []
144
+ @joiners.each do |joiner|
145
+ key = values[joiner.join[0]]
146
+ joiner.lookup[:rows][key] = []
139
147
 
140
- columns.each do |i|
141
- lookup_table[:rows][key] << values[i]
148
+ joiner.cols.each do |i|
149
+ joiner.lookup[:rows][key] << values[i]
150
+ end
142
151
  end
152
+
143
153
  end
144
154
  end
145
155
 
@@ -148,12 +158,39 @@ module Sycsvpro
148
158
  # are put at the beginning of the row
149
159
  def col_positions(pos, cols)
150
160
  if pos.nil? || pos.empty?
151
- Array.new(cols.size) { |c| c }
161
+ pos = []
162
+ cols.each { |c| pos << Array.new(c.size) { |c| c } }
163
+ pos
152
164
  else
153
- pos.split(',').collect { |p| p.to_i }
165
+ pos.split(';').collect { |p| p.split(',').collect { |p| p.to_i } }
154
166
  end
155
167
  end
156
168
 
169
+ # Initializes joiners based on joins, positions and columns
170
+ #
171
+ # Possible input forms are:
172
+ # joins:: "4=0;4=1" or "4=1"
173
+ # positions:: "1,2;4,5" or "1,2"
174
+ # columns:: "1,2;3,4"
175
+ #
176
+ # This has the semantics of 'insert columns 1 and 2 at positions 1 and 2
177
+ # for key 0 and columns 3 and 4 at positions 4 and 5 for key 1'. Key 4 is
178
+ # the corresponding value in the source file
179
+ #
180
+ # Return value:: positions where to insert values from source file
181
+ def create_joiners(j, c, p)
182
+ js = j.split(';').collect { |j| j.split('=').collect { |j| j.to_i } }
183
+ cs = c.split(';').collect { |c| c.split(',').collect { |c| c.to_i } }
184
+ ps = col_positions(p, cs)
185
+
186
+ @joiners = []
187
+ (0...js.size).each do |i|
188
+ @joiners << Joiner.new(js[i], ps[i], cs[i], { rows: { } })
189
+ end
190
+
191
+ ps.flatten
192
+ end
193
+
157
194
  end
158
195
 
159
196
  end
@@ -24,7 +24,7 @@ module Sycsvpro
24
24
  include Dsl
25
25
 
26
26
  # Regex to split parameters
27
- COL_SPLITTER = /,(?=[\w +]*:)/
27
+ COL_SPLITTER = /,(?=['\w +-]*:)/
28
28
  # infile contains the data that is operated on
29
29
  attr_reader :infile
30
30
  # outfile is the file where the result is written to
@@ -59,9 +59,9 @@ module Sycsvpro
59
59
  # df:: date format
60
60
  # nf:: number format of number values. "DE" e.g. is 1.000,00 where as
61
61
  # US is 1,000.00
62
- # pr:: precision of number values. Default 2
63
- # rows: rows to consider for operation. Rows that don't match the pattern
64
- # will be skipped for operation
62
+ # pr:: precision of number values.
63
+ # rows:: rows to consider for operation. Rows that don't match the pattern
64
+ # will be skipped for operation
65
65
  # header:: Header of the csv file
66
66
  # key:: Values located at value 0 and subsequent columns
67
67
  # cols:: Values added to columns based on an operation or assignment
@@ -75,7 +75,7 @@ module Sycsvpro
75
75
  @keys = split_by_comma_regex(options[:key]) #options[:key].split(',')
76
76
  @cols = options[:cols].split(COL_SPLITTER)
77
77
  @number_format = options[:nf] || 'EN'
78
- @precision = options[:pr] || 2
78
+ @precision = options[:pr].to_i if options[:pr]
79
79
  prepare_sum_row options[:sum]
80
80
  @rows = {}
81
81
  end
@@ -162,7 +162,7 @@ module Sycsvpro
162
162
  column = evaluate(column) if column =~ /^c\d+[=~+]/
163
163
  previous_value = row[:cols][column]
164
164
  if value = eval("#{row[:cols][column]}#{formula}")
165
- row[:cols][column] = value.round(@precision)
165
+ row[:cols][column] = @precision ? value.round(@precision) : value
166
166
  add_to_sum_row(row[:cols][column] - previous_value, column)
167
167
  end
168
168
  end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.7'
4
+ VERSION = '0.1.8'
5
5
  end
@@ -39,7 +39,9 @@ module Sycsvpro
39
39
  end
40
40
 
41
41
  it "should extract rows base on regex including commas" do
42
- extractor = Extractor.new(infile: @in_file2, outfile: @out_file, rows: "/[56789]\\d+|\\d{3,}/")
42
+ extractor = Extractor.new(infile: @in_file2,
43
+ outfile: @out_file,
44
+ rows: "/[56789]\\d+|\\d{3,}/")
43
45
 
44
46
  extractor.execute
45
47
 
@@ -6,6 +6,7 @@ module Sycsvpro
6
6
 
7
7
  before do
8
8
  @in_file = File.join(File.dirname(__FILE__), "files/persons.csv")
9
+ @in_file_2 = File.join(File.dirname(__FILE__), "files/multiple-persons.csv")
9
10
  @source_file = File.join(File.dirname(__FILE__), "files/countries.csv")
10
11
  @out_file = File.join(File.dirname(__FILE__), "files/persons-countries.csv")
11
12
  end
@@ -45,6 +46,36 @@ module Sycsvpro
45
46
 
46
47
  end
47
48
 
49
+ it "should join files inserting values on multiple positions" do
50
+ cols = "1,2;1,2"
51
+ insert_col_pos = "3,2;6,5"
52
+ insert_header = "A-COUNTRY,A-STATE;B-COUNTRY,B-STATE"
53
+ joins = "0=1;0=2"
54
+
55
+ Sycsvpro::Join.new(infile: @in_file_2,
56
+ outfile: @out_file,
57
+ source: @source_file,
58
+ cols: cols,
59
+ joins: joins,
60
+ insert_header: insert_header,
61
+ pos: insert_col_pos).execute
62
+
63
+ result = [ "Name;A_ID;A-STATE;A-COUNTRY;B_ID;B-STATE;B-COUNTRY",
64
+ "Hank;123;A4;AT;234;C3;CA",
65
+ "Frank;234;C3;CA;345;D1;DE",
66
+ "Mia;345;D1;DE;456;U2;US",
67
+ "Arwen;456;U2;US;123;A4;AT" ]
68
+
69
+ rows = 0
70
+
71
+ File.new(@out_file, 'r').each_with_index do |line, index|
72
+ expect(line.chomp).to eq result[index]
73
+ rows += 1
74
+ end
75
+
76
+ rows.should eq result.size
77
+ end
78
+
48
79
  it "should join files without explicit insert header" do
49
80
  cols = "1,2"
50
81
  insert_col_pos = "2,1"
@@ -142,6 +142,7 @@ module Sycsvpro
142
142
  "RP:+n2 if #{rp_order_type}.index(c1),"+
143
143
  "Total:+n2",
144
144
  nf: "DE",
145
+ pr: "2",
145
146
  sum: "top:SP,RP,Total").execute
146
147
 
147
148
  result = [ "Year;SP;RP;Total",
@@ -167,6 +168,7 @@ module Sycsvpro
167
168
  key: "c0=~/\\.(\\d{4})/",
168
169
  cols: "c1=~/^([A-Z]{1,2})/:+n2,Total:+n2",
169
170
  nf: "DE",
171
+ pr: 2,
170
172
  sum: "top:BEGINc1=~/^([A-Z]{1,2})/END,Total").execute
171
173
 
172
174
  result = [ "Year;ZE;ZR;Total",
@@ -210,6 +212,29 @@ module Sycsvpro
210
212
 
211
213
  end
212
214
 
215
+ it "should add a count column for the occurance of column values" do
216
+ Sycsvpro::Table.new(infile: @in_file,
217
+ outfile: @out_file,
218
+ header: "Year,c6,c1,c2+c3,c2+c3+'-Count'",
219
+ key: "c0=~/\\.(\\d{4})/,c6",
220
+ cols: "Value:+n1,c2+c3:+n1,c2+c3+'-Count':+1",
221
+ sum: "top:Value,c2+c3").execute
222
+
223
+ result = [ "Year;Country;Value;A1;B2;B4;B4-Count;B2-Count;A1-Count",
224
+ ";;95.2;41.0;21.0;33.2;;;",
225
+ "2013;AT;53.7;20.5;0;33.2;1;0;1",
226
+ "2014;DE;21.0;0;21.0;0;0;1;0",
227
+ "2014;AT;20.5;20.5;0;0;0;0;1" ]
228
+
229
+ rows = 0
230
+
231
+ File.open(@out_file).each_with_index do |line, index|
232
+ line.chomp.should eq result[index]
233
+ rows += 1
234
+ end
235
+
236
+ rows.should eq result.size
237
+ end
213
238
 
214
239
  end
215
240
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-27 00:00:00.000000000 Z
12
+ date: 2014-06-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake