carray-io-csv 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 62bb651dde7c1b298668b3248497cae5ae1099b4f8c9a09ed8927167860ec489
4
+ data.tar.gz: 67886d1e087cd637f1d143f8fb97d68f2437f2b2076941211084981cadd6a0e0
5
+ SHA512:
6
+ metadata.gz: c332f91806db558da7678e8210ad42d1050a037f5b63f048e785f95e862b14fa7dbc4b5be0764676f2135f4ee553b4703a38d59619177b854f9ef156cc1eefc0
7
+ data.tar.gz: 62f26d4f25c025611b62e781d9456f51d71f70a1635edbb109530e0b2e3427b0aea8b1848e896fbed9b88188480b13cdd62dec0ff099e8ed41762de1ea005869
@@ -0,0 +1,11 @@
1
# Rakefile for the carray-io-csv gem.

# Path of the gemspec used by the :install task.
GEMSPEC = "carray-io-csv.gemspec"

# Builds the gem described by GEMSPEC and installs the resulting package.
task :install do
  # Gem::Specification.load evaluates the gemspec on our behalf (instead of a
  # bare eval) and returns nil when the file cannot be loaded.
  spec = Gem::Specification.load(GEMSPEC)
  raise "cannot load #{GEMSPEC}" unless spec
  # sh aborts the task as soon as a command fails, so a failed build is never
  # followed by the installation of a stale package file. The argument-list
  # form runs the commands without passing through a shell.
  sh "gem", "build", GEMSPEC
  sh "gem", "install", "#{spec.full_name}.gem"
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
@@ -0,0 +1,27 @@
1
# Gem specification for carray-io-csv.
Gem::Specification::new do |s|
  version = "0.9.0"

  # Package the whole tree except previously built gems and the test/ and
  # work/ directories. Only regular files are listed; the directories that
  # the glob also matches are dropped.
  excluded = [
    Dir.glob("carray-io-csv-*.gem"),
    Dir.glob("test/**/*"),
    Dir.glob("work/**/*"),
  ].flatten
  files = (Dir.glob("**/*") - excluded).select { |path| File.file?(path) }

  s.platform    = Gem::Platform::RUBY
  s.name        = "carray-io-csv"
  s.summary     = "CSV interface for CArray"
  s.description = <<-HERE
    CSV interface for CArray
  HERE
  s.version     = version
  # NOTE(review): the header comment of the library source mentions the Ruby
  # licence while MIT is declared here -- confirm which one is intended.
  s.licenses    = ['MIT']
  s.author      = "Hiroki Motoyoshi"
  s.email       = ""
  s.homepage    = 'https://github.com/himotoyoshi/carray-io-csv'
  s.files       = files
  # s.extensions  = [ "ext/extconf.rb" ]
  # The library uses required keyword arguments (e.g. "sep:, rs:"), which are
  # available from Ruby 2.1 on.
  s.required_ruby_version = ">= 2.1.0"
  s.add_runtime_dependency 'carray', '~> 1.3'
  s.add_runtime_dependency 'rcsv', '~> 0.3.1'

end
27
+
@@ -0,0 +1,2 @@
1
+ require "carray"
2
+ require "carray-io-csv/core"
@@ -0,0 +1,597 @@
1
+ # ----------------------------------------------------------------------------
2
+ #
3
+ # carray/io/csv.rb
4
+ #
5
+ # This file is part of Ruby/CArray extension library.
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Ruby Licence.
8
+ #
9
+ # Copyright (C) 2005 Hiroki Motoyoshi
10
+ #
11
+ # ----------------------------------------------------------------------------
12
+ #
13
+ # CSV data reader/writer for CArray (in DSL approach)
14
+ #
15
+ # For reading (explicit form)
16
+ #
17
+ # csv = CA::CSVReader.new {
18
+ # header # read a line as "names" header
19
+ # header :units # read a line as "units" header
20
+ # skip 1 # skip a line
21
+ # body 10 # read values
22
+ # process { |name, column| # post processing
23
+ # case name # name : name of columns (string|integer)
24
+ # when "Date" # column : column carray
25
+ # column.map!{|x| Date.parse(x) }
26
+ # else
27
+ # column[] = column.double
28
+ # end
29
+ # }
30
+ # }
31
+ #
32
+ # data1 = csv.read_file(file1)
33
+ # data2 = csv.read_file(file2)
34
+ # data3 = csv.read_file(file3)
35
+ #
36
+ # For reading (implicit form)
37
+ #
38
+ # data = CArray.load_csv(file) { ... definitions ... }
39
+ # data = CArray.from_csv(io|string) { ... definitions ... }
40
+ #
41
+ # For writing (explicit form)
42
+ #
43
+ # csv = CA::CSVWriter.new {
44
+ # names ["Date", "a", "b", "c"] # set name of columns
45
+ # process { |name, column| # pre processing
46
+ # case name # name : name of column
47
+ # when "Date" # column : column carray
48
+ # column.map!{|x| x.to_s }
49
+ # else
50
+ # column.map!{|x| "%.2f" % x }
51
+ # end
52
+ # }
53
+ # puts "sample CSV data" # print any string
54
+ # header # write names in header
55
+ # header ["","mm","cm","m"] # write units in header
56
+ # body # write values
57
+ # }
58
+ #
59
+ # csv.write_file(data1, file1)
60
+ # csv.write_file(data2, file2)
61
+ # csv.write_file(data3, file3)
62
+ #
63
+ # For writing (implicit form)
64
+ #
65
+ # data.save_csv(file) { ... definitions ... }
66
+ # data.to_csv([io|string]) { ... definitions ... }
67
+ #
68
+
69
+ require "carray/io/table"
70
+ require "stringio"
71
+ require "rcsv"
72
+ require "strscan"
73
+
74
module CA

  # CSV reader configured by a DSL block.
  #
  # The block given to +new+ is stored and evaluated by a fresh Processor for
  # every input, so one reader can be applied to several inputs that share
  # the same layout.
  class CSVReader

    # sep::   column separator
    # rs::    record separator handed to IO#gets
    # block:: reader definition (optional; the Processor reads the whole
    #         input with +body+ when no block is given)
    def initialize (sep: ",", rs: $/, &block)
      @sep   = sep
      @rs    = rs
      @block = block
    end

    # Reads CSV records from +io+ and returns the table built by the Processor.
    def read_io (io)
      return Processor.new(io, sep: @sep, rs: @rs, &@block).run
    end

    # Reads CSV records from the text in +string+.
    def read_string (string)
      return read_io(StringIO.new(string))
    end

    # Opens +filename+ (with the optional +encoding+) and reads it.
    def read_file (filename, encoding: nil)
      File.open(filename, encoding: encoding) { |io|
        return read_io(io)
      }
    end

    # Execution context of the reader definition block. Its public methods
    # (header, note, skip, body, select, process, ...) form the reader DSL.
    class Processor

      def initialize (io, sep:, rs:, &block)
        @io    = io
        @sep   = sep
        @rs    = rs
        @block = block || proc { body }
        @namelist   = nil
        @names      = nil
        @headerlist = []
        @header     = {}
        @note       = ""
        @table      = nil
        # The separator is data, not a pattern: escape it so that separators
        # such as "|" or "." are matched literally. A Regexp separator is
        # interpolated unchanged.
        # NOTE: inside the character classes below a multi-character
        # separator excludes each of its characters individually.
        sep_re = @sep.is_a?(Regexp) ? @sep.to_s : Regexp.escape(@sep)
        @regexp_simple1 = /#{sep_re}/
        @regexp_simple2 = / *#{sep_re} */
        @regexp_pat1    = /\A([^"#{sep_re}][^#{sep_re}]*) *#{sep_re} */
        @regexp_pat2    = /\A"([^"]+)" *#{sep_re} */
        @regexp_pat3    = /\A"((?:[^"]+|"")+)" *#{sep_re} */
        @regexp_pat4    = /\A(?:""|) *#{sep_re} */
        @sc = StringScanner.new("")
      end

      # Evaluates the definition block and returns the resulting table.
      #
      # A block taking one parameter is called with this processor; a block
      # without parameters is evaluated in the context of this processor.
      # Missing or empty column names are replaced by "c<index>". The table
      # is extended with CA::TableMethods and gets +note+ and +header+
      # readers.
      def run
        case @block.arity
        when 1
          @block.call(self)
        when -1, 0
          instance_exec(&@block)
        else
          raise "invalid block parameter"
        end
        if @table.nil?
          # Without a table the code below cannot work; fail with a message
          # that names the cause.
          raise "no data was read : 'body' was not called in the definition block"
        end
        if @header.has_key?("names")
          @header["names"].each_with_index do |name, k|
            if name.nil? or name.empty?
              @header["names"][k] = "c#{k}"
            end
          end
        else
          @header["names"] = (0...@cols).map{|k| "c#{k}"}
        end
        header = @header
        note   = @note
        @table.instance_exec{
          @names  = header["names"]
          @header = header
          @note   = note
        }
        @table.extend(CA::TableMethods)
        @table.column_names = header["names"]
        class << @table
          attr_reader :note
          def header (name=nil)
            if name
              return @header[name.to_s]
            else
              return @header
            end
          end
        end
        return @table
      end

      # Sets the column names explicitly (stored as strings), overriding a
      # "names" header that was read before.
      def column_names (*namelist)
        if @header.has_key?("names")
          warn "override header['names']"
        end
        @names = namelist.map(&:to_s)
        @header["names"] = @names
        return namelist
      end

      alias columns column_names

      # Reads one record and stores it as the header called +name+.
      # The "names" header defines the column names and may be read only once.
      def header (name = "names")
        name = name.to_s
        list = csv_feed()
        if name == "names"
          if @names
            raise "already 'names' defined"
          end
          @names = list
        end
        @header[name] = list
        @headerlist.push(name)
        return list
      end

      attr_reader :names

      # Reads +n+ raw lines, appends them to the note text of the table and
      # returns the text just read.
      def note (n=1)
        list = []
        n.times { list << @io.gets(@rs) }
        @note << (text = list.join)
        return text
      end

      # Discards +n+ raw lines.
      def skip (n=1)
        n.times { @io.gets(@rs) }
      end

      # Reads the data records into the table.
      #
      # n::    number of records to read; the rest of the input is parsed
      #        with Rcsv when omitted
      # cols:: number of columns; defaults to the number of column names or,
      #        failing that, to the size of the first record
      #
      # When +n+ is given, short records are padded with nil and a longer
      # record widens all rows read so far. Empty strings become nil.
      def body (n=nil, cols=nil)
        data  = []
        count = 0
        if cols
          @cols = cols
        elsif @names
          @cols = @names.size
        else
          list = csv_feed()
          if list.nil?
            @rows = 0
            # The column count is unknown for empty input; use 0 rather than
            # passing nil as a dimension.
            # NOTE(review): assumes CArray.object accepts zero-length
            # dimensions -- confirm.
            @cols ||= 0
            @table = CArray.object(@rows, @cols)
            return
          end
          data.push(list)
          count += 1
          @cols = list.size
        end
        if n
          lsize = nil
          while count < n and list = csv_feed(@cols)
            lsize = list.size
            if lsize == @cols
              data.push(list)
            elsif lsize < @cols
              record = Array.new(@cols, nil)
              record[0,lsize] = list
              data.push(record)
            else
              extra = Array.new(lsize - @cols, nil)
              data.each do |row|
                row.push(*extra)
              end
              data.push(list)
              @cols = lsize
              # raise "csv parse error : too large column number at line #{@io.lineno}"
            end
            count += 1
          end
        else
          unless @io.eof?
            data += Rcsv.parse(@io, column_separator: @sep, header: :none)
          end
        end
        @rows = data.size
        @table = CArray.object(@rows, @cols){ data }
        @table[:eq,""] = nil
      end

      # Renames the column +name+ to +newname+.
      def rename (name, newname)
        names = @header["names"]
        i = names.index(name)
        if i.nil?
          raise "invalid argument #{name}"
        end
        names[i] = newname
        @names = @header["names"]
      end

      # Converts all column names to lower case.
      def downcase
        @header["names"] = @header["names"].map(&:downcase)
        @names = @header["names"]
      end

      # Restricts the table and all headers to the given columns. Columns are
      # given by index (Integer), index range (Range) or name (String or
      # Symbol). Without arguments the selection is cleared and the table is
      # left as it is.
      def select (*namelist)
        @namelist = namelist.empty? ? nil : namelist
        return nil if @namelist.nil?
        index = (0...@cols).to_a
        index_list = @namelist.map{ |x|
          case x
          when Integer
            x
          when Range
            index[x]
          when String, Symbol
            if @names and i = @names.index(x.to_s)
              i
            else
              raise "invalid argument #{x}"
            end
          else
            raise "invalid argument"
          end
        }.flatten
        @table = @table[nil, CA_INT(index_list)].to_ca
        @header.keys.each do |k|
          @header[k] = @header[k].values_at(*index_list)
        end
        @names = @header["names"]
      end

      # Yields every column of the table together with its name: the
      # selection given to +select+ if any, else the column names, else the
      # column index.
      def process
        if @namelist
          @namelist.each_with_index do |name, i|
            yield(name, @table[nil, i])
          end
        elsif @names
          @names.each_with_index do |name, i|
            yield(name, @table[nil, i])
          end
        else
          @table.dim1.times do |i|
            yield(i, @table[nil,i])
          end
        end
      end

      # Converts the table to +data_type+. For a CA::Struct subclass the
      # conversion is applied per row; without a block each row is passed to
      # the constructor of the struct class.
      def convert (data_type, options={}, &block)
        if block_given?
          if data_type.is_a?(Class) and data_type < CA::Struct
            @table = @table[:i, nil].convert(data_type, &block)
          else
            @table = @table.convert(data_type, options, &block)
          end
        else
          if data_type.is_a?(Class) and data_type < CA::Struct
            @table = @table[:i,nil].convert(data_type) { |b|
              data_type.new(*b[0,nil])
            }
          else
            @table = @table.to_type(data_type, options)
          end
        end
      end

      private

      # Reads one logical record and returns its fields, or nil at the end
      # of the input. A record whose double quotes are unbalanced continues
      # on the following line(s).
      #
      # Raises "csv parse error" when the input ends inside a quoted field.
      def csv_feed (cols=nil)
        if @io.eof?
          return nil
        end
        line = nil
        loop do
          newline = @io.gets(@rs)
          if newline.nil?
            # Input ended while a quoted field was still open: report it
            # instead of silently returning an empty record.
            raise "csv parse error" if line
            return []
          end
          if line
            line << newline
          else
            line = newline
          end
          count_quote = line.count('"')
          # An odd number of quotes means that the record is not complete.
          next if count_quote.odd?
          # Strip the record separator actually in use, not just the default.
          line.chomp!(@rs.is_a?(String) ? @rs : $/)
          if count_quote == 0
            if line.count(' ') == 0
              return line.split(@sep, -1)             ### /#{@sep}/
            else
              return line.split(@regexp_simple2, -1)  ### / *#{@sep} */
            end
          end
          return csv_split(line, cols)
        end
      end

      # Splits a record that contains quoted fields. Doubled quotes inside a
      # quoted field are reduced to one quote and empty fields become nil.
      # +text+ is modified (a separator is appended as terminator).
      def csv_split (text, cols=nil)
        if cols
          csv = Array.new(cols)
        else
          csv = []
        end
        text << @sep
        @sc.string = text
        i = 0
        begin
          case
          when @sc.scan(@regexp_pat1)
            ### unquoted field
            csv[i] = @sc[1]
          when @sc.scan(@regexp_pat2)
            ### quoted field without embedded quotes
            csv[i] = @sc[1]
          when @sc.scan(@regexp_pat3)
            ### quoted field with doubled quotes
            s = @sc[1]
            if s =~ /"/
              csv[i] = s.gsub(/""/, '"')
            else
              csv[i] = s
            end
          when @sc.scan(@regexp_pat4)
            ### empty field
            csv[i] = nil
          else
            raise "csv parse error"
          end
          i += 1
        end until @sc.eos?
        return csv
      end

    end

  end

end
406
+
407
module CA

  # CSV writer configured by a DSL block. The block is stored and evaluated
  # by a fresh Processor for every table that is written.
  class CSVWriter # :nodoc:

    # sep::   column separator
    # rs::    record separator written after every line
    # fill::  value written in place of masked elements
    # block:: writer definition (optional; the Processor writes the values
    #         with +body+ when no block is given)
    def initialize (sep=",", rs=$/, fill="", &block)
      @block = block
      @sep   = sep
      @rs    = rs
      @fill  = fill
    end

    # Writes +table+ to +io+.
    def write_io (table, io)
      return Processor.new(table, io, @sep, @rs, @fill, &@block).run
    end

    # Writes +table+ into +string+ through a StringIO and returns +string+.
    def write_string (table, string)
      write_io(table, StringIO.new(string))
      return string
    end

    # Writes +table+ to the file +filename+ opened with +mode+.
    def write_file (table, filename, mode="w")
      # File.open instead of Kernel#open: the latter runs a command when the
      # file name starts with "|".
      File.open(filename, mode) { |io|
        return write_io(table, io)
      }
    end

    # Execution context of the writer definition block. Its public methods
    # (names, header, puts, body, process) form the writer DSL.
    class Processor # :nodoc:

      def initialize (table, io, sep, rs, fill, &block)
        @io    = io
        @sep   = sep
        @rs    = rs
        @fill  = fill
        @block = block || proc { body }
        if table.has_data_class?
          @names = table.members
          @table = CArray.merge(CA_OBJECT, table[nil].fields)
        else
          @names = table.instance_exec{ @names }
          if @names.nil?
            @names = table.instance_exec{ @column_names }
          end
          case
          when table.rank > 2
            @table = table.reshape(false,nil).object
          when table.rank == 1
            @table = table[:%,1].object     ### convert to CA_OBJECT
          else
            @table = table.object           ### convert to CA_OBJECT
          end
        end
        if @table.has_mask?
          @table.unmask(@fill)
        end
        # Built per instance and escaped: a regexp literal with the /o flag
        # interpolates only once per process, so every later writer would
        # keep the separator of the first one.
        @regexp_simple = Regexp.new(Regexp.escape(@sep.to_s))
      end

      # Returns +text+ (converted with to_s) as a CSV field. The field is
      # enclosed in double quotes, with embedded quotes doubled, when it
      # contains a quote, the column separator, a line break or the record
      # separator.
      def csv_quote (text)
        text = text.to_s
        needs_quote =
          text.include?('"') ||
          text =~ @regexp_simple ||
          text =~ /[\r\n]/ ||
          (@rs.is_a?(String) && !@rs.empty? && text.include?(@rs))
        if needs_quote
          return '"' + text.gsub(/"/, '""') + '"'
        end
        return text
      end

      # Evaluates the definition block: a block taking one parameter is
      # called with this processor, a block without parameters is evaluated
      # in the context of this processor.
      def run
        case @block.arity
        when 1
          @block.call(self)
        when -1, 0
          instance_exec(&@block)
        else
          raise "invalid block parameter"
        end
      end

      # set @names
      def names (list)
        @names = list
      end

      # Writes +list+ (the column names by default) as one quoted CSV line.
      def header (list = @names)
        @io.write list.map{|s| csv_quote(s)}.join(@sep)
        @io.write(@rs)
      end

      # Writes the given strings followed by the record separator.
      def puts (*argv)
        @io.print(*argv)
        @io.write(@rs)
      end

      # Writes the values of the table, one line per row.
      #
      # strict:: when true, String elements are passed through csv_quote
      # format:: format string applied to the elements of each row with
      #          Kernel#format; rows are joined with the separator when
      #          omitted
      def body (strict: true, format: nil)
        if strict
          case @table.data_type
          when CA_OBJECT
            table = @table.to_ca
            table[:is_kind_of, String].map! { |s| csv_quote(s) }
          when CA_FIXLEN
            table = @table.object
            table.map! { |s| csv_quote(s) }
          else
            table = @table.object
          end
        else
          table = @table
        end
        if format
          table.dim0.times do |i|
            @io.write Kernel::format(format,*table[i,nil].to_a)
            @io.write(@rs)
          end
        else
          table.dim0.times do |i|
            @io.write table[i,nil].to_a.join(@sep)
            @io.write(@rs)
          end
        end
      end

      # Yields every column of the table together with its name (taken from
      # +namelist+) or, without names, together with its column index, so
      # that the values can be prepared before they are written.
      def process (namelist = @names)
        if namelist
          namelist.each_with_index do |name, i|
            yield(name, @table[nil, i])
          end
        else
          @table.dim1.times do |i|
            yield(i, @table[nil,i])
          end
        end
      end
    end
  end
end
545
+
546
# CSV related extensions of CArray.
class CArray

  # Reads the CSV file +file+ with a CA::CSVReader configured by +sep+, +rs+
  # and the definition block, and returns the resulting table.
  def self.load_csv (file, sep: ",", rs: $/, encoding: nil, &block)
    reader = CA::CSVReader.new(sep: sep, rs: rs, &block)
    return reader.read_file(file, encoding: encoding)
  end

  # Reads CSV data from an IO, a StringIO or a String.
  # Raises "invalid argument" for any other kind of source.
  def self.from_csv (io, sep: ",", rs: $/, &block)
    reader = CA::CSVReader.new(sep: sep, rs: rs, &block)
    case io
    when IO, StringIO
      return reader.read_io(io)
    when String
      return reader.read_string(io)
    else
      raise "invalid argument"
    end
  end

  # Writes the array to the file +file+ as CSV. Settings are taken from the
  # keyword arguments; entries of the +option+ hash (:sep, :rs, :fill,
  # :mode) take precedence over them.
  def save_csv (file, option = {}, rs: $/, sep: ",", fill: "", mode: "w", &block)
    option = {:sep=>sep, :rs=>rs, :fill=>fill, :mode=>mode}.update(option)
    writer = CA::CSVWriter.new(option[:sep], option[:rs], option[:fill], &block)
    return writer.write_file(self, file, option[:mode])
  end

  # Writes the array as CSV to +io+, which is an IO, a StringIO or a String
  # (a new String by default). Entries of the +option+ hash (:sep, :rs,
  # :fill) take precedence over the keyword arguments.
  # Raises "invalid argument" for any other kind of target, in the same way
  # as CArray.from_csv does for its source.
  def to_csv (io="", option ={}, rs: $/, sep: ",", fill: "", &block)
    option = {:sep=>sep, :rs=>rs, :fill=>fill}.update(option)
    writer = CA::CSVWriter.new(option[:sep], option[:rs], option[:fill], &block)
    case io
    when IO, StringIO
      return writer.write_io(self, io)
    when String
      return writer.write_string(self, io)
    else
      raise "invalid argument"
    end
  end

  # Returns the array as text with right-aligned columns under a line of
  # column names. The names are taken from option[:names] or from
  # +self.names+; the columns are separated by option[:sep] (a space by
  # default).
  def to_tabular (option = {})
    option = {:sep=>" ", :names=>nil}.update(option)
    if option[:names]
      names = option[:names]
    elsif self.respond_to?(:names)
      names = self.names
    end
    if names.nil?
      raise "column names are required : give the :names option"
    end
    sep = option[:sep]
    data = self.to_ca.map! {|s| s.to_s }
    table = CArray.join([names.to_ca], [data])
    length = table.convert{|s| s.length}.max(0)
    # NOTE(review): every comma of the CSV text is replaced, including
    # commas that are part of a cell -- confirm that this is intended.
    table.map_with_index! {|s, idx| s.rjust(length[idx[1]]) }.to_csv.gsub(/,/,sep)
  end

end
597
+
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: carray-io-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroki Motoyoshi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-06-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: carray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rcsv
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.3.1
41
+ description: " CSV interface for CArray\n"
42
+ email: ''
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - Rakefile
48
+ - carray-io-csv.gemspec
49
+ - lib/carray-io-csv.rb
50
+ - lib/carray-io-csv/core.rb
51
+ homepage: https://github.com/himotoyoshi/carray-io-csv
52
+ licenses:
53
+ - MIT
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 1.8.1
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 2.7.7
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: CSV interface for CArray
75
+ test_files: []