carray-io-csv 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 62bb651dde7c1b298668b3248497cae5ae1099b4f8c9a09ed8927167860ec489
4
+ data.tar.gz: 67886d1e087cd637f1d143f8fb97d68f2437f2b2076941211084981cadd6a0e0
5
+ SHA512:
6
+ metadata.gz: c332f91806db558da7678e8210ad42d1050a037f5b63f048e785f95e862b14fa7dbc4b5be0764676f2135f4ee553b4703a38d59619177b854f9ef156cc1eefc0
7
+ data.tar.gz: 62f26d4f25c025611b62e781d9456f51d71f70a1635edbb109530e0b2e3427b0aea8b1848e896fbed9b88188480b13cdd62dec0ff099e8ed41762de1ea005869
@@ -0,0 +1,11 @@
1
+ GEMSPEC = "carray-io-csv.gemspec"
2
+
3
# Build the gem described by GEMSPEC and install the resulting package.
#
# The gemspec is loaded through RubyGems instead of a bare `eval`, and each
# command is run with `sh`, which raises when the command fails, so a failed
# `gem build` no longer falls through to installing a stale package.
task :install do
  spec = Gem::Specification.load(GEMSPEC)
  raise "cannot load #{GEMSPEC}" if spec.nil?
  # Argument-list form: no shell is involved, so no quoting issues.
  sh "gem", "build", GEMSPEC
  sh "gem", "install", "#{spec.full_name}.gem"
end
9
+
10
+ require 'rspec/core/rake_task'
11
+ RSpec::Core::RakeTask.new
@@ -0,0 +1,27 @@
1
# Gem specification for carray-io-csv.
Gem::Specification.new do |s|
  version = "0.9.0"

  # Package everything below the current directory except previously built
  # gem files and the test/ and work/ trees.
  files = Dir.glob("**/*") - [
    Dir.glob("carray-io-csv-*.gem"),
    Dir.glob("test/**/*"),
    Dir.glob("work/**/*"),
  ].flatten

  s.platform    = Gem::Platform::RUBY
  s.name        = "carray-io-csv"
  s.summary     = "CSV interface for CArray"
  s.description = <<-HERE
    CSV interface for CArray
  HERE
  s.version     = version
  # NOTE(review): the header of the library source mentions the Ruby Licence
  # while MIT is declared here -- confirm which one is intended.
  s.licenses    = ['MIT']
  s.author      = "Hiroki Motoyoshi"
  s.email       = ""
  s.homepage    = 'https://github.com/himotoyoshi/carray-io-csv'
  s.files       = files
  # s.extensions  = [ "ext/extconf.rb" ]

  # The library defines methods with required keyword arguments
  # (e.g. `sep:, rs:`), which need Ruby 2.1 or later.
  s.required_ruby_version = ">= 2.1.0"
  s.add_runtime_dependency 'carray', '~> 1.3'
  s.add_runtime_dependency 'rcsv', '~> 0.3.1'

end
27
+
@@ -0,0 +1,2 @@
1
+ require "carray"
2
+ require "carray-io-csv/core"
@@ -0,0 +1,597 @@
1
+ # ----------------------------------------------------------------------------
2
+ #
3
+ # carray/io/csv.rb
4
+ #
5
+ # This file is part of Ruby/CArray extension library.
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Ruby Licence.
8
+ #
9
+ # Copyright (C) 2005 Hiroki Motoyoshi
10
+ #
11
+ # ----------------------------------------------------------------------------
12
+ #
13
+ # CSV data reader/writer for CArray (in DSL approach)
14
+ #
15
+ # For reading (explicit form)
16
+ #
17
+ # csv = CA::CSVReader.new {
18
+ # header # read a line as "names" header
19
+ # header :units # read a line as "units" header
20
+ # skip 1 # skip a line
21
+ # body 10 # read values
22
+ # process { |name, column| # post processing
23
+ # case name # name : name of columns (string|integer)
24
+ # when "Date" # column : column carray
25
+ # column.map!{|x| Date.parse(x) }
26
+ # else
27
+ # column[] = column.double
28
+ # end
29
+ # }
30
+ # }
31
+ #
32
+ # data1 = csv.read_file(file1)
33
+ # data2 = csv.read_file(file2)
34
+ # data3 = csv.read_file(file3)
35
+ #
36
+ # For reading (implicit form)
37
+ #
38
+ # data = CArray.load_csv(file) { ... definitions ... }
39
+ # data = CArray.from_csv(io|string) { ... definitions ... }
40
+ #
41
+ # For writing (explicit form)
42
+ #
43
+ # csv = CA::CSVWriter.new {
44
+ # names ["Date", "a", "b", "c"] # set name of columns
45
+ # process { |name, column| # pre processing
46
+ # case name # name : name of column
47
+ # when "Date" # column : column carray
48
+ # column.map!{|x| x.to_s }
49
+ # else
50
+ # column.map!{|x| "%.2f" % x }
51
+ # end
52
+ # }
53
+ # puts "sample CSV data" # print any string
54
+ # header # write names in header
55
+ # header ["","mm","cm","m"] # write units in header
56
+ # body # write values
57
+ # }
58
+ #
59
+ # csv.write_file(data1, file1)
60
+ # csv.write_file(data2, file2)
61
+ # csv.write_file(data3, file3)
62
+ #
63
+ # For writing (implicit form)
64
+ #
65
+ # data.save_csv(file) { ... definitions ... }
66
+ # data.to_csv([io|string]) { ... definitions ... }
67
+ #
68
+
69
+ require "carray/io/table"
70
+ require "stringio"
71
+ require "rcsv"
72
+ require "strscan"
73
+
74
+ module CA
75
+
76
+ class CSVReader
77
+
78
# Store the reader configuration.
#
# sep   - field separator (default ",")
# rs    - record separator handed to IO#gets (default $/)
# block - DSL block that is later passed to each Processor
def initialize(sep: ",", rs: $/, &block)
  @sep, @rs, @block = sep, rs, block
end
83
+
84
# Run a Processor configured with this reader's separator, record
# separator and DSL block over +io+ and return whatever its #run returns.
def read_io(io)
  processor = Processor.new(io, sep: @sep, rs: @rs, &@block)
  processor.run
end
87
+
88
# Wrap +string+ in a StringIO and read it through #read_io.
def read_string(string)
  buffer = StringIO.new(string)
  read_io(buffer)
end
91
+
92
# Open +filename+ (optionally with the given +encoding+), read it through
# #read_io and return that result. The file is closed by the block form
# of File.open.
def read_file(filename, encoding: nil)
  File.open(filename, encoding: encoding) do |stream|
    read_io(stream)
  end
end
97
+
98
+ class Processor
99
+
100
# Set up the parsing state for one input stream.
#
# io    - object to read from (must respond to gets/eof?)
# sep   - field separator string
# rs    - record separator handed to IO#gets
# block - DSL block; defaults to a block that just calls #body
#
# The separator is escaped before it is interpolated into the field
# patterns, so separators that are regexp metacharacters ("|", ".", "\t"
# written as a literal tab, ...) are matched literally.
# NOTE(review): the character classes in pat1 still assume a
# single-character separator.
def initialize(io, sep:, rs:, &block)
  @io = io
  @sep = sep
  @rs = rs
  @block = block || proc { body }
  @namelist = nil
  @names = nil
  @headerlist = []
  @header = {}
  @note = ""
  @table = nil
  sep_re = Regexp.escape(@sep)
  @regexp_simple1 = /#{sep_re}/
  @regexp_simple2 = / *#{sep_re} */
  # unquoted field
  @regexp_pat1 = /\A([^"#{sep_re}][^#{sep_re}]*) *#{sep_re} */
  # quoted field without embedded quotes
  @regexp_pat2 = /\A"([^"]+)" *#{sep_re} */
  # quoted field with doubled ("") embedded quotes
  @regexp_pat3 = /\A"((?:[^"]+|"")+)" *#{sep_re} */
  # empty field (nothing or "")
  @regexp_pat4 = /\A(?:""|) *#{sep_re} */
  @sc = StringScanner.new("")
end
119
+
120
# Execute the DSL block and return the resulting table.
#
# A block taking one parameter receives this processor; a block taking
# none is evaluated with the processor as self. Afterwards missing or
# empty column names are replaced by "c<index>", the names/header/note are
# copied into the table's instance variables, the table is extended with
# CA::TableMethods and gets singleton #note and #header accessors.
#
# Raises RuntimeError for a block with any other arity, or when the block
# never built a table (i.e. never called #body).
def run
  case @block.arity
  when 1
    @block.call(self)
  when -1, 0
    instance_exec(&@block)
  else
    raise "invalid block parameter"
  end
  # Fail with a clear message instead of an obscure error further down.
  raise "csv table is not built (call 'body' in the block)" if @table.nil?
  if @header.has_key?("names")
    @header["names"].each_with_index do |name, k|
      if name.nil? or name.empty?
        @header["names"][k] = "c#{k}"
      end
    end
  else
    @header["names"] = (0...@cols).map { |k| "c#{k}" }
  end
  # Locals are needed because instance_exec switches self to the table.
  header = @header
  note = @note
  @table.instance_exec {
    @names = header["names"]
    @header = header
    @note = note
  }
  @table.extend(CA::TableMethods)
  @table.column_names = header["names"]
  class << @table
    attr_reader :note
    # Without argument returns the whole header hash, otherwise the
    # header row stored under +name+.
    def header(name = nil)
      if name
        return @header[name.to_s]
      else
        return @header
      end
    end
  end
  return @table
end
159
+
160
# Define the column names explicitly instead of reading them from the
# input. The names are stored as strings in @names and @header["names"];
# a warning is printed when a "names" header already exists.
# Returns the arguments as given.
def column_names(*namelist)
  warn "override header['names']" if @header.has_key?("names")
  @header["names"] = @names = namelist.map { |entry| entry.to_s }
  namelist
end
168
+
169
+ alias columns column_names
170
+
171
# Read one record with csv_feed and store it as the header row +name+
# (default "names"). The "names" row also becomes @names and may only be
# defined once. Returns the record.
def header(name = "names")
  key = name.to_s
  row = csv_feed()
  if key == "names"
    raise "already 'names' defined" if @names
    @names = row
  end
  @header[key] = row
  @headerlist << key
  row
end
184
+
185
+ attr_reader :names
186
+
187
# Read +n+ raw records from the input, append them to the accumulated
# note text and return the text that was just read.
def note(n = 1)
  text = n.times.map { @io.gets(@rs) }.join
  @note << text
  text
end
193
+
194
# Read and discard +n+ records from the input.
def skip(n = 1)
  n.times do
    @io.gets(@rs)
  end
end
197
+
198
# Read the data records and store them in @table, built with
# CArray.object(@rows, @cols).
#
# n    - when given, records are read one by one with csv_feed until
#        +count+ reaches n (a record consumed for column detection is
#        counted); when nil, the rest of @io is handed to Rcsv.parse.
# cols - explicit number of columns; otherwise @names.size is used, or
#        the size of the first record read from the input.
+ def body (n=nil, cols=nil)
199
+ data = []
200
+ count = 0
201
+ if cols
202
+ @cols = cols
203
+ elsif @names
204
+ @cols = @names.size
205
+ else
# No column count known yet: the first record defines it.
206
+ list = csv_feed()
207
+ if list.nil?
# NOTE(review): on empty input @cols has not been assigned in this call,
# so it may still be nil here -- confirm CArray.object accepts that.
208
+ @rows = 0
209
+ @table = CArray.object(@rows, @cols)
210
+ return
211
+ end
212
+ data.push(list)
213
+ count += 1
214
+ @cols = list.size
215
+ end
216
+ if n
217
+ lsize = nil
218
+ while count < n and list = csv_feed(@cols)
219
+ lsize = list.size
220
+ if lsize == @cols
221
+ data.push(list)
# Equality was handled above, so this branch is the "shorter record" case:
# the record is padded with nil up to @cols.
222
+ elsif lsize <= @cols
223
+ record = Array.new(@cols, nil)
224
+ record[0,lsize] = list
225
+ data.push(record)
226
+ else
# Longer record: widen every row collected so far with nil and grow @cols.
227
+ extra = Array.new(lsize - @cols, nil)
228
+ data.each do |row|
229
+ row.push(*extra)
230
+ end
231
+ data.push(list)
232
+ @cols = lsize
233
+ # raise "csv parse error : too large column number at line #{@io.lineno}"
234
+ end
235
+ count += 1
236
+ end
237
+ else
# NOTE(review): only the column separator is passed to Rcsv; @rs is not
# used on this path, and rows are not padded/widened as in the branch above.
238
+ unless @io.eof?
239
+ data += Rcsv.parse(@io, column_separator: @sep, header: :none)
240
+ end
241
+ end
242
+ @rows = data.size
243
+ @table = CArray.object(@rows, @cols){ data }
# Presumably replaces every element equal to "" with nil -- confirm against
# the CArray indexing documentation.
244
+ @table[:eq,""] = nil
245
+ end
246
+
247
# Rename the column +name+ to +newname+ in the "names" header.
#
# Both arguments are converted with to_s, matching how #select and
# #column_names treat names. Raises RuntimeError when no names are
# defined or when +name+ is not among them (previously an unknown name
# ended in `names[nil] = ...` and a TypeError).
# Returns the updated list of names.
def rename(name, newname)
  names = @header["names"]
  raise "column names are not defined" if names.nil?
  i = names.index(name.to_s)
  raise "invalid argument #{name}" if i.nil?
  names[i] = newname.to_s
  @names = names
end
253
+
254
# Replace every column name by its lower-case form, in both
# @header["names"] and @names. Returns the new list.
def downcase
  lowered = @header["names"].map { |label| label.downcase }
  @header["names"] = lowered
  @names = lowered
end
258
+
259
# Keep only the given columns, in the given order.
#
# Each argument may be an Integer column index, a Range of indices, or a
# String/Symbol column name. @table is replaced by the selected columns,
# every header row is reordered accordingly, and @cols is updated to the
# new column count (it used to keep the old width, which made #run
# generate the wrong number of default names when no "names" header
# exists).
#
# With no arguments the selection is cleared and nil is returned.
# Raises RuntimeError ("invalid argument ...") for an unknown name or an
# unsupported argument type. Returns the selected names (nil when no
# "names" header exists).
def select(*namelist)
  @namelist = namelist.empty? ? nil : namelist
  return nil if @namelist.nil?
  all_indices = (0...@cols).to_a
  index_list = @namelist.map { |x|
    case x
    when Integer
      x
    when Range
      all_indices[x]
    when String, Symbol
      i = @names && @names.index(x.to_s)
      raise "invalid argument #{x}" unless i
      i
    else
      raise "invalid argument"
    end
  }.flatten
  @table = @table[nil, CA_INT(index_list)].to_ca
  @header.keys.each do |k|
    @header[k] = @header[k].values_at(*index_list)
  end
  @cols = index_list.size
  @names = @header["names"]
end
290
+
291
# Yield each column of @table together with its label for post
# processing. Labels come from the #select list if one is set, otherwise
# from @names; without either the column index is yielded as label.
def process
  labels = @namelist || @names
  if labels
    labels.each_with_index { |label, col| yield(label, @table[nil, col]) }
  else
    @table.dim1.times { |col| yield(col, @table[nil, col]) }
  end
end
306
+
307
# Replace @table by a converted table.
#
# When +data_type+ is a subclass of CA::Struct, @table[:i, nil] is
# converted with the given block, or by default with a block that builds
# data_type.new(*b[0, nil]) for each yielded element. For any other
# data type the table is converted with the block when one is given, and
# with to_type(data_type, options) otherwise.
def convert(data_type, options = {}, &block)
  struct_type = data_type.is_a?(Class) && data_type < CA::Struct
  @table =
    if struct_type
      builder = block || proc { |b| data_type.new(*b[0, nil]) }
      @table[:i, nil].convert(data_type, &builder)
    elsif block
      @table.convert(data_type, options, &block)
    else
      @table.to_type(data_type, options)
    end
end
324
+
325
+ private
326
+
327
# Read one logical CSV record from @io and return its fields as an Array,
# or nil at end of input.
#
# Physical records are read with gets(@rs) and appended to each other
# while the number of double quotes is odd, so a quoted field may span
# several records. A record without quotes is split directly; otherwise
# it is handed to csv_split(line, cols).
#
# The trailing record separator is removed with chomp!(@rs), so custom
# record separators no longer leak into the last field. Raises
# RuntimeError ("csv parse error") when the input ends inside a quoted
# field; previously that case silently produced an empty record.
def csv_feed(cols = nil)
  return nil if @io.eof?
  line = nil
  loop do
    chunk = @io.gets(@rs)
    if chunk.nil?
      # Input exhausted while still waiting for a closing quote.
      raise "csv parse error" if line
      return []
    end
    if line
      line << chunk
    else
      line = chunk
    end
    quotes = line.count('"')
    next if quotes.odd?           # inside a quoted field: keep reading
    line.chomp!(@rs)
    return csv_split(line, cols) unless quotes.zero?
    # Fast path: no quotes at all. Only use the regexp (which also eats
    # the blanks around separators) when the line contains blanks.
    if line.count(' ') == 0
      return line.split(@sep, -1)
    else
      return line.split(@regexp_simple2, -1)
    end
  end
end
364
+
365
# Split one CSV record containing quoted fields into an Array of fields.
#
# text - the record without its record separator; the field separator is
#        appended to it in place so every field ends with a separator
# cols - when given, the result is pre-sized to this many elements
#
# Fields are consumed with the four patterns prepared in the constructor:
# unquoted, quoted, quoted with doubled quotes (which are collapsed to a
# single quote) and empty (stored as nil).
# Raises RuntimeError ("csv parse error") when nothing matches.
def csv_split(text, cols = nil)
  fields = cols ? Array.new(cols) : []
  text << @sep
  @sc.string = text
  pos = 0
  loop do
    fields[pos] =
      if @sc.scan(@regexp_pat1) || @sc.scan(@regexp_pat2)
        @sc[1]
      elsif @sc.scan(@regexp_pat3)
        raw = @sc[1]
        raw =~ /"/ ? raw.gsub(/""/, '"') : raw
      elsif @sc.scan(@regexp_pat4)
        nil
      else
        raise "csv parse error"
      end
    pos += 1
    break if @sc.eos?
  end
  fields
end
400
+
401
+ end
402
+
403
+ end
404
+
405
+ end
406
+
407
+ module CA
408
+
409
+ class CSVWriter # :nodoc:
410
+
411
# Store the writer configuration.
#
# sep   - field separator (default ",")
# rs    - record separator written after each record (default $/)
# fill  - value handed to the Processor for masked elements (default "")
# block - DSL block that is later passed to each Processor
def initialize(sep = ",", rs = $/, fill = "", &block)
  @block, @sep, @rs, @fill = block, sep, rs, fill
end
417
+
418
# Run a Processor configured with this writer's settings and DSL block
# for +table+ on +io+, and return whatever its #run returns.
def write_io(table, io)
  processor = Processor.new(table, io, @sep, @rs, @fill, &@block)
  processor.run
end
421
+
422
# Write +table+ into +string+ through a StringIO wrapper and return the
# string.
def write_string(table, string)
  sink = StringIO.new(string)
  write_io(table, sink)
  string
end
426
+
427
# Write +table+ to the file +filename+, opened with +mode+ (default "w"),
# and return the result of #write_io.
#
# File.open is used instead of Kernel#open so that a file name starting
# with "|" is treated as a file name and never executed as a command;
# this also matches CSVReader#read_file.
def write_file(table, filename, mode = "w")
  File.open(filename, mode) do |io|
    write_io(table, io)
  end
end
432
+
433
+ class Processor # :nodoc:
434
+
435
# Prepare +table+ for writing to +io+.
#
# table - the array to write; when it has a data class its members become
#         the column names and its fields are merged into an object
#         table, otherwise the names are taken from the table's @names
#         (or @column_names) instance variable and the table is brought
#         into a two-dimensional object form
# io    - output object
# sep   - field separator
# rs    - record separator
# fill  - value passed to unmask when the table has a mask
# block - DSL block; defaults to a block that just calls #body
#
# The quoting pattern is built per instance from the escaped separator.
# The former /#{@sep}/o literal was interpolated only once per process,
# so every later writer kept using the first separator, and separators
# such as "|" or "." were interpreted as regexp syntax.
def initialize(table, io, sep, rs, fill, &block)
  @io = io
  @sep = sep
  @rs = rs
  @fill = fill
  @block = block || proc { body }
  if table.has_data_class?
    @names = table.members
    @table = CArray.merge(CA_OBJECT, table[nil].fields)
  else
    @names = table.instance_exec { @names }
    if @names.nil?
      @names = table.instance_exec { @column_names }
    end
    case
    when table.rank > 2
      @table = table.reshape(false, nil).object
    when table.rank == 1
      @table = table[:%, 1].object      ### convert to CA_OBJECT
    else
      @table = table.object             ### convert to CA_OBJECT
    end
  end
  if @table.has_mask?
    @table.unmask(@fill)
  end
  @regexp_simple = Regexp.new(Regexp.escape(@sep))
end
463
+
464
# Return a CSV-safe copy of +text+: embedded double quotes are doubled,
# and the value is wrapped in double quotes when it contained a quote or
# matches the separator pattern. The argument itself is not modified.
def csv_quote(text)
  escaped = text.dup
  had_quote = escaped.gsub!(/"/, '""')
  return escaped unless had_quote || escaped =~ @regexp_simple
  '"' + escaped + '"'
end
471
+
472
# Execute the DSL block: a block taking one parameter receives this
# processor, a block taking none is evaluated with the processor as self.
# Returns the block's value; any other arity raises RuntimeError.
def run
  arity = @block.arity
  return @block.call(self) if arity == 1
  return instance_exec(&@block) if arity == -1 || arity == 0
  raise "invalid block parameter"
end
482
+
483
# Set the list of column names used by #header and #process.
# Returns the list.
def names(list)
  @names = list
end
487
+
488
# Write one header record: the entries of +list+ (default: the column
# names) are quoted with csv_quote, joined with the field separator and
# followed by the record separator.
#
# Entries are converted with to_s first, so symbols, numbers and nil
# (written as an empty field) are accepted as well as strings.
# Raises RuntimeError when there is no list to write.
def header(list = @names)
  raise "column names are not defined" if list.nil?
  @io.write list.map { |s| csv_quote(s.to_s) }.join(@sep)
  @io.write(@rs)
end
493
+
494
# Write arbitrary text to the output: the arguments are printed with
# IO#print and followed by the record separator.
def puts(*argv)
  out = @io
  out.print(*argv)
  out.write(@rs)
end
499
+
500
# Write the data records.
#
# strict - when true (default) string elements are passed through
#          csv_quote before writing: for CA_OBJECT tables only the String
#          elements of a copy, for CA_FIXLEN tables every element of the
#          object form; other tables are written in object form as is.
#          When false the table is written unchanged.
# format - optional Kernel#format template applied to each row; without
#          it the row elements are joined with the field separator.
#
# Every record is followed by the record separator.
def body(strict: true, format: nil)
  quoted_table = lambda do
    case @table.data_type
    when CA_OBJECT
      copy = @table.to_ca
      copy[:is_kind_of, String].map! { |s| csv_quote(s) }
      copy
    when CA_FIXLEN
      converted = @table.object
      converted.map! { |s| csv_quote(s) }
      converted
    else
      @table.object
    end
  end
  table = strict ? quoted_table.call : @table
  table.dim0.times do |i|
    row = table[i, nil].to_a
    @io.write(format ? Kernel::format(format, *row) : row.join(@sep))
    @io.write(@rs)
  end
end
529
+
530
# Yield each column of the table together with its name for
# pre-processing before it is written. +namelist+ defaults to the column
# names; when it is nil the column index is yielded instead of a name.
def process(namelist = @names)
  unless namelist
    return @table.dim1.times { |col| yield(col, @table[nil, col]) }
  end
  namelist.each_with_index { |label, col| yield(label, @table[nil, col]) }
end
542
+ end
543
+ end
544
+ end
545
+
546
+ class CArray
547
+
548
# Read the CSV file +file+ with a CA::CSVReader built from the given
# separators and DSL block, and return the reader's result.
def self.load_csv(file, sep: ",", rs: $/, encoding: nil, &block)
  CA::CSVReader.new(sep: sep, rs: rs, &block).read_file(file, encoding: encoding)
end
552
+
553
# Parse CSV data from +io+, which may be an IO, a StringIO or a String,
# with a CA::CSVReader built from the given separators and DSL block.
# Raises RuntimeError ("invalid argument") for any other kind of source.
def self.from_csv(io, sep: ",", rs: $/, &block)
  reader = CA::CSVReader.new(sep: sep, rs: rs, &block)
  return reader.read_io(io) if io.is_a?(IO) || io.is_a?(StringIO)
  return reader.read_string(io) if io.is_a?(String)
  raise "invalid argument"
end
564
+
565
# Write this array as CSV to the file +file+.
#
# Settings may be given as keywords (rs, sep, fill, mode) or through the
# +option+ hash with the keys :sep, :rs, :fill and :mode; entries of the
# hash take precedence over the keywords. Returns the result of
# CA::CSVWriter#write_file.
def save_csv(file, option = {}, rs: $/, sep: ",", fill: "", mode: "w", &block)
  settings = { :sep => sep, :rs => rs, :fill => fill, :mode => mode }.merge(option)
  writer = CA::CSVWriter.new(settings[:sep], settings[:rs], settings[:fill], &block)
  writer.write_file(self, file, settings[:mode])
end
570
+
571
# Write this array as CSV to +io+.
#
# io     - a String (the CSV text is written into it and it is returned)
#          or any object responding to #write, such as an IO, StringIO or
#          Tempfile (the result of CA::CSVWriter#write_io is returned)
# option - hash with the keys :sep, :rs and :fill; its entries take
#          precedence over the keywords of the same name
#
# Raises RuntimeError ("invalid argument") for any other sink, matching
# CArray.from_csv; previously such a call wrote nothing and returned nil.
def to_csv(io = "", option = {}, rs: $/, sep: ",", fill: "", &block)
  option = { :sep => sep, :rs => rs, :fill => fill }.update(option)
  writer = CA::CSVWriter.new(option[:sep], option[:rs], option[:fill], &block)
  if io.is_a?(String)
    writer.write_string(self, io)
  elsif io.respond_to?(:write)
    writer.write_io(self, io)
  else
    raise "invalid argument"
  end
end
581
+
582
# Render the array as a text table and return it as a String.
#
# option - hash with :sep (text inserted between columns, default " ")
#          and :names (header row; when nil, self.names is used if the
#          receiver responds to it)
#
# The elements are stringified, joined with the names row through
# CArray.join, right-justified to the widths held in +length+ and
# serialized with to_csv; finally every comma is replaced by :sep.
+ def to_tabular (option = {})
583
+ option = {:sep=>" ", :names=>nil}.update(option)
584
+ if option[:names]
585
+ names = option[:names]
586
+ elsif self.respond_to?(:names)
587
+ names = self.names
588
+ end
# NOTE(review): when neither branch above assigns it, names is nil here
# and names.to_ca below is called on nil.
589
+ sep = option[:sep]
590
+ data = self.to_ca.map! {|s| s.to_s }
591
+ table = CArray.join([names.to_ca], [data])
# Presumably the maximum cell length per column (max along axis 0) --
# confirm against the CArray documentation.
592
+ length = table.convert{|s| s.length}.max(0)
# NOTE(review): gsub replaces every comma of the CSV text, including commas
# that are part of a cell value.
593
+ table.map_with_index! {|s, idx| s.rjust(length[idx[1]]) }.to_csv.gsub(/,/,sep)
594
+ end
595
+
596
+ end
597
+
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: carray-io-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroki Motoyoshi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-06-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: carray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rcsv
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.3.1
41
+ description: " CSV interface for CArray\n"
42
+ email: ''
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - Rakefile
48
+ - carray-io-csv.gemspec
49
+ - lib/carray-io-csv.rb
50
+ - lib/carray-io-csv/core.rb
51
+ homepage: https://github.com/himotoyoshi/carray-io-csv
52
+ licenses:
53
+ - MIT
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 1.8.1
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 2.7.7
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: CSV interface for CArray
75
+ test_files: []