carray-dataframe 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
4
- data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
3
+ metadata.gz: 00a3f769c9096380db116cb74660aebc6451f5508029c8e97d720ed254b9847b
4
+ data.tar.gz: 6e810823d0ee16ef2a7a60295b983ffdb46ccb83591c49b19c0285a49c14d4cb
5
5
  SHA512:
6
- metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
7
- data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
6
+ metadata.gz: 14880525aec99af0ba133d08eb19ec0715f2be56777593cdccb21f7e54ab4c0c51335ffaf557f86621f0eda815e6859dc4bc18465f7f74355f3f7365c7abfadd
7
+ data.tar.gz: '028c8109f1ada33196663884043c44134491ce58649a5e9341f91e616f9edbb113fbc202965782bf762bf398d483ce12fdd18cf0d511cce03237f4ad174cb612'
@@ -1,6 +1,6 @@
1
1
 
2
2
  Gem::Specification::new do |s|
3
- version = "1.1.1"
3
+ version = "1.2.0"
4
4
 
5
5
  files = Dir.glob("**/*") - [
6
6
  Dir.glob("carray-dataframe*.gem"),
@@ -22,6 +22,6 @@ Gem::Specification::new do |s|
22
22
  s.files = files
23
23
  s.required_ruby_version = ">= 1.8.1"
24
24
  s.add_runtime_dependency 'carray', '~> 1.5'
25
- s.add_runtime_dependency 'axlsx', '~> 2.0'
25
+ s.add_runtime_dependency 'caxlsx', '~> 3.0'
26
26
  s.add_runtime_dependency 'spreadsheet', '~> 1.1'
27
27
  end
@@ -1,4 +1,5 @@
1
1
  require "carray-timeindex"
2
+ require "carray-io-csv"
2
3
  require "carray-dataframe/dataframe"
3
4
  require "carray-dataframe/reference"
4
5
  require "carray-dataframe/loc_accessor"
@@ -73,7 +73,7 @@ class CADataFrame
73
73
  end
74
74
 
75
75
  def to_xlsx (filename, sheet_name: 'Sheet1', with_row_index: false, &block)
76
- require "axlsx"
76
+ require "caxlsx"
77
77
  xl = Axlsx::Package.new
78
78
  xl.use_shared_strings = true
79
79
  sheet = xl.workbook.add_worksheet(name: sheet_name)
@@ -28,7 +28,11 @@ class CADataFrame
28
28
  @column_names = columns.map(&:to_s)
29
29
  else
30
30
  if data.respond_to?(:column_names)
31
- @column_names = data.column_names.map(&:to_s)
31
+ if data.column_names.is_a?(Array)
32
+ @column_names = data.column_names.map(&:to_s)
33
+ else
34
+ @column_names = data.dim1.times.map{|i| "c#{i}" }
35
+ end
32
36
  elsif order
33
37
  @column_names = order.map(&:to_s)
34
38
  else
@@ -88,9 +92,13 @@ class CADataFrame
88
92
  end
89
93
 
90
94
  # Sets @row_number and check column length
91
- @row_number = @column_data.first[1].size
92
- if @column_names.any?{ |key| @column_data[key].size != @row_number }
93
- raise "column sizes mismatch"
95
+ if @column_data.empty? and index
96
+ @row_number = index.size
97
+ else
98
+ @row_number = @column_data.first[1].size
99
+ if @column_names.any?{ |key| @column_data[key].size != @row_number }
100
+ raise "column sizes mismatch"
101
+ end
94
102
  end
95
103
 
96
104
  # Processing option 'index'
@@ -206,6 +214,10 @@ class CADataFrame
206
214
  end
207
215
  end
208
216
 
217
# Reports whether a row index is currently set on this data frame.
#
# @return [Boolean] true when @row_index is neither nil nor false
def has_index?
  !!@row_index
end
220
+
209
221
  def replace (other)
210
222
  @column_names = other.column_names
211
223
  @column_data = other.column_data
@@ -215,8 +227,13 @@ class CADataFrame
215
227
  return self
216
228
  end
217
229
 
218
- def has_column?(name)
219
- return @column_names.include?(name)
230
# Reports whether the data frame has a column called +name+.
#
# The pseudo-column "index" is reported as present exactly when
# has_index? is true. Every other name is converted with to_s before the
# lookup in @column_names, so a Symbol and the equivalent String give the
# same answer (the "index" test already compared name.to_s).
#
# @param name [String, Symbol] column name to look up
# @return [Boolean]
def has_column?(name)
  key = name.to_s
  return has_index? if key == "index"
  @column_names.include?(key)
end
221
238
 
222
239
  def column_types
@@ -239,6 +256,18 @@ class CADataFrame
239
256
  end
240
257
  alias col column
241
258
 
259
# Assigns +col+ as the data stored in @column_data for one column.
#
# @param spec [Integer, String, Symbol] a position in @column_names, or a
#   column name (converted with to_s)
# @param col [Object] the column data, stored as given
# @return [Object] +col+
# @raise [RuntimeError] if +spec+ has an unsupported type, or is an Integer
#   that does not address an entry of @column_names
def set_column(spec, col)
  case spec
  when Integer
    name = @column_names[spec]
    # An out-of-range position would otherwise store the data under a nil key.
    raise "invalid column specifier" unless name
    @column_data[name] = col
  when String, Symbol
    @column_data[spec.to_s] = col
  else
    raise "invalid column specifier"
  end
end
# `private` takes the method name as a Symbol; a bare `set_column` here would
# be evaluated as a method call while the class body is being executed.
private :set_column
270
+
242
271
  def loc
243
272
  @loc ||= CADataFrame::LocAccessor.new(self)
244
273
  return @loc
@@ -429,10 +458,11 @@ class CADataFrame
429
458
  end
430
459
 
431
460
  def append_column (name, new_column = nil, &block)
461
+ name = name.to_s
432
462
  if new_column
433
463
  # do nothing
434
464
  elsif block
435
- new_column = instance_exec(&block)
465
+ new_column = instance_exec(self, &block)
436
466
  else
437
467
  new_column = @column_data.first[1].template(:object)
438
468
  end
@@ -442,18 +472,39 @@ class CADataFrame
442
472
  if new_column.rank != 1 or new_column.size != @row_number
443
473
  raise "invalid shape of appended column"
444
474
  end
445
- @column_names.push(name.to_s)
446
- @column_data[name.to_s] = new_column
475
+ @column_names.push(name)
476
+ @column_data[name] = new_column
477
+ return new_column
478
+ end
479
+
480
# Inserts a new column at position +pos+ of the column order.
#
# The column data is taken from +new_column+ when given; otherwise from the
# result of the block, which is run with instance_exec and also receives
# the data frame as its argument; otherwise an object-typed column is
# created. Data that is not a CArray is converted with to_ca.
#
# @param pos [Integer] position handed to Array#insert on @column_names
# @param name [String, Symbol] name of the new column (converted with to_s)
# @param new_column [Object, nil] column data
# @return [CArray] the stored column
# @raise [RuntimeError] if the column is not rank 1 or its size differs
#   from @row_number
def insert_column(pos, name, new_column = nil, &block)
  name = name.to_s
  new_column ||=
    if block
      instance_exec(self, &block)
    elsif @column_data.empty?
      # A frame holding only an index has no column to use as a template.
      # NOTE(review): assumes CArray.object(n) builds a rank-1 object array
      # of length n -- confirm against the carray API.
      CArray.object(@row_number)
    else
      @column_data.first[1].template(:object)
    end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 || new_column.size != @row_number
    raise "invalid shape of appended column"
  end
  @column_names.insert(pos, name)
  @column_data[name] = new_column
  return new_column
end
449
499
 
450
500
  alias append append_column
451
501
 
452
502
  def prepend_column (name, new_column = nil, &block)
503
+ name = name.to_s
453
504
  if new_column
454
505
  # do nothing
455
506
  elsif block
456
- new_column = instance_exec(&block)
507
+ new_column = instance_exec(self, &block)
457
508
  else
458
509
  new_column = @column_data.first[1].template(:object)
459
510
  end
@@ -463,8 +514,8 @@ class CADataFrame
463
514
  if new_column.rank != 1 or new_column.size != @row_number
464
515
  raise "invalid shape of appended column"
465
516
  end
466
- @column_names.unshift(name.to_s)
467
- @column_data[name.to_s] = new_column
517
+ @column_names.unshift(name)
518
+ @column_data[name] = new_column
468
519
  return new_column
469
520
  end
470
521
 
@@ -572,6 +623,7 @@ class CADataFrame
572
623
  end
573
624
  new_columns = {}
574
625
  names.map(&:to_s).each do |name|
626
+ raise "unknown column '#{name}'" unless column(name)
575
627
  new_columns[name] = column(name)[row]
576
628
  end
577
629
  return CADataFrame.new(new_columns, index: @row_index ? @row_index[row] : nil)
@@ -977,11 +1029,11 @@ end
977
1029
  class CADataFrame
978
1030
 
979
1031
  def matchup (keyname, reference)
980
- key = column(keyname.to_s)
1032
+ key = column(keyname)
981
1033
  idx = reference.matchup(key)
982
1034
  new_columns = {}
983
1035
  each_column_name do |name|
984
- if name == keyname
1036
+ if name == keyname.to_s
985
1037
  new_columns[name] = reference
986
1038
  else
987
1039
  new_columns[name] = column(name).project(idx)
@@ -992,9 +1044,7 @@ class CADataFrame
992
1044
  else
993
1045
  new_row_index = nil
994
1046
  end
995
- return CADataFrame.new(new_columns, index: new_row_index) {
996
- self.send(keyname)[] = reference
997
- }
1047
+ return CADataFrame.new(new_columns, index: new_row_index)
998
1048
  end
999
1049
 
1000
1050
  def histogram (name, scale = nil, options = nil)
@@ -4,6 +4,8 @@
4
4
  #
5
5
  ######################################
6
6
  require "spreadsheet"
7
+ require "carray-io-sqlite3"
8
+
7
9
  class CArray
8
10
 
9
11
  def save_excel (filename, &block)
@@ -42,8 +44,8 @@ class CADataFrame
42
44
  end
43
45
  end
44
46
 
45
- def self.load_csv (file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
46
- df = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block).to_dataframe(index: index)
47
+ def self.read_csv (file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
48
+ df = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block).to_dataframe(index: index)
47
49
  if df
48
50
  return df.arrange{
49
51
  column_names.each do |name|
@@ -55,8 +57,13 @@ class CADataFrame
55
57
  end
56
58
  end
57
59
 
58
- def self.from_csv (file, sep: ",", rs: $/, index: nil, &block)
59
- df = CArray.from_csv(file, sep: sep, rs: rs, &block).to_dataframe(index: index)
60
# Deprecated entry point kept for compatibility: prints a deprecation
# warning and delegates to read_csv with every option it was given.
#
# @param file [Object] passed through to read_csv
# @param sep [String] passed through to read_csv
# @param rs [String] passed through to read_csv
# @param encoding [Object, nil] passed through to read_csv
# @param quote_char [String] passed through to read_csv
# @param index [Object, nil] passed through to read_csv
# @return whatever read_csv returns
def self.load_csv(file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
  warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
  # +index+ has to be forwarded as well; otherwise the caller's value is
  # accepted here and then silently dropped.
  read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, index: index, &block)
end
64
+
65
+ def self.parse_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
66
+ df = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block).to_dataframe(index: index)
60
67
  if df
61
68
  return df.arrange{
62
69
  column_names.each do |name|
@@ -68,8 +75,14 @@ class CADataFrame
68
75
  end
69
76
  end
70
77
 
71
- def to_sqlite3 (*args)
72
- self.to_ca.to_sqlite3(*args)
78
# Deprecated entry point kept for compatibility: prints a deprecation
# warning and delegates to parse_csv with every option it was given.
#
# @param file [Object] passed through to parse_csv
# @param sep [String] passed through to parse_csv
# @param rs [String] passed through to parse_csv
# @param quote_char [String] passed through to parse_csv
# @param index [Object, nil] passed through to parse_csv
# @return whatever parse_csv returns
def self.from_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
  # +quote_char+ has to be forwarded as well; otherwise a non-default quote
  # character is accepted here and then silently replaced by the default.
  parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, index: index, &block)
end
82
+
83
+
84
# Converts the data frame with to_ca and calls to_sqlite3 on the result.
#
# @param args [Hash] keyword options, passed through unchanged
# @return whatever the delegated to_sqlite3 call returns
def to_sqlite3(**args)
  converted = to_ca
  converted.to_sqlite3(**args)
end
74
87
 
75
88
  def to_sql (tablename)
@@ -0,0 +1,96 @@
1
+ ######################################
2
+ #
3
+ # IO methods
4
+ #
5
+ ######################################
6
+ require "spreadsheet"
7
class CArray

  # Writes the array to an Excel file through the Spreadsheet library.
  #
  # One worksheet row is filled per index along the first dimension. When a
  # block is given it is called with the worksheet before the file is
  # written.
  #
  # @param filename [Object] destination handed to the workbook's write
  # @return the result of the workbook's write call
  # @raise [RuntimeError] if the rank is 3 or greater
  def save_excel(filename, &block)
    raise "too large rank (>2) to write excel file" if rank >= 3
    workbook = Spreadsheet::Workbook.new
    page = workbook.create_worksheet
    dim0.times do |i|
      page.row(i).push(*self[i, nil])
    end
    block.call(page) if block
    workbook.write(filename)
  end

  # Reads one worksheet of an Excel file and converts its rows with to_ca.
  #
  # @param filename [Object] source handed to Spreadsheet.open
  # @param sheet [Object] worksheet selector handed to the workbook (default 0)
  # @return the result of to_ca on the array of row arrays
  def self.load_excel(filename, sheet = 0)
    worksheet = Spreadsheet.open(filename).worksheet(sheet)
    worksheet.map(&:to_a).to_ca
  end
end
30
class CADataFrame

  # Builds a data frame from CArray.load_sqlite3 and, inside arrange, calls
  # mask(name, nil) for every column.
  #
  # @param args [Array] passed through to CArray.load_sqlite3
  # @return the arranged data frame, or nil when to_dataframe yields a
  #   false value
  def self.load_sqlite3(*args)
    frame = CArray.load_sqlite3(*args).to_dataframe
    return nil unless frame
    frame.arrange do
      column_names.each { |col| mask col, nil }
    end
  end

  # Builds a data frame from CArray.load_csv and, inside arrange, calls
  # mask(name, nil) for every column.
  #
  # @param file [Object] passed through to CArray.load_csv
  # @param index [Object, nil] passed to to_dataframe
  # @return the arranged data frame, or nil when to_dataframe yields a
  #   false value
  def self.load_csv(file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
    table = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block)
    frame = table.to_dataframe(index: index)
    return nil unless frame
    frame.arrange do
      column_names.each { |col| mask col, nil }
    end
  end

  # Builds a data frame from CArray.from_csv and, inside arrange, calls
  # mask(name, nil) for every column.
  #
  # @param file [Object] passed through to CArray.from_csv
  # @param index [Object, nil] passed to to_dataframe
  # @return the arranged data frame, or nil when to_dataframe yields a
  #   false value
  def self.from_csv(file, sep: ",", rs: $/, index: nil, &block)
    table = CArray.from_csv(file, sep: sep, rs: rs, &block)
    frame = table.to_dataframe(index: index)
    return nil unless frame
    frame.arrange do
      column_names.each { |col| mask col, nil }
    end
  end

  # Converts the data frame with to_ca and calls to_sqlite3 on the result.
  #
  # @param args [Array] passed through unchanged
  def to_sqlite3(*args)
    to_ca.to_sqlite3(*args)
  end

  # Calls to_sqlite3 with database ":memory:" and the given table name.
  #
  # When any column name contains '.', ' ' or '-', a new data frame is built
  # first in which those characters are replaced by '_' in the column names.
  #
  # @param tablename [Object] value passed as the table: option
  # @return whatever to_sqlite3 returns
  def to_sql(tablename)
    unless @column_names.any? { |s| s =~ /[\. \-]/ }
      return to_sqlite3(database: ":memory:", table: tablename)
    end
    renamed = {}
    each_column_name do |name|
      renamed[name.gsub(/[\. \-]/, '_')] = column(name)
    end
    CADataFrame.new(renamed).to_sqlite3(database: ":memory:", table: tablename)
  end
end
89
module SQLite3
  class Database

    # Passes this database and +expr+ to CADataFrame.load_sqlite3.
    #
    # @param expr [Object] second argument for CADataFrame.load_sqlite3
    # @return whatever CADataFrame.load_sqlite3 returns
    def to_df(expr)
      CADataFrame.load_sqlite3(self, expr)
    end
  end
end
+ end
@@ -27,7 +27,9 @@ class CADataFrame
27
27
  CADataFrame::Merge.join(self, other_df, opts)
28
28
  end
29
29
  end
30
+
30
31
  class CADataFrame
32
+
31
33
  class MergeFrame
32
34
  class NilSorter
33
35
  include Comparable
@@ -65,7 +67,12 @@ class CADataFrame
65
67
  rkey = first_right_key
66
68
  row(lkey, rkey).tap { |r| res << r if r }
67
69
  end
68
- CADataFrame.new(res, order: dataframe_vector_names)
70
+ df = CADataFrame.new(res, order: dataframe_vector_names)
71
+ if dataframe_vector_names.include?("index")
72
+ df.set_index("index")
73
+ else
74
+ df
75
+ end
69
76
  end
70
77
  private
71
78
  attr_reader :on, :indicator,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carray-dataframe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroki Motoyoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-09 00:00:00.000000000 Z
11
+ date: 2021-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: carray
@@ -25,19 +25,19 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.5'
27
27
  - !ruby/object:Gem::Dependency
28
- name: axlsx
28
+ name: caxlsx
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.0'
33
+ version: '3.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.0'
40
+ version: '3.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: spreadsheet
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -70,6 +70,7 @@ files:
70
70
  - lib/carray-dataframe/group.rb
71
71
  - lib/carray-dataframe/iloc_accessor.rb
72
72
  - lib/carray-dataframe/io.rb
73
+ - lib/carray-dataframe/io.rb~
73
74
  - lib/carray-dataframe/join.rb
74
75
  - lib/carray-dataframe/loc_accessor.rb
75
76
  - lib/carray-dataframe/pivot.rb