carray-dataframe 1.1.1 → 1.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
4
- data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
3
+ metadata.gz: 00a3f769c9096380db116cb74660aebc6451f5508029c8e97d720ed254b9847b
4
+ data.tar.gz: 6e810823d0ee16ef2a7a60295b983ffdb46ccb83591c49b19c0285a49c14d4cb
5
5
  SHA512:
6
- metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
7
- data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
6
+ metadata.gz: 14880525aec99af0ba133d08eb19ec0715f2be56777593cdccb21f7e54ab4c0c51335ffaf557f86621f0eda815e6859dc4bc18465f7f74355f3f7365c7abfadd
7
+ data.tar.gz: '028c8109f1ada33196663884043c44134491ce58649a5e9341f91e616f9edbb113fbc202965782bf762bf398d483ce12fdd18cf0d511cce03237f4ad174cb612'
@@ -1,6 +1,6 @@
1
1
 
2
2
  Gem::Specification::new do |s|
3
- version = "1.1.1"
3
+ version = "1.2.0"
4
4
 
5
5
  files = Dir.glob("**/*") - [
6
6
  Dir.glob("carray-dataframe*.gem"),
@@ -22,6 +22,6 @@ Gem::Specification::new do |s|
22
22
  s.files = files
23
23
  s.required_ruby_version = ">= 1.8.1"
24
24
  s.add_runtime_dependency 'carray', '~> 1.5'
25
- s.add_runtime_dependency 'axlsx', '~> 2.0'
25
+ s.add_runtime_dependency 'caxlsx', '~> 3.0'
26
26
  s.add_runtime_dependency 'spreadsheet', '~> 1.1'
27
27
  end
@@ -1,4 +1,5 @@
1
1
  require "carray-timeindex"
2
+ require "carray-io-csv"
2
3
  require "carray-dataframe/dataframe"
3
4
  require "carray-dataframe/reference"
4
5
  require "carray-dataframe/loc_accessor"
@@ -73,7 +73,7 @@ class CADataFrame
73
73
  end
74
74
 
75
75
  def to_xlsx (filename, sheet_name: 'Sheet1', with_row_index: false, &block)
76
- require "axlsx"
76
+ require "caxlsx"
77
77
  xl = Axlsx::Package.new
78
78
  xl.use_shared_strings = true
79
79
  sheet = xl.workbook.add_worksheet(name: sheet_name)
@@ -28,7 +28,11 @@ class CADataFrame
28
28
  @column_names = columns.map(&:to_s)
29
29
  else
30
30
  if data.respond_to?(:column_names)
31
- @column_names = data.column_names.map(&:to_s)
31
+ if data.column_names.is_a?(Array)
32
+ @column_names = data.column_names.map(&:to_s)
33
+ else
34
+ @column_names = data.dim1.times.map{|i| "c#{i}" }
35
+ end
32
36
  elsif order
33
37
  @column_names = order.map(&:to_s)
34
38
  else
@@ -88,9 +92,13 @@ class CADataFrame
88
92
  end
89
93
 
90
94
  # Sets @row_number and check column length
91
- @row_number = @column_data.first[1].size
92
- if @column_names.any?{ |key| @column_data[key].size != @row_number }
93
- raise "column sizes mismatch"
95
+ if @column_data.empty? and index
96
+ @row_number = index.size
97
+ else
98
+ @row_number = @column_data.first[1].size
99
+ if @column_names.any?{ |key| @column_data[key].size != @row_number }
100
+ raise "column sizes mismatch"
101
+ end
94
102
  end
95
103
 
96
104
  # Processing option 'index'
@@ -206,6 +214,10 @@ class CADataFrame
206
214
  end
207
215
  end
208
216
 
217
+ def has_index?
218
+ @row_index ? true : false
219
+ end
220
+
209
221
  def replace (other)
210
222
  @column_names = other.column_names
211
223
  @column_data = other.column_data
@@ -215,8 +227,13 @@ class CADataFrame
215
227
  return self
216
228
  end
217
229
 
218
- def has_column?(name)
219
- return @column_names.include?(name)
230
+ def has_column? (name)
231
+ case name.to_s
232
+ when "index"
233
+ return has_index?
234
+ else
235
+ return @column_names.include?(name)
236
+ end
220
237
  end
221
238
 
222
239
  def column_types
@@ -239,6 +256,18 @@ class CADataFrame
239
256
  end
240
257
  alias col column
241
258
 
259
+ def set_column (spec, col)
260
+ case spec
261
+ when Integer
262
+ return @column_data[@column_names[spec]] = col
263
+ when String, Symbol
264
+ return @column_data[spec.to_s] = col
265
+ else
266
+ raise "invalid column specifier"
267
+ end
268
+ end
269
+ private set_column
270
+
242
271
  def loc
243
272
  @loc ||= CADataFrame::LocAccessor.new(self)
244
273
  return @loc
@@ -429,10 +458,11 @@ class CADataFrame
429
458
  end
430
459
 
431
460
  def append_column (name, new_column = nil, &block)
461
+ name = name.to_s
432
462
  if new_column
433
463
  # do nothing
434
464
  elsif block
435
- new_column = instance_exec(&block)
465
+ new_column = instance_exec(self, &block)
436
466
  else
437
467
  new_column = @column_data.first[1].template(:object)
438
468
  end
@@ -442,18 +472,39 @@ class CADataFrame
442
472
  if new_column.rank != 1 or new_column.size != @row_number
443
473
  raise "invalid shape of appended column"
444
474
  end
445
- @column_names.push(name.to_s)
446
- @column_data[name.to_s] = new_column
475
+ @column_names.push(name)
476
+ @column_data[name] = new_column
477
+ return new_column
478
+ end
479
+
480
+ def insert_column (pos, name, new_column = nil, &block)
481
+ name = name.to_s
482
+ if new_column
483
+ # do nothing
484
+ elsif block
485
+ new_column = instance_exec(self, &block)
486
+ else
487
+ new_column = @column_data.first[1].template(:object)
488
+ end
489
+ unless new_column.is_a?(CArray)
490
+ new_column = new_column.to_ca
491
+ end
492
+ if new_column.rank != 1 or new_column.size != @row_number
493
+ raise "invalid shape of appended column"
494
+ end
495
+ @column_names.insert(pos, name)
496
+ @column_data[name] = new_column
447
497
  return new_column
448
498
  end
449
499
 
450
500
  alias append append_column
451
501
 
452
502
  def prepend_column (name, new_column = nil, &block)
503
+ name = name.to_s
453
504
  if new_column
454
505
  # do nothing
455
506
  elsif block
456
- new_column = instance_exec(&block)
507
+ new_column = instance_exec(self, &block)
457
508
  else
458
509
  new_column = @column_data.first[1].template(:object)
459
510
  end
@@ -463,8 +514,8 @@ class CADataFrame
463
514
  if new_column.rank != 1 or new_column.size != @row_number
464
515
  raise "invalid shape of appended column"
465
516
  end
466
- @column_names.unshift(name.to_s)
467
- @column_data[name.to_s] = new_column
517
+ @column_names.unshift(name)
518
+ @column_data[name] = new_column
468
519
  return new_column
469
520
  end
470
521
 
@@ -572,6 +623,7 @@ class CADataFrame
572
623
  end
573
624
  new_columns = {}
574
625
  names.map(&:to_s).each do |name|
626
+ raise "unknown column '#{name}'" unless column(name)
575
627
  new_columns[name] = column(name)[row]
576
628
  end
577
629
  return CADataFrame.new(new_columns, index: @row_index ? @row_index[row] : nil)
@@ -977,11 +1029,11 @@ end
977
1029
  class CADataFrame
978
1030
 
979
1031
  def matchup (keyname, reference)
980
- key = column(keyname.to_s)
1032
+ key = column(keyname)
981
1033
  idx = reference.matchup(key)
982
1034
  new_columns = {}
983
1035
  each_column_name do |name|
984
- if name == keyname
1036
+ if name == keyname.to_s
985
1037
  new_columns[name] = reference
986
1038
  else
987
1039
  new_columns[name] = column(name).project(idx)
@@ -992,9 +1044,7 @@ class CADataFrame
992
1044
  else
993
1045
  new_row_index = nil
994
1046
  end
995
- return CADataFrame.new(new_columns, index: new_row_index) {
996
- self.send(keyname)[] = reference
997
- }
1047
+ return CADataFrame.new(new_columns, index: new_row_index)
998
1048
  end
999
1049
 
1000
1050
  def histogram (name, scale = nil, options = nil)
@@ -4,6 +4,8 @@
4
4
  #
5
5
  ######################################
6
6
  require "spreadsheet"
7
+ require "carray-io-sqlite3"
8
+
7
9
  class CArray
8
10
 
9
11
  def save_excel (filename, &block)
@@ -42,8 +44,8 @@ class CADataFrame
42
44
  end
43
45
  end
44
46
 
45
- def self.load_csv (file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
46
- df = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block).to_dataframe(index: index)
47
+ def self.read_csv (file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
48
+ df = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block).to_dataframe(index: index)
47
49
  if df
48
50
  return df.arrange{
49
51
  column_names.each do |name|
@@ -55,8 +57,13 @@ class CADataFrame
55
57
  end
56
58
  end
57
59
 
58
- def self.from_csv (file, sep: ",", rs: $/, index: nil, &block)
59
- df = CArray.from_csv(file, sep: sep, rs: rs, &block).to_dataframe(index: index)
60
+ def self.load_csv (file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
61
+ warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
62
+ self.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block)
63
+ end
64
+
65
+ def self.parse_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
66
+ df = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block).to_dataframe(index: index)
60
67
  if df
61
68
  return df.arrange{
62
69
  column_names.each do |name|
@@ -68,8 +75,14 @@ class CADataFrame
68
75
  end
69
76
  end
70
77
 
71
- def to_sqlite3 (*args)
72
- self.to_ca.to_sqlite3(*args)
78
+ def self.from_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
79
+ warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
80
+ self.parse_csv(file, sep: sep, rs: rs, index: index, &block)
81
+ end
82
+
83
+
84
+ def to_sqlite3 (**args)
85
+ self.to_ca.to_sqlite3(**args)
73
86
  end
74
87
 
75
88
  def to_sql (tablename)
@@ -0,0 +1,96 @@
1
+ ######################################
2
+ #
3
+ # IO methods
4
+ #
5
+ ######################################
6
+ require "spreadsheet"
7
+ class CArray
8
+
9
+ def save_excel (filename, &block)
10
+ if self.rank >= 3
11
+ raise "too large rank (>2) to write excel file"
12
+ end
13
+ book = Spreadsheet::Workbook.new
14
+ worksheet = book.create_worksheet
15
+ self.dim0.times do |i|
16
+ worksheet.row(i).push *self[i,nil]
17
+ end
18
+ if block
19
+ block.call(worksheet)
20
+ end
21
+ book.write(filename)
22
+ end
23
+
24
+ def self.load_excel (filename, sheet=0)
25
+ book = Spreadsheet.open(filename)
26
+ sheet = book.worksheet(sheet)
27
+ return sheet.map(&:to_a).to_ca
28
+ end
29
+ end
30
+ class CADataFrame
31
+
32
+ def self.load_sqlite3 (*args)
33
+ df = CArray.load_sqlite3(*args).to_dataframe
34
+ if df
35
+ return df.arrange{
36
+ column_names.each do |name|
37
+ mask name, nil
38
+ end
39
+ }
40
+ else
41
+ return nil
42
+ end
43
+ end
44
+
45
+ def self.load_csv (file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
46
+ df = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block).to_dataframe(index: index)
47
+ if df
48
+ return df.arrange{
49
+ column_names.each do |name|
50
+ mask name, nil
51
+ end
52
+ }
53
+ else
54
+ return nil
55
+ end
56
+ end
57
+
58
+ def self.from_csv (file, sep: ",", rs: $/, index: nil, &block)
59
+ df = CArray.from_csv(file, sep: sep, rs: rs, &block).to_dataframe(index: index)
60
+ if df
61
+ return df.arrange{
62
+ column_names.each do |name|
63
+ mask name, nil
64
+ end
65
+ }
66
+ else
67
+ return nil
68
+ end
69
+ end
70
+
71
+ def to_sqlite3 (*args)
72
+ self.to_ca.to_sqlite3(*args)
73
+ end
74
+
75
+ def to_sql (tablename)
76
+ if @column_names.any?{ |s| s =~ /[\. \-]/ }
77
+ columns = {}
78
+ each_column_name do |name|
79
+ name2 = name.gsub(/[\. \-]/, '_')
80
+ columns[name2] = column(name)
81
+ end
82
+ df = CADataFrame.new(columns)
83
+ return df.to_sqlite3(database: ":memory:", table: tablename)
84
+ else
85
+ return to_sqlite3(database: ":memory:", table: tablename)
86
+ end
87
+ end
88
+ end
89
+ module SQLite3
90
+ class Database
91
+
92
+ def to_df (expr)
93
+ return CADataFrame.load_sqlite3 self, expr
94
+ end
95
+ end
96
+ end
@@ -27,7 +27,9 @@ class CADataFrame
27
27
  CADataFrame::Merge.join(self, other_df, opts)
28
28
  end
29
29
  end
30
+
30
31
  class CADataFrame
32
+
31
33
  class MergeFrame
32
34
  class NilSorter
33
35
  include Comparable
@@ -65,7 +67,12 @@ class CADataFrame
65
67
  rkey = first_right_key
66
68
  row(lkey, rkey).tap { |r| res << r if r }
67
69
  end
68
- CADataFrame.new(res, order: dataframe_vector_names)
70
+ df = CADataFrame.new(res, order: dataframe_vector_names)
71
+ if dataframe_vector_names.include?("index")
72
+ df.set_index("index")
73
+ else
74
+ df
75
+ end
69
76
  end
70
77
  private
71
78
  attr_reader :on, :indicator,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carray-dataframe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroki Motoyoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-09 00:00:00.000000000 Z
11
+ date: 2021-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: carray
@@ -25,19 +25,19 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.5'
27
27
  - !ruby/object:Gem::Dependency
28
- name: axlsx
28
+ name: caxlsx
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.0'
33
+ version: '3.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.0'
40
+ version: '3.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: spreadsheet
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -70,6 +70,7 @@ files:
70
70
  - lib/carray-dataframe/group.rb
71
71
  - lib/carray-dataframe/iloc_accessor.rb
72
72
  - lib/carray-dataframe/io.rb
73
+ - lib/carray-dataframe/io.rb~
73
74
  - lib/carray-dataframe/join.rb
74
75
  - lib/carray-dataframe/loc_accessor.rb
75
76
  - lib/carray-dataframe/pivot.rb