carray-dataframe 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/carray-dataframe.gemspec +2 -2
- data/lib/carray-dataframe.rb +1 -0
- data/lib/carray-dataframe/converter.rb +1 -1
- data/lib/carray-dataframe/dataframe.rb +67 -17
- data/lib/carray-dataframe/io.rb +19 -6
- data/lib/carray-dataframe/io.rb~ +96 -0
- data/lib/carray-dataframe/join.rb +8 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00a3f769c9096380db116cb74660aebc6451f5508029c8e97d720ed254b9847b
|
4
|
+
data.tar.gz: 6e810823d0ee16ef2a7a60295b983ffdb46ccb83591c49b19c0285a49c14d4cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14880525aec99af0ba133d08eb19ec0715f2be56777593cdccb21f7e54ab4c0c51335ffaf557f86621f0eda815e6859dc4bc18465f7f74355f3f7365c7abfadd
|
7
|
+
data.tar.gz: '028c8109f1ada33196663884043c44134491ce58649a5e9341f91e616f9edbb113fbc202965782bf762bf398d483ce12fdd18cf0d511cce03237f4ad174cb612'
|
data/carray-dataframe.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
|
2
2
|
Gem::Specification::new do |s|
|
3
|
-
version = "1.1.1"
|
3
|
+
version = "1.2.0"
|
4
4
|
|
5
5
|
files = Dir.glob("**/*") - [
|
6
6
|
Dir.glob("carray-dataframe*.gem"),
|
@@ -22,6 +22,6 @@ Gem::Specification::new do |s|
|
|
22
22
|
s.files = files
|
23
23
|
s.required_ruby_version = ">= 1.8.1"
|
24
24
|
s.add_runtime_dependency 'carray', '~> 1.5'
|
25
|
-
s.add_runtime_dependency '
|
25
|
+
s.add_runtime_dependency 'caxlsx', '~> 3.0'
|
26
26
|
s.add_runtime_dependency 'spreadsheet', '~> 1.1'
|
27
27
|
end
|
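data/lib/carray-dataframe.rb
CHANGED
[hunk not captured in this extract: +1 -0]
data/lib/carray-dataframe/converter.rb
CHANGED
[hunk not captured in this extract: +1 -1]
data/lib/carray-dataframe/dataframe.rb
CHANGED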
@@ -28,7 +28,11 @@ class CADataFrame
|
|
28
28
|
@column_names = columns.map(&:to_s)
|
29
29
|
else
|
30
30
|
if data.respond_to?(:column_names)
|
31
|
-
|
31
|
+
if data.column_names.is_a?(Array)
|
32
|
+
@column_names = data.column_names.map(&:to_s)
|
33
|
+
else
|
34
|
+
@column_names = data.dim1.times.map{|i| "c#{i}" }
|
35
|
+
end
|
32
36
|
elsif order
|
33
37
|
@column_names = order.map(&:to_s)
|
34
38
|
else
|
@@ -88,9 +92,13 @@ class CADataFrame
|
|
88
92
|
end
|
89
93
|
|
90
94
|
# Sets @row_number and check column length
|
91
|
-
|
92
|
-
|
93
|
-
|
95
|
+
if @column_data.empty? and index
|
96
|
+
@row_number = index.size
|
97
|
+
else
|
98
|
+
@row_number = @column_data.first[1].size
|
99
|
+
if @column_names.any?{ |key| @column_data[key].size != @row_number }
|
100
|
+
raise "column sizes mismatch"
|
101
|
+
end
|
94
102
|
end
|
95
103
|
|
96
104
|
# Processing option 'index'
|
@@ -206,6 +214,10 @@ class CADataFrame
|
|
206
214
|
end
|
207
215
|
end
|
208
216
|
|
217
|
+
def has_index?
|
218
|
+
@row_index ? true : false
|
219
|
+
end
|
220
|
+
|
209
221
|
def replace (other)
|
210
222
|
@column_names = other.column_names
|
211
223
|
@column_data = other.column_data
|
@@ -215,8 +227,13 @@ class CADataFrame
|
|
215
227
|
return self
|
216
228
|
end
|
217
229
|
|
218
|
-
def has_column?(name)
|
219
|
-
|
230
|
+
def has_column? (name)
|
231
|
+
case name.to_s
|
232
|
+
when "index"
|
233
|
+
return has_index?
|
234
|
+
else
|
235
|
+
return @column_names.include?(name)
|
236
|
+
end
|
220
237
|
end
|
221
238
|
|
222
239
|
def column_types
|
@@ -239,6 +256,18 @@ class CADataFrame
|
|
239
256
|
end
|
240
257
|
alias col column
|
241
258
|
|
259
|
+
def set_column (spec, col)
|
260
|
+
case spec
|
261
|
+
when Integer
|
262
|
+
return @column_data[@column_names[spec]] = col
|
263
|
+
when String, Symbol
|
264
|
+
return @column_data[spec.to_s] = col
|
265
|
+
else
|
266
|
+
raise "invalid column specifier"
|
267
|
+
end
|
268
|
+
end
|
269
|
+
private set_column
|
270
|
+
|
242
271
|
def loc
|
243
272
|
@loc ||= CADataFrame::LocAccessor.new(self)
|
244
273
|
return @loc
|
@@ -429,10 +458,11 @@ class CADataFrame
|
|
429
458
|
end
|
430
459
|
|
431
460
|
def append_column (name, new_column = nil, &block)
|
461
|
+
name = name.to_s
|
432
462
|
if new_column
|
433
463
|
# do nothing
|
434
464
|
elsif block
|
435
|
-
new_column = instance_exec(&block)
|
465
|
+
new_column = instance_exec(self, &block)
|
436
466
|
else
|
437
467
|
new_column = @column_data.first[1].template(:object)
|
438
468
|
end
|
@@ -442,18 +472,39 @@ class CADataFrame
|
|
442
472
|
if new_column.rank != 1 or new_column.size != @row_number
|
443
473
|
raise "invalid shape of appended column"
|
444
474
|
end
|
445
|
-
@column_names.push(name
|
446
|
-
@column_data[name
|
475
|
+
@column_names.push(name)
|
476
|
+
@column_data[name] = new_column
|
477
|
+
return new_column
|
478
|
+
end
|
479
|
+
|
480
|
+
def insert_column (pos, name, new_column = nil, &block)
|
481
|
+
name = name.to_s
|
482
|
+
if new_column
|
483
|
+
# do nothing
|
484
|
+
elsif block
|
485
|
+
new_column = instance_exec(self, &block)
|
486
|
+
else
|
487
|
+
new_column = @column_data.first[1].template(:object)
|
488
|
+
end
|
489
|
+
unless new_column.is_a?(CArray)
|
490
|
+
new_column = new_column.to_ca
|
491
|
+
end
|
492
|
+
if new_column.rank != 1 or new_column.size != @row_number
|
493
|
+
raise "invalid shape of appended column"
|
494
|
+
end
|
495
|
+
@column_names.insert(pos, name)
|
496
|
+
@column_data[name] = new_column
|
447
497
|
return new_column
|
448
498
|
end
|
449
499
|
|
450
500
|
alias append append_column
|
451
501
|
|
452
502
|
def prepend_column (name, new_column = nil, &block)
|
503
|
+
name = name.to_s
|
453
504
|
if new_column
|
454
505
|
# do nothing
|
455
506
|
elsif block
|
456
|
-
new_column = instance_exec(&block)
|
507
|
+
new_column = instance_exec(self, &block)
|
457
508
|
else
|
458
509
|
new_column = @column_data.first[1].template(:object)
|
459
510
|
end
|
@@ -463,8 +514,8 @@ class CADataFrame
|
|
463
514
|
if new_column.rank != 1 or new_column.size != @row_number
|
464
515
|
raise "invalid shape of appended column"
|
465
516
|
end
|
466
|
-
@column_names.unshift(name
|
467
|
-
@column_data[name
|
517
|
+
@column_names.unshift(name)
|
518
|
+
@column_data[name] = new_column
|
468
519
|
return new_column
|
469
520
|
end
|
470
521
|
|
@@ -572,6 +623,7 @@ class CADataFrame
|
|
572
623
|
end
|
573
624
|
new_columns = {}
|
574
625
|
names.map(&:to_s).each do |name|
|
626
|
+
raise "unknown column '#{name}'" unless column(name)
|
575
627
|
new_columns[name] = column(name)[row]
|
576
628
|
end
|
577
629
|
return CADataFrame.new(new_columns, index: @row_index ? @row_index[row] : nil)
|
@@ -977,11 +1029,11 @@ end
|
|
977
1029
|
class CADataFrame
|
978
1030
|
|
979
1031
|
def matchup (keyname, reference)
|
980
|
-
key = column(keyname
|
1032
|
+
key = column(keyname)
|
981
1033
|
idx = reference.matchup(key)
|
982
1034
|
new_columns = {}
|
983
1035
|
each_column_name do |name|
|
984
|
-
if name == keyname
|
1036
|
+
if name == keyname.to_s
|
985
1037
|
new_columns[name] = reference
|
986
1038
|
else
|
987
1039
|
new_columns[name] = column(name).project(idx)
|
@@ -992,9 +1044,7 @@ class CADataFrame
|
|
992
1044
|
else
|
993
1045
|
new_row_index = nil
|
994
1046
|
end
|
995
|
-
return CADataFrame.new(new_columns, index: new_row_index)
|
996
|
-
self.send(keyname)[] = reference
|
997
|
-
}
|
1047
|
+
return CADataFrame.new(new_columns, index: new_row_index)
|
998
1048
|
end
|
999
1049
|
|
1000
1050
|
def histogram (name, scale = nil, options = nil)
|
data/lib/carray-dataframe/io.rb
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
#
|
5
5
|
######################################
|
6
6
|
require "spreadsheet"
|
7
|
+
require "carray-io-sqlite3"
|
8
|
+
|
7
9
|
class CArray
|
8
10
|
|
9
11
|
def save_excel (filename, &block)
|
@@ -42,8 +44,8 @@ class CADataFrame
|
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
45
|
-
def self.
|
46
|
-
df = CArray.
|
47
|
+
def self.read_csv (file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
|
48
|
+
df = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block).to_dataframe(index: index)
|
47
49
|
if df
|
48
50
|
return df.arrange{
|
49
51
|
column_names.each do |name|
|
@@ -55,8 +57,13 @@ class CADataFrame
|
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
58
|
-
def self.
|
59
|
-
|
60
|
+
def self.load_csv (file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
|
61
|
+
warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
|
62
|
+
self.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block)
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.parse_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
|
66
|
+
df = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block).to_dataframe(index: index)
|
60
67
|
if df
|
61
68
|
return df.arrange{
|
62
69
|
column_names.each do |name|
|
@@ -68,8 +75,14 @@ class CADataFrame
|
|
68
75
|
end
|
69
76
|
end
|
70
77
|
|
71
|
-
def
|
72
|
-
|
78
|
+
def self.from_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
|
79
|
+
warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
|
80
|
+
self.parse_csv(file, sep: sep, rs: rs, index: index, &block)
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
def to_sqlite3 (**args)
|
85
|
+
self.to_ca.to_sqlite3(**args)
|
73
86
|
end
|
74
87
|
|
75
88
|
def to_sql (tablename)
|
data/lib/carray-dataframe/io.rb~
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
######################################
|
2
|
+
#
|
3
|
+
# IO methods
|
4
|
+
#
|
5
|
+
######################################
|
6
|
+
require "spreadsheet"
|
7
|
+
class CArray
|
8
|
+
|
9
|
+
def save_excel (filename, &block)
|
10
|
+
if self.rank >= 3
|
11
|
+
raise "too large rank (>2) to write excel file"
|
12
|
+
end
|
13
|
+
book = Spreadsheet::Workbook.new
|
14
|
+
worksheet = book.create_worksheet
|
15
|
+
self.dim0.times do |i|
|
16
|
+
worksheet.row(i).push *self[i,nil]
|
17
|
+
end
|
18
|
+
if block
|
19
|
+
block.call(worksheet)
|
20
|
+
end
|
21
|
+
book.write(filename)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.load_excel (filename, sheet=0)
|
25
|
+
book = Spreadsheet.open(filename)
|
26
|
+
sheet = book.worksheet(sheet)
|
27
|
+
return sheet.map(&:to_a).to_ca
|
28
|
+
end
|
29
|
+
end
|
30
|
+
class CADataFrame
|
31
|
+
|
32
|
+
def self.load_sqlite3 (*args)
|
33
|
+
df = CArray.load_sqlite3(*args).to_dataframe
|
34
|
+
if df
|
35
|
+
return df.arrange{
|
36
|
+
column_names.each do |name|
|
37
|
+
mask name, nil
|
38
|
+
end
|
39
|
+
}
|
40
|
+
else
|
41
|
+
return nil
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.load_csv (file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
|
46
|
+
df = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block).to_dataframe(index: index)
|
47
|
+
if df
|
48
|
+
return df.arrange{
|
49
|
+
column_names.each do |name|
|
50
|
+
mask name, nil
|
51
|
+
end
|
52
|
+
}
|
53
|
+
else
|
54
|
+
return nil
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.from_csv (file, sep: ",", rs: $/, index: nil, &block)
|
59
|
+
df = CArray.from_csv(file, sep: sep, rs: rs, &block).to_dataframe(index: index)
|
60
|
+
if df
|
61
|
+
return df.arrange{
|
62
|
+
column_names.each do |name|
|
63
|
+
mask name, nil
|
64
|
+
end
|
65
|
+
}
|
66
|
+
else
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def to_sqlite3 (*args)
|
72
|
+
self.to_ca.to_sqlite3(*args)
|
73
|
+
end
|
74
|
+
|
75
|
+
def to_sql (tablename)
|
76
|
+
if @column_names.any?{ |s| s =~ /[\. \-]/ }
|
77
|
+
columns = {}
|
78
|
+
each_column_name do |name|
|
79
|
+
name2 = name.gsub(/[\. \-]/, '_')
|
80
|
+
columns[name2] = column(name)
|
81
|
+
end
|
82
|
+
df = CADataFrame.new(columns)
|
83
|
+
return df.to_sqlite3(database: ":memory:", table: tablename)
|
84
|
+
else
|
85
|
+
return to_sqlite3(database: ":memory:", table: tablename)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
module SQLite3
|
90
|
+
class Database
|
91
|
+
|
92
|
+
def to_df (expr)
|
93
|
+
return CADataFrame.load_sqlite3 self, expr
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/carray-dataframe/join.rb
CHANGED
@@ -27,7 +27,9 @@ class CADataFrame
|
|
27
27
|
CADataFrame::Merge.join(self, other_df, opts)
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
30
31
|
class CADataFrame
|
32
|
+
|
31
33
|
class MergeFrame
|
32
34
|
class NilSorter
|
33
35
|
include Comparable
|
@@ -65,7 +67,12 @@ class CADataFrame
|
|
65
67
|
rkey = first_right_key
|
66
68
|
row(lkey, rkey).tap { |r| res << r if r }
|
67
69
|
end
|
68
|
-
CADataFrame.new(res, order: dataframe_vector_names)
|
70
|
+
df = CADataFrame.new(res, order: dataframe_vector_names)
|
71
|
+
if dataframe_vector_names.include?("index")
|
72
|
+
df.set_index("index")
|
73
|
+
else
|
74
|
+
df
|
75
|
+
end
|
69
76
|
end
|
70
77
|
private
|
71
78
|
attr_reader :on, :indicator,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carray-dataframe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroki Motoyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: carray
|
@@ -25,19 +25,19 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: caxlsx
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '3.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '3.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: spreadsheet
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- lib/carray-dataframe/group.rb
|
71
71
|
- lib/carray-dataframe/iloc_accessor.rb
|
72
72
|
- lib/carray-dataframe/io.rb
|
73
|
+
- lib/carray-dataframe/io.rb~
|
73
74
|
- lib/carray-dataframe/join.rb
|
74
75
|
- lib/carray-dataframe/loc_accessor.rb
|
75
76
|
- lib/carray-dataframe/pivot.rb
|