carray-dataframe 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/carray-dataframe.gemspec +2 -2
- data/lib/carray-dataframe.rb +1 -0
- data/lib/carray-dataframe/converter.rb +1 -1
- data/lib/carray-dataframe/dataframe.rb +67 -17
- data/lib/carray-dataframe/io.rb +19 -6
- data/lib/carray-dataframe/io.rb~ +96 -0
- data/lib/carray-dataframe/join.rb +8 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00a3f769c9096380db116cb74660aebc6451f5508029c8e97d720ed254b9847b
|
4
|
+
data.tar.gz: 6e810823d0ee16ef2a7a60295b983ffdb46ccb83591c49b19c0285a49c14d4cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14880525aec99af0ba133d08eb19ec0715f2be56777593cdccb21f7e54ab4c0c51335ffaf557f86621f0eda815e6859dc4bc18465f7f74355f3f7365c7abfadd
|
7
|
+
data.tar.gz: '028c8109f1ada33196663884043c44134491ce58649a5e9341f91e616f9edbb113fbc202965782bf762bf398d483ce12fdd18cf0d511cce03237f4ad174cb612'
|
data/carray-dataframe.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
|
2
2
|
Gem::Specification::new do |s|
|
3
|
-
version = "1.1.1"
|
3
|
+
version = "1.2.0"
|
4
4
|
|
5
5
|
files = Dir.glob("**/*") - [
|
6
6
|
Dir.glob("carray-dataframe*.gem"),
|
@@ -22,6 +22,6 @@ Gem::Specification::new do |s|
|
|
22
22
|
s.files = files
|
23
23
|
s.required_ruby_version = ">= 1.8.1"
|
24
24
|
s.add_runtime_dependency 'carray', '~> 1.5'
|
25
|
-
s.add_runtime_dependency '
|
25
|
+
s.add_runtime_dependency 'caxlsx', '~> 3.0'
|
26
26
|
s.add_runtime_dependency 'spreadsheet', '~> 1.1'
|
27
27
|
end
|
data/lib/carray-dataframe.rb
CHANGED
@@ -28,7 +28,11 @@ class CADataFrame
|
|
28
28
|
@column_names = columns.map(&:to_s)
|
29
29
|
else
|
30
30
|
if data.respond_to?(:column_names)
|
31
|
-
|
31
|
+
if data.column_names.is_a?(Array)
|
32
|
+
@column_names = data.column_names.map(&:to_s)
|
33
|
+
else
|
34
|
+
@column_names = data.dim1.times.map{|i| "c#{i}" }
|
35
|
+
end
|
32
36
|
elsif order
|
33
37
|
@column_names = order.map(&:to_s)
|
34
38
|
else
|
@@ -88,9 +92,13 @@ class CADataFrame
|
|
88
92
|
end
|
89
93
|
|
90
94
|
# Sets @row_number and check column length
|
91
|
-
|
92
|
-
|
93
|
-
|
95
|
+
if @column_data.empty? and index
|
96
|
+
@row_number = index.size
|
97
|
+
else
|
98
|
+
@row_number = @column_data.first[1].size
|
99
|
+
if @column_names.any?{ |key| @column_data[key].size != @row_number }
|
100
|
+
raise "column sizes mismatch"
|
101
|
+
end
|
94
102
|
end
|
95
103
|
|
96
104
|
# Processing option 'index'
|
@@ -206,6 +214,10 @@ class CADataFrame
|
|
206
214
|
end
|
207
215
|
end
|
208
216
|
|
217
|
+
# Reports whether this data frame currently carries a row index.
# Always answers with a strict boolean, never the index object itself.
def has_index?
  !!@row_index
end
|
220
|
+
|
209
221
|
def replace (other)
|
210
222
|
@column_names = other.column_names
|
211
223
|
@column_data = other.column_data
|
@@ -215,8 +227,13 @@ class CADataFrame
|
|
215
227
|
return self
|
216
228
|
end
|
217
229
|
|
218
|
-
def has_column?(name)
|
219
|
-
|
230
|
+
# Tells whether the frame exposes a column called +name+.
#
# name - column name as a String or Symbol (anything responding to to_s).
#
# The reserved name "index" is answered by has_index? rather than by the
# list of column names. Every other name is normalised with to_s before the
# lookup, because @column_names stores Strings; without that, a Symbol
# argument could never match an existing column.
#
# Returns true or false.
def has_column? (name)
  key = name.to_s
  return has_index? if key == "index"
  @column_names.include?(key)
end
|
221
238
|
|
222
239
|
def column_types
|
@@ -239,6 +256,18 @@ class CADataFrame
|
|
239
256
|
end
|
240
257
|
alias col column
|
241
258
|
|
259
|
+
# Replaces the data stored for a single column. Only @column_data is
# written; the list of column names is left untouched.
#
# spec - Integer position into @column_names (negative positions count from
#        the end, as with Array#[]), or a String/Symbol column name.
# col  - the new column data, stored as given.
#
# Returns +col+.
# Raises RuntimeError ("invalid column specifier") when +spec+ is of another
# type, or is an Integer position that does not address an existing column.
def set_column (spec, col)
  key =
    case spec
    when Integer
      @column_names[spec]
    when String, Symbol
      spec.to_s
    end
  # A nil key means an unsupported spec type or an out-of-range position;
  # refuse instead of silently storing the data under a nil key.
  raise "invalid column specifier" if key.nil?
  @column_data[key] = col
end
# `private` must receive the method name as a Symbol. A bare `set_column`
# here would be evaluated as a method call and fail while the file loads.
private :set_column
|
270
|
+
|
242
271
|
def loc
|
243
272
|
@loc ||= CADataFrame::LocAccessor.new(self)
|
244
273
|
return @loc
|
@@ -429,10 +458,11 @@ class CADataFrame
|
|
429
458
|
end
|
430
459
|
|
431
460
|
def append_column (name, new_column = nil, &block)
|
461
|
+
name = name.to_s
|
432
462
|
if new_column
|
433
463
|
# do nothing
|
434
464
|
elsif block
|
435
|
-
new_column = instance_exec(&block)
|
465
|
+
new_column = instance_exec(self, &block)
|
436
466
|
else
|
437
467
|
new_column = @column_data.first[1].template(:object)
|
438
468
|
end
|
@@ -442,18 +472,39 @@ class CADataFrame
|
|
442
472
|
if new_column.rank != 1 or new_column.size != @row_number
|
443
473
|
raise "invalid shape of appended column"
|
444
474
|
end
|
445
|
-
@column_names.push(name
|
446
|
-
@column_data[name
|
475
|
+
@column_names.push(name)
|
476
|
+
@column_data[name] = new_column
|
477
|
+
return new_column
|
478
|
+
end
|
479
|
+
|
480
|
+
# Inserts a column called +name+ at position +pos+ in the column order.
#
# pos        - position handed to Array#insert on the list of column names.
# name       - column name; converted with to_s.
# new_column - the column data. When omitted, the block (if given) is run
#              with instance_exec, receiving the frame itself as argument,
#              and its result is used; with neither, an object-typed
#              template of the first existing column is created.
#
# Data that is not a CArray is converted with to_ca. The column must be of
# rank 1 and as long as @row_number, otherwise a RuntimeError is raised.
#
# Returns the column that was stored.
def insert_column (pos, name, new_column = nil, &block)
  label = name.to_s
  new_column ||=
    if block
      instance_exec(self, &block)
    else
      @column_data.first[1].template(:object)
    end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  unless new_column.rank == 1 && new_column.size == @row_number
    raise "invalid shape of appended column"
  end
  @column_names.insert(pos, label)
  @column_data[label] = new_column
  new_column
end
|
449
499
|
|
450
500
|
alias append append_column
|
451
501
|
|
452
502
|
def prepend_column (name, new_column = nil, &block)
|
503
|
+
name = name.to_s
|
453
504
|
if new_column
|
454
505
|
# do nothing
|
455
506
|
elsif block
|
456
|
-
new_column = instance_exec(&block)
|
507
|
+
new_column = instance_exec(self, &block)
|
457
508
|
else
|
458
509
|
new_column = @column_data.first[1].template(:object)
|
459
510
|
end
|
@@ -463,8 +514,8 @@ class CADataFrame
|
|
463
514
|
if new_column.rank != 1 or new_column.size != @row_number
|
464
515
|
raise "invalid shape of appended column"
|
465
516
|
end
|
466
|
-
@column_names.unshift(name
|
467
|
-
@column_data[name
|
517
|
+
@column_names.unshift(name)
|
518
|
+
@column_data[name] = new_column
|
468
519
|
return new_column
|
469
520
|
end
|
470
521
|
|
@@ -572,6 +623,7 @@ class CADataFrame
|
|
572
623
|
end
|
573
624
|
new_columns = {}
|
574
625
|
names.map(&:to_s).each do |name|
|
626
|
+
raise "unknown column '#{name}'" unless column(name)
|
575
627
|
new_columns[name] = column(name)[row]
|
576
628
|
end
|
577
629
|
return CADataFrame.new(new_columns, index: @row_index ? @row_index[row] : nil)
|
@@ -977,11 +1029,11 @@ end
|
|
977
1029
|
class CADataFrame
|
978
1030
|
|
979
1031
|
def matchup (keyname, reference)
|
980
|
-
key = column(keyname
|
1032
|
+
key = column(keyname)
|
981
1033
|
idx = reference.matchup(key)
|
982
1034
|
new_columns = {}
|
983
1035
|
each_column_name do |name|
|
984
|
-
if name == keyname
|
1036
|
+
if name == keyname.to_s
|
985
1037
|
new_columns[name] = reference
|
986
1038
|
else
|
987
1039
|
new_columns[name] = column(name).project(idx)
|
@@ -992,9 +1044,7 @@ class CADataFrame
|
|
992
1044
|
else
|
993
1045
|
new_row_index = nil
|
994
1046
|
end
|
995
|
-
return CADataFrame.new(new_columns, index: new_row_index)
|
996
|
-
self.send(keyname)[] = reference
|
997
|
-
}
|
1047
|
+
return CADataFrame.new(new_columns, index: new_row_index)
|
998
1048
|
end
|
999
1049
|
|
1000
1050
|
def histogram (name, scale = nil, options = nil)
|
data/lib/carray-dataframe/io.rb
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
#
|
5
5
|
######################################
|
6
6
|
require "spreadsheet"
|
7
|
+
require "carray-io-sqlite3"
|
8
|
+
|
7
9
|
class CArray
|
8
10
|
|
9
11
|
def save_excel (filename, &block)
|
@@ -42,8 +44,8 @@ class CADataFrame
|
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
45
|
-
def self.
|
46
|
-
df = CArray.
|
47
|
+
def self.read_csv (file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
|
48
|
+
df = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block).to_dataframe(index: index)
|
47
49
|
if df
|
48
50
|
return df.arrange{
|
49
51
|
column_names.each do |name|
|
@@ -55,8 +57,13 @@ class CADataFrame
|
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
58
|
-
def self.
|
59
|
-
|
60
|
+
# Deprecated entry point kept for backward compatibility: prints a
# deprecation warning and delegates to read_csv.
#
# file       - passed through to read_csv unchanged.
# sep        - field separator (default ",").
# rs         - record separator (default $/).
# encoding   - encoding passed through to read_csv (default nil).
# quote_char - quote character (default '"').
# index      - index option; now forwarded to read_csv. It used to be
#              accepted here but silently dropped.
# block      - forwarded to read_csv.
#
# Returns whatever read_csv returns.
def self.load_csv (file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
  warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
  self.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, index: index, &block)
end
|
64
|
+
|
65
|
+
def self.parse_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
|
66
|
+
df = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block).to_dataframe(index: index)
|
60
67
|
if df
|
61
68
|
return df.arrange{
|
62
69
|
column_names.each do |name|
|
@@ -68,8 +75,14 @@ class CADataFrame
|
|
68
75
|
end
|
69
76
|
end
|
70
77
|
|
71
|
-
def
|
72
|
-
|
78
|
+
# Deprecated entry point kept for backward compatibility: prints a
# deprecation warning and delegates to parse_csv.
#
# file       - passed through to parse_csv unchanged.
# sep        - field separator (default ",").
# rs         - record separator (default $/).
# quote_char - quote character (default '"'); now forwarded to parse_csv.
#              It used to be accepted here but silently dropped, so a custom
#              quote character had no effect.
# index      - index option forwarded to parse_csv.
# block      - forwarded to parse_csv.
#
# Returns whatever parse_csv returns.
def self.from_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
  self.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, index: index, &block)
end
|
82
|
+
|
83
|
+
|
84
|
+
# Converts the frame with to_ca and hands all keyword options on to the
# resulting object's to_sqlite3, returning its result.
def to_sqlite3 (**args)
  to_ca.to_sqlite3(**args)
end
|
74
87
|
|
75
88
|
def to_sql (tablename)
|
@@ -0,0 +1,96 @@
|
|
1
|
+
######################################
|
2
|
+
#
|
3
|
+
# IO methods
|
4
|
+
#
|
5
|
+
######################################
|
6
|
+
require "spreadsheet"
|
7
|
+
class CArray

  # Writes the array into a fresh Spreadsheet workbook with one worksheet,
  # one worksheet row per index along the first dimension.
  #
  # filename - destination handed to Workbook#write.
  # block    - optional; called with the worksheet after the rows have been
  #            filled and before the workbook is written.
  #
  # Raises RuntimeError when the array has rank 3 or more.
  # Returns the result of Workbook#write.
  def save_excel (filename, &block)
    raise "too large rank (>2) to write excel file" if rank >= 3
    workbook = Spreadsheet::Workbook.new
    page = workbook.create_worksheet
    dim0.times { |i| page.row(i).push(*self[i, nil]) }
    block.call(page) if block
    workbook.write(filename)
  end

  # Opens +filename+ with Spreadsheet, picks worksheet +sheet+ (default 0)
  # and returns its rows, each converted with to_a, turned into an array
  # via to_ca.
  def self.load_excel (filename, sheet=0)
    page = Spreadsheet.open(filename).worksheet(sheet)
    page.map(&:to_a).to_ca
  end
end
|
30
|
+
class CADataFrame

  # Loads data through CArray.load_sqlite3 (all arguments forwarded),
  # converts it with to_dataframe and masks nil in every column.
  # Returns the arranged frame, or nil when no frame was produced.
  def self.load_sqlite3 (*args)
    frame = CArray.load_sqlite3(*args).to_dataframe
    return nil unless frame
    frame.arrange {
      column_names.each { |name| mask(name, nil) }
    }
  end

  # Loads data through CArray.load_csv, converts it with
  # to_dataframe(index: index) and masks nil in every column.
  # Returns the arranged frame, or nil when no frame was produced.
  def self.load_csv (file, sep: ",", rs: $/, encoding: nil, index: nil, &block)
    table = CArray.load_csv(file, sep: sep, rs: rs, encoding: encoding, &block)
    frame = table.to_dataframe(index: index)
    return nil unless frame
    frame.arrange {
      column_names.each { |name| mask(name, nil) }
    }
  end

  # Same as load_csv but goes through CArray.from_csv, which takes no
  # encoding option here.
  def self.from_csv (file, sep: ",", rs: $/, index: nil, &block)
    table = CArray.from_csv(file, sep: sep, rs: rs, &block)
    frame = table.to_dataframe(index: index)
    return nil unless frame
    frame.arrange {
      column_names.each { |name| mask(name, nil) }
    }
  end

  # Converts the frame with to_ca and forwards all arguments to the
  # resulting object's to_sqlite3.
  def to_sqlite3 (*args)
    to_ca.to_sqlite3(*args)
  end

  # Stores the frame as table +tablename+ in a ":memory:" database via
  # to_sqlite3. If any column name contains a dot, a space or a hyphen, a
  # copy of the frame is built first in which those characters are replaced
  # by underscores, and that copy is stored instead.
  def to_sql (tablename)
    unless @column_names.any?{ |s| s =~ /[\. \-]/ }
      return to_sqlite3(database: ":memory:", table: tablename)
    end
    renamed = {}
    each_column_name do |name|
      renamed[name.gsub(/[\. \-]/, '_')] = column(name)
    end
    CADataFrame.new(renamed).to_sqlite3(database: ":memory:", table: tablename)
  end
end
|
89
|
+
module SQLite3
  class Database

    # Hands this database and +expr+ to CADataFrame.load_sqlite3 and returns
    # whatever that call produces.
    def to_df (expr)
      CADataFrame.load_sqlite3(self, expr)
    end
  end
end
|
@@ -27,7 +27,9 @@ class CADataFrame
|
|
27
27
|
CADataFrame::Merge.join(self, other_df, opts)
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
30
31
|
class CADataFrame
|
32
|
+
|
31
33
|
class MergeFrame
|
32
34
|
class NilSorter
|
33
35
|
include Comparable
|
@@ -65,7 +67,12 @@ class CADataFrame
|
|
65
67
|
rkey = first_right_key
|
66
68
|
row(lkey, rkey).tap { |r| res << r if r }
|
67
69
|
end
|
68
|
-
CADataFrame.new(res, order: dataframe_vector_names)
|
70
|
+
df = CADataFrame.new(res, order: dataframe_vector_names)
|
71
|
+
if dataframe_vector_names.include?("index")
|
72
|
+
df.set_index("index")
|
73
|
+
else
|
74
|
+
df
|
75
|
+
end
|
69
76
|
end
|
70
77
|
private
|
71
78
|
attr_reader :on, :indicator,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carray-dataframe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroki Motoyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: carray
|
@@ -25,19 +25,19 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: caxlsx
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '3.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '3.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: spreadsheet
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- lib/carray-dataframe/group.rb
|
71
71
|
- lib/carray-dataframe/iloc_accessor.rb
|
72
72
|
- lib/carray-dataframe/io.rb
|
73
|
+
- lib/carray-dataframe/io.rb~
|
73
74
|
- lib/carray-dataframe/join.rb
|
74
75
|
- lib/carray-dataframe/loc_accessor.rb
|
75
76
|
- lib/carray-dataframe/pivot.rb
|