carray-dataframe 1.0.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,145 @@
1
class CADataFrame

  # Row accessor that translates row labels into positions through the
  # frame's @row_index before reading or writing column data.
  #
  # Both operators run inside the wrapped frame (instance_eval), so the
  # frame's instance variables (@row_index, @column_data) and its methods
  # (select_columns, iloc) are used directly.
  class LocAccessor

    # dataframe:: the CADataFrame whose rows are read or written.
    def initialize(dataframe)
      @dataframe = dataframe
    end

    # Returns a new CADataFrame with the selected rows and columns.
    #
    # argv[0] is the row selector, argv[1] the column selector handed to
    # select_columns. Row selectors:
    # * nil / CArray     - passed to the columns unchanged
    # * Integer          - wrapped as [n] and used as a position
    # * Range            - both ends are looked up in @row_index; when the
    #                      lookup fails and an end is an Integer the request
    #                      is delegated to iloc
    # * Array            - a leading Range element is resolved like a Range
    # * anything else    - looked up in @row_index as a single label
    #
    # Raises RuntimeError ("invalid index ...") when a label is not found.
    def [](*argv)
      @dataframe.instance_eval do
        index = argv.first
        case index
        when nil, CArray
          # used as given
        when Integer                                  ### df[3]
          index = [index]
        when Range                                    ### df["a".."d"]
          if @row_index
            idx1 = @row_index.search(index.begin)
            idx2 = @row_index.search(index.end)
            if idx1 && idx2
              # keep the caller's choice of ".." versus "..."
              index = Range.new(idx1, idx2, index.exclude_end?)
            elsif index.begin.is_a?(Integer) || index.end.is_a?(Integer)
              # positional range: hand the result of iloc back to the caller
              return iloc[*argv]
            else
              raise "invalid index '#{index}'"
            end
          end
        when Array
          if index[0].is_a?(Range) && @row_index
            range = index[0]
            idx1 = @row_index.search(range.begin)
            idx2 = @row_index.search(range.end)
            if idx1 && idx2
              range = Range.new(idx1, idx2, range.exclude_end?)
            elsif !(range.begin.is_a?(Integer) || range.end.is_a?(Integer))
              raise "invalid index '#{index}'"
            end
            # work on a copy so the caller's array is left untouched
            index = index.dup
            index[0] = range
          end
        else
          if @row_index
            idx1 = @row_index.search(index)
            raise "invalid index '#{index}'" unless idx1
            index = [idx1]
          end
        end
        column_selector = select_columns(argv[1])
        columns = {}
        column_selector.each do |name|
          columns[name] = @column_data[name][index]   ### df[...]
        end
        return CADataFrame.new(columns, index: @row_index ? @row_index[index] : nil)
      end
    end

    # Assigns +value+ (the last argument) to the selected rows and columns.
    #
    # Row and column selectors are interpreted as in #[], except that there
    # is no dedicated Integer branch.
    # NOTE(review): because of that, an Integer row selector is looked up as
    # a label when @row_index is set, whereas #[] treats it as a position -
    # confirm the asymmetry is intended.
    #
    # Accepted values:
    # * Hash            - {"a" => [1,2,3], ...}, keys are compared as strings
    # * Array of Hash   - [{"a"=>1,"b"=>11}, {"a"=>2,"b"=>12}, ...]
    # * Array of rows   - [[1,11],[2,12],...], transposed into columns
    # * anything else   - stored into every selected column
    #
    # Raises RuntimeError ("invalid index ...") when a label is not found.
    def []=(*argv)
      value = argv.pop
      @dataframe.instance_eval do
        index = argv.first
        case index
        when nil, CArray
          # used as given
        when Range                                    ### df["a".."d"] = value
          if @row_index
            idx1 = @row_index.search(index.begin)
            idx2 = @row_index.search(index.end)
            if idx1 && idx2
              index = Range.new(idx1, idx2, index.exclude_end?)
            elsif index.begin.is_a?(Integer) || index.end.is_a?(Integer)
              iloc[*argv] = value
              return
            else
              raise "invalid index '#{index}'"
            end
          end
        when Array
          if index[0].is_a?(Range) && @row_index
            range = index[0]
            idx1 = @row_index.search(range.begin)
            idx2 = @row_index.search(range.end)
            if idx1 && idx2
              range = Range.new(idx1, idx2, range.exclude_end?)
            elsif !(range.begin.is_a?(Integer) || range.end.is_a?(Integer))
              raise "invalid index '#{index}'"
            end
            # work on a copy so the caller's array is left untouched
            index = index.dup
            index[0] = range
          end
        else
          if @row_index
            idx1 = @row_index.search(index)
            raise "invalid index '#{index}'" unless idx1
            index = [idx1]
          end
        end
        column_selector = select_columns(argv[1])
        case value
        when Hash                 ### value = {"a"=> [1,2,3], ... }
          value = value.map { |k, v| [k.to_s, v] }.to_h
          column_selector.each do |name|
            @column_data[name][index] = value[name]
          end
        when Array
          case value.first
          when Hash               ### value = [{"a"=>1,"b"=>11}, {"a"=>2,"b"=>12} ...]
            table = {}
            column_selector.each do |name|
              table[name] = []
            end
            value.each do |hash|
              hash = hash.map { |k, v| [k.to_s, v] }.to_h
              column_selector.each do |name|
                table[name] << hash[name]
              end
            end
            column_selector.each do |name|
              @column_data[name][index] = table[name]
            end
          else                    ### value = [[1,11],[2,12],...]
            value = value.transpose
            column_selector.each_with_index do |name, k|
              @column_data[name][index] = value[k]
            end
          end
        else                      ### value = any value
          column_selector.each do |name|
            @column_data[name][index] = value
          end
        end
      end
    end

  end

end
@@ -0,0 +1,54 @@
1
+ #############################################################
2
+ #
3
+ # PIVOT TABLE
4
+ #
5
+ #############################################################
6
class CADataFrame

  # Creates a CADataFramePivot over this frame.
  #
  # name1, name2:: the two column selectors, forwarded unchanged to
  #                CADataFramePivot.new together with the frame itself.
  def pivot(name1, name2)
    CADataFramePivot.new(self, name1, name2)
  end

end
12
# Groups the rows of a data frame by the combination of two key columns and
# lets the caller reduce every group to one table cell.
class CADataFramePivot

  # dataframe:: frame to pivot; its columns are fetched with +col+.
  # name1::     column supplying the keys of the result rows.
  # name2::     column supplying the keys of the result columns.
  #
  # Either name may instead be a Hash whose first pair is
  # { column_name => key_list }; the given list is then used in place of the
  # sorted unique values of the column. The list for name1 is converted with
  # +to_ca+, the list for name2 is stored as passed.
  def initialize(dataframe, name1, name2)
    @dataframe = dataframe

    if name1.is_a?(Hash)
      name1, given_keys = name1.first
      @column1 = @dataframe.col(name1)
      @keys1 = given_keys.to_ca
    else
      @column1 = @dataframe.col(name1)
      @keys1 = @column1.uniq.sort
    end

    if name2.is_a?(Hash)
      name2, given_keys = name2.first
      @column2 = @dataframe.col(name2)
      @keys2 = given_keys
    else
      @column2 = @dataframe.col(name2)
      @keys2 = @column2.uniq.sort
    end

    # [key1, key2] => addresses of the rows where both columns match
    @addrs = {}
    @keys1.each do |key1|
      @keys2.each do |key2|
        matches = @column1.eq(key1) & @column2.eq(key2)
        @addrs[[key1, key2]] = matches.where
      end
    end
  end

  # Builds the pivot table as a CADataFrame.
  #
  # The block is called once per (key1, key2) pair with the frame indexed by
  # that pair's row addresses; its return value becomes the cell. The result
  # has one column per key2 (object arrays pre-filled with UNDEF) and uses
  # the key1 values as its index.
  def table(&block)
    cells = {}
    @keys2.each do |key2|
      cells[key2] = CArray.object(@keys1.size) { UNDEF }
    end
    @keys1.each_with_index do |key1, row|
      @keys2.each do |key2|
        group = @dataframe[@addrs[[key1, key2]]]
        cells[key2][row] = block.call(group)
      end
    end
    CADataFrame.new(cells, index: @keys1)
  end

end
@@ -0,0 +1,142 @@
1
+
2
class CADataFrame

  # Resolves a column selector into an array of column names.
  #
  # selector may be:
  # * nil                - every column (returns @column_names itself)
  # * Integer            - position in @column_names
  # * String / Symbol    - a single column name                  ### "AAA"
  # * Array              - a list of column names                ### ["AAA", "BBB"]
  # * Range              - of positions or of names              ### "AAA".."BBB", 0..1
  #
  # For ranges a nil begin means the first column and a nil end the last
  # one; names are located with @column_names.search.
  # NOTE(review): an end-less *exclusive* range (e.g. 0...nil) stops one
  # column before the last; kept as found - confirm it is intended.
  #
  # Raises RuntimeError when a column cannot be found or the selector has an
  # unsupported type.
  def select_columns(selector = nil)
    case selector
    when nil                    ### all
      return @column_names
    when Integer
      name = @column_names[selector]
      # check the looked-up name: the Integer selector itself is always truthy
      raise "invalid column index" unless name
      return [name]
    when String, Symbol         ### "AAA"
      if @column_names.include?(selector.to_s)
        return [selector.to_s]
      else
        raise "invalid column specified #{selector}"
      end
    when Array                  ### ["AAA", "BBB"]
      if selector.size == 1 && selector.first.is_a?(Hash)
        return select_columns(selector.first)
      else
        selector.each do |name|
          unless @column_names.include?(name.to_s)
            raise "invalid column specified #{name}"
          end
        end
        return selector.map(&:to_s)
      end
    when Range                  ### "AAA".."BBB", 0..1
      idx1 = __column_offset__(selector.begin, 0)
      if selector.exclude_end?
        if selector.end.nil?
          idx2 = -2
        else
          stop = __column_offset__(selector.end, nil)
          # "...0" selects nothing; stop - 1 would be -1, i.e. the last column
          return [] if stop == 0
          idx2 = stop - 1
        end
      else
        idx2 = __column_offset__(selector.end, -1)
      end
      return @column_names[idx1..idx2]
    else
      raise "invalid column selector #{selector}"
    end
  end

  # Reads rows and/or columns.
  #
  # * df[rows, cols]     - delegated to loc
  # * df[Range]          - iloc when the range starts with an Integer and the
  #                        frame has no row index, otherwise loc
  # * df[CArray]         - loc; the array must have rank 1
  # * df[String/Symbol]  - the single column, via column
  # * anything else      - treated as a column selector; returns a new
  #                        CADataFrame sharing the selected column data and
  #                        the row index
  def [](arg, opt = :__dummy__)
    if opt != :__dummy__
      return loc[arg, opt]
    end
    case arg
    when Range
      if arg.begin.is_a?(Integer) && !@row_index
        return iloc[arg]
      end
      return loc[arg]
    when CArray
      raise "index should be 1-dim array" unless arg.rank == 1
      return loc[arg]
    when String, Symbol
      return column(arg.to_s)
    else
      new_columns = {}
      select_columns(arg).each do |key|
        new_columns[key] = @column_data[key]
      end
      return CADataFrame.new(new_columns, index: @row_index)
    end
  end

  # Writes rows and/or columns; selectors are interpreted as in #[].
  #
  # A String/Symbol naming a column that does not exist yet appends it
  # (arrange { append ... }). With a column selector and a CADataFrame value,
  # the value's columns are taken by position.
  def []=(arg, opt = :__dummy__, value)
    if opt != :__dummy__
      loc[arg, opt] = value
    else
      case arg
      when Range
        # assign exactly once: positional when there is no row index
        if arg.begin.is_a?(Integer) && !@row_index
          iloc[arg] = value
        else
          loc[arg] = value
        end
      when CArray
        loc[arg] = value
      when String, Symbol
        if column(arg.to_s)
          column(arg.to_s)[] = value
        else
          arrange {
            append arg, value
          }
        end
      else
        column_selector = select_columns(arg)
        case value
        when CADataFrame
          # fetch every source column before writing any target column
          values = column_selector.each_index.map { |i|
            value.column(i).to_ca
          }
          column_selector.each_with_index do |key, i|
            column(key)[] = values[i]
          end
        else
          column_selector.each do |key|
            column(key)[] = value
          end
        end
      end
    end
  end

  private

  # Turns one end of a column range into a position: nil gives +default+, an
  # Integer is returned unchanged, a String or Symbol is located by name.
  def __column_offset__(bound, default)
    case bound
    when nil
      default
    when Integer
      bound
    when String, Symbol
      @column_names.search(bound.to_s) || raise("can't find column #{bound}")
    else
      raise "invalid column specified #{bound}"
    end
  end

end
@@ -0,0 +1,102 @@
1
class CADataFrame

  # Renders the frame as an HTML <table>, returned as a String that starts
  # with a "DataFrame: ..." line giving the row count.
  #
  # threshold::   when an Integer and the frame has more rows than this, only
  #               the leading and trailing rows are shown with a "..." row
  #               between them.
  # time_format:: format handed to time_strftime for CATimeIndex columns and
  #               row index; without it "%F %T%:z" is used.
  # index::       when true a leading " " column shows the row index, or the
  #               row numbers if the frame has no row index.
  #
  # NOTE(review): column names and cell text are interpolated into the markup
  # without HTML escaping - confirm callers never pass untrusted data.
  def to_html(threshold = 8, time_format: nil, index: true)
    columns = @column_data.clone
    @column_names.each do |name|
      if columns[name].is_a?(CATimeIndex)
        if time_format
          columns[name] = columns[name].time.time_strftime(time_format)
        else
          columns[name] = columns[name].time.time_format("%F %T%:z")
        end
      end
    end

    if index
      namelist = [" "] + @column_names
      if @row_index
        if @row_index.is_a?(CATimeIndex)
          if time_format
            row_index = @row_index.time.time_strftime(time_format)
          else
            row_index = @row_index.time.time_format("%F %T%:z")
          end
        else
          row_index = @row_index
        end
        tbl = CADFArray.new(namelist, columns.update(" " => row_index))
      else
        tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
      end
    else
      namelist = @column_names
      tbl = CADFArray.new(namelist, columns)
    end

    # long frames: keep the head and the tail, separated by a row of "..."
    if threshold.is_a?(Integer) && @row_number > threshold
      list = tbl[0..(threshold/2),nil].to_a
      list.push namelist.map { "..." }
      list.push(*tbl[-threshold/2+1..-1,nil].to_a)
      tbl = list.to_ca
    end

    datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
    # mb flags the columns whose cells or name contain non-ASCII text
    datamb  = datastr.convert(:boolean, &:ascii_only?).not.sum(0).ne(0)
    namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
    mb      = datamb.or(namemb)
    namelen = namelist.map(&:length).to_ca
    datalen = datastr.convert(&:length)

    if mb.max == 0
      # ASCII only: pad by character count
      if datalen.size == 0
        lengths = namelen.to_a
      else
        lengths = datalen.max(0).pmax(namelen).to_a
      end
      table_in = "<table>"
      header   = "<thead><tr>" +
                 [namelist, lengths].transpose.map {|name, len|
                   "<th>#{name.to_s.ljust(len)}</th>"
                 }.join() + "</tr></thead>"
      body_in  = "<tbody>"
      ary = [table_in, header, body_in]
      if datalen.size > 0
        datastr[:i,nil].each_with_index do |blk, i|
          list = blk.flatten.to_a
          ary << "<tr>" + [list, lengths].transpose.map {|value, len|
            "<td>#{value.ljust(len)}</td>"
          }.join() + "</tr>"
        end
      end
      ary << "</tbody>"
      ary << "</table>"
      return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
    else
      # non-ASCII text: pad by the width reported by __strwidth__
      namewidth = namelist.to_ca.convert {|c| __strwidth__(c.to_s) }
      if datalen.size == 0
        maxwidth = namewidth
        # no cells to pad; the row loop below is skipped as well
        cell_len = nil
      else
        datawidth = datastr.convert {|c| __strwidth__(c.to_s) }
        maxwidth  = datawidth.max(0).pmax(namewidth)
        # ljust counts characters, so add back the width/length difference
        cell_len  = maxwidth[:*,nil] - datawidth + datalen
      end
      table_in = "<table>"
      header   = "<thead><tr>" +
                 [namelist, maxwidth.to_a].transpose.map {|name, len|
                   "<th>#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}</th>"
                 }.join() + "</tr></thead>"
      body_in  = "<tbody>"
      ary = [table_in, header, body_in]
      if datalen.size > 0
        datastr[:i,nil].each_with_addr do |blk, i|
          list = blk.flatten.to_a
          ary << "<tr>" + list.map.with_index {|value, j|
            "<td>#{value.ljust(cell_len[i,j])}</td>"
          }.join() + "</tr>"
        end
      end
      ary << "</tbody>"
      ary << "</table>"
      return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
    end
  end

end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carray-dataframe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroki Motoyoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-02 00:00:00.000000000 Z
11
+ date: 2020-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: carray
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.1'
19
+ version: '1.5'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.1'
26
+ version: '1.5'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: axlsx
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,7 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.1'
55
- description: " Extension for realizing DataFrame of R in Ruby\n"
55
+ description: " DataFrame in Ruby\n"
56
56
  email: ''
57
57
  executables: []
58
58
  extensions: []
@@ -60,28 +60,24 @@ extra_rdoc_files: []
60
60
  files:
61
61
  - API.txt
62
62
  - README.md
63
+ - Rakefile
63
64
  - carray-dataframe.gemspec
64
- - examples/R/fit.rb
65
- - examples/R/iris.rb
66
- - examples/R/japan_area.rb
67
- - examples/R/kyaku.rb
68
- - examples/group_by.rb
69
- - examples/hist.rb
70
- - examples/iris.rb
71
- - examples/map.rb
72
- - examples/match.rb
73
- - examples/test.xlsx
74
- - examples/test1.rb
75
- - examples/test2.rb
76
- - examples/test3.db
77
- - examples/test3.rb
78
- - examples/test3.xlsx
79
- - examples/to_excel.rb
80
- - lib/R.rb
81
- - lib/carray/autoload/autoload_dataframe_dataframe.rb
82
- - lib/carray/dataframe/dataframe.rb
65
+ - lib/carray-dataframe.rb
66
+ - lib/carray-dataframe/arranger.rb
67
+ - lib/carray-dataframe/cadf_array.rb
68
+ - lib/carray-dataframe/converter.rb
69
+ - lib/carray-dataframe/dataframe.rb
70
+ - lib/carray-dataframe/group.rb
71
+ - lib/carray-dataframe/iloc_accessor.rb
72
+ - lib/carray-dataframe/io.rb
73
+ - lib/carray-dataframe/join.rb
74
+ - lib/carray-dataframe/loc_accessor.rb
75
+ - lib/carray-dataframe/pivot.rb
76
+ - lib/carray-dataframe/reference.rb
77
+ - lib/carray-dataframe/to_html.rb
83
78
  homepage: https://github.com/himotoyoshi/carray-dataframe
84
- licenses: []
79
+ licenses:
80
+ - MIT
85
81
  metadata: {}
86
82
  post_install_message:
87
83
  rdoc_options: []
@@ -98,9 +94,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
94
  - !ruby/object:Gem::Version
99
95
  version: '0'
100
96
  requirements: []
101
- rubyforge_project:
102
- rubygems_version: 2.6.13
97
+ rubygems_version: 3.1.2
103
98
  signing_key:
104
99
  specification_version: 4
105
- summary: Extension for realizing DataFrame of R in Ruby
100
+ summary: DataFrame in Ruby
106
101
  test_files: []