carray-dataframe 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,145 @@
1
class CADataFrame

  # Label-based row accessor for a data frame.
  #
  # Row specifiers are translated into positions by looking labels up in
  # the frame's row index (+@row_index.search+). Both methods run their
  # body inside +instance_eval+ on the wrapped frame, so the instance
  # variables (+@row_index+, +@column_data+) and the bare calls to
  # +select_columns+ and +iloc+ below belong to the data frame, not to
  # this accessor.
  class LocAccessor

    # dataframe:: the data frame whose rows this accessor addresses
    def initialize (dataframe)
      @dataframe = dataframe
    end

    # Returns a new CADataFrame holding the selected rows.
    #
    # argv[0]:: row specifier: nil, a CArray, an Integer, a Range, an
    #           Array (whose first element may be a Range), or a label
    # argv[1]:: optional column selector, handed to +select_columns+
    #
    # A Range whose ends cannot both be found in the row index is handed
    # to +iloc+ if either end is an Integer.
    #
    # Raises RuntimeError ("invalid index ...") when a label or label
    # range cannot be resolved against the row index.
    def [] (*argv)
      @dataframe.instance_eval {
        index = argv.first
        case index
        when nil
        when CArray
        when Integer                 ### df[3]
          index = [index]
        when Range                   ### df["a".."d"]
          if @row_index
            idx1 = @row_index.search(index.begin)
            idx2 = @row_index.search(index.end)
            if idx1 and idx2
              index = idx1..idx2
            elsif index.begin.is_a?(Integer) || index.end.is_a?(Integer)
              # Return the delegated result (it used to be computed and
              # then thrown away); this mirrors what []= does.
              return iloc[*argv]
            else
              raise "invalid index '#{index}'"
            end
          end
        when Array
          if index[0].is_a?(Range)
            if @row_index
              range = index[0]
              idx1 = @row_index.search(range.begin)
              idx2 = @row_index.search(range.end)
              if idx1 and idx2
                range = idx1..idx2
              elsif !(range.begin.is_a?(Integer) || range.end.is_a?(Integer))
                raise "invalid index '#{index}'"
              end
              # Build a fresh selector so the caller's array is not modified.
              index = [range] + index.drop(1)
            end
          end
        else                         ### single label
          if @row_index
            idx1 = @row_index.search(index)
            if idx1
              index = [idx1]
            else
              raise "invalid index '#{index}'"
            end
          end
        end
        column_selector = select_columns(argv[1])
        columns = {}
        column_selector.each do |name|
          columns[name] = @column_data[name][index]      ### df[...]
        end
        return CADataFrame.new(columns, index: @row_index ? @row_index[index] : nil)
      }
    end

    # Assigns +value+ (the last argument) to the selected rows.
    #
    # argv[0]:: row specifier, resolved as in #[] except that an Integer
    #           is looked up as a label when the frame has a row index
    # argv[1]:: optional column selector, handed to +select_columns+
    # value::   a Hash of column name => values, an Array of Hashes (one
    #           per row), an Array of row Arrays, or any other object,
    #           which is assigned to every selected column as is
    #
    # Raises RuntimeError ("invalid index ...") when a label or label
    # range cannot be resolved against the row index.
    def []= (*argv)
      value = argv.pop
      @dataframe.instance_eval {
        index = argv.first
        case index
        when nil
        when CArray
        when Range                   ### df["a".."d"] = value
          if @row_index
            idx1 = @row_index.search(index.begin)
            idx2 = @row_index.search(index.end)
            if idx1 and idx2
              index = idx1..idx2
            elsif index.begin.is_a?(Integer) || index.end.is_a?(Integer)
              iloc[*argv] = value
              return
            else
              raise "invalid index '#{index}'"
            end
          end
        when Array
          if index[0].is_a?(Range)
            if @row_index
              range = index[0]
              idx1 = @row_index.search(range.begin)
              idx2 = @row_index.search(range.end)
              if idx1 and idx2
                range = idx1..idx2
              elsif !(range.begin.is_a?(Integer) || range.end.is_a?(Integer))
                raise "invalid index '#{index}'"
              end
              # Build a fresh selector so the caller's array is not modified.
              index = [range] + index.drop(1)
            end
          end
        else                         ### single label
          if @row_index
            idx1 = @row_index.search(index)
            if idx1
              index = [idx1]
            else
              raise "invalid index '#{index}'"
            end
          end
        end
        column_selector = select_columns(argv[1])
        case value
        when Hash                    ### value = {"a"=> [1,2,3], ... }
          value = value.map{|k,v| [k.to_s, v]}.to_h
          column_selector.each do |name|
            @column_data[name][index] = value[name]
          end
        when Array
          case value.first
          when Hash                  ### value = [{"a"=>1,"b"=>11}, {"a"=>2,"b"=>12} ...]
            # Regroup the per-row hashes into one value list per column.
            table = {}
            column_selector.each do |name|
              table[name] = []
            end
            value.each do |hash|
              hash = hash.map{|k,v| [k.to_s, v]}.to_h
              column_selector.each do |name|
                table[name] << hash[name]
              end
            end
            column_selector.each do |name|
              @column_data[name][index] = table[name]
            end
          else                       ### value = [[1,11],[2,12],...]
            # Rows -> columns, matched to the selected columns by position.
            value = value.transpose
            column_selector.each_with_index do |name, k|
              @column_data[name][index] = value[k]
            end
          end
        else                         ### value = any value
          column_selector.each do |name|
            @column_data[name][index] = value
          end
        end
      }
    end
  end
end
@@ -0,0 +1,54 @@
1
+ #############################################################
2
+ #
3
+ # PIVOT TABLE
4
+ #
5
+ #############################################################
6
class CADataFrame

  # Builds a pivot helper over two columns of this data frame.
  #
  # name1, name2:: passed through unchanged to CADataFramePivot.new,
  #                together with the frame itself
  #
  # Returns the new CADataFramePivot.
  def pivot (name1, name2)
    CADataFramePivot.new(self, name1, name2)
  end
end
12
# Cross-tabulation helper created by CADataFrame#pivot.
#
# The constructor records, for every (key1, key2) combination, the result
# of +where+ on the mask "column1 equals key1 AND column2 equals key2".
# #table later uses those cached addresses to pull sub-frames out of the
# source data frame.
class CADataFramePivot

  # dataframe:: source frame; +col+ is called on it here, +[]+ in #table
  # name1::     column name, or a one-entry Hash { name => keys } that
  #             supplies the keys explicitly (converted with +to_ca+)
  # name2::     column name, or a one-entry Hash { name => keys } whose
  #             key list is used as given
  #
  # Without an explicit key list, the keys are the sorted unique values
  # of the column.
  def initialize (dataframe, name1, name2)
    @dataframe = dataframe

    if name1.is_a?(Hash)
      colname, keys = name1.first
      @column1 = @dataframe.col(colname)
      @keys1   = keys.to_ca
    else
      @column1 = @dataframe.col(name1)
      @keys1   = @column1.uniq.sort
    end

    if name2.is_a?(Hash)
      colname, keys = name2.first
      @column2 = @dataframe.col(colname)
      @keys2   = keys
    else
      @column2 = @dataframe.col(name2)
      @keys2   = @column2.uniq.sort
    end

    @addrs = {}
    @keys1.each { |key1|
      @keys2.each { |key2|
        hits = @column1.eq(key1) & @column2.eq(key2)
        @addrs[[key1, key2]] = hits.where
      }
    }
  end

  # Builds the pivot table.
  #
  # The block is called once per (key1, key2) pair with
  # +dataframe[addresses]+ for that pair; its result becomes the cell
  # value. Cells start out as UNDEF.
  #
  # Returns a CADataFrame with one column per key2, indexed by the key1
  # values.
  def table (&block)
    columns = {}
    @keys2.each { |key2|
      columns[key2] = CArray.object(@keys1.size) { UNDEF }
    }
    @keys1.each_with_index { |key1, row|
      @keys2.each { |key2|
        subframe = @dataframe[@addrs[[key1, key2]]]
        columns[key2][row] = block.call(subframe)
      }
    }
    CADataFrame.new(columns, index: @keys1)
  end
end
@@ -0,0 +1,142 @@
1
+
2
class CADataFrame

  # Resolves a column selector into a list of column names.
  #
  # selector:: nil (all columns), an Integer position, a String or Symbol
  #            name, an Array of names, or a Range of positions or names.
  #            Range ends may be nil, Integer, String or Symbol.
  #
  # Returns @column_names itself for nil, a slice of it for a Range, and
  # an Array of names otherwise.
  #
  # Raises RuntimeError when a position or name does not identify a
  # column, or when the selector type is not supported.
  def select_columns (selector = nil)
    case selector
    when nil                       ### all
      return @column_names
    when Integer
      name = @column_names[selector]
      # Check the looked-up name, not the (always truthy) selector.
      raise "invalid column index" unless name
      return [name]
    when String, Symbol            ### "AAA"
      unless @column_names.include?(selector.to_s)
        raise "invalid column specified #{selector}"
      end
      return [selector.to_s]
    when Array                     ### ["AAA", "BBB"]
      if selector.size == 1 && selector.first.is_a?(Hash)
        return select_columns(selector.first)
      end
      selector.each do |name|
        unless @column_names.include?(name.to_s)
          raise "invalid column specified #{name}"
        end
      end
      return selector.map(&:to_s)
    when Range                     ### "AAA".."BBB", 0..1
      idx1 = case selector.begin
             when nil
               0
             when Integer
               selector.begin
             when String, Symbol
               found = @column_names.search(selector.begin.to_s)
               raise "can't find column #{selector.begin}" unless found
               found
             else
               raise "invalid column specified #{selector.begin}"
             end
      idx2 = case selector.end
             when nil
               -1
             when Integer
               selector.end
             when String, Symbol   # Symbol accepted here too, as for the begin
               found = @column_names.search(selector.end.to_s)
               raise "can't find column #{selector.end}" unless found
               found
             else
               raise "invalid column specified #{selector.end}"
             end
      # An exclusive range stops one position before its end.
      idx2 -= 1 if selector.exclude_end?
      return @column_names[idx1..idx2]
    else
      raise "invalid column selector #{selector}"
    end
  end

  # Reads rows or columns, depending on the argument type.
  #
  # arg:: Range or 1-dim CArray selects rows (through +iloc+ for an
  #       Integer range on a frame without a row index, otherwise
  #       through +loc+); String or Symbol returns +column(name)+; any
  #       other value is treated as a column selector and yields a new
  #       CADataFrame of those columns
  # opt:: when given, the call is forwarded as +loc[arg, opt]+
  #
  # Raises RuntimeError when a CArray argument is not 1-dimensional.
  def [] (arg, opt = :__dummy__)
    return loc[arg, opt] if opt != :__dummy__

    case arg
    when Range
      if arg.begin.is_a?(Integer)
        return iloc[arg] unless @row_index
      end
      return loc[arg]
    when CArray
      raise "index should be 1-dim array" unless arg.rank == 1
      return loc[arg]
    when String, Symbol
      return column(arg.to_s)
    else
      new_columns = {}
      select_columns(arg).each do |key|
        new_columns[key] = @column_data[key]
      end
      return CADataFrame.new(new_columns, index: @row_index)
    end
  end

  # Writes rows or columns, depending on the argument type.
  #
  # arg::   Range or CArray assigns rows; String or Symbol assigns to the
  #         column of that name, appending it (inside +arrange+) when
  #         +column+ returns nothing for it; any other value is treated
  #         as a column selector
  # opt::   when given, the call is forwarded as +loc[arg, opt] = value+
  # value:: the data to store; for a column selector, a CADataFrame
  #         supplies its columns by position, any other value is
  #         assigned to every selected column
  def []= (arg, opt = :__dummy__, value)
    if opt != :__dummy__
      loc[arg, opt] = value
    else
      case arg
      when Range
        # Write exactly once: positionally for an Integer range on a
        # frame without a row index, by label otherwise.
        if arg.begin.is_a?(Integer) && !@row_index
          iloc[arg] = value
        else
          loc[arg] = value
        end
      when CArray
        loc[arg] = value
      when String, Symbol
        if column(arg.to_s)
          column(arg.to_s)[] = value
        else
          arrange {
            append arg, value
          }
        end
      else
        column_selector = select_columns(arg)
        case value
        when CADataFrame
          # Read every source column before the first write.
          values = column_selector.each_index.map { |i|
            value.column(i).to_ca
          }
          column_selector.each_with_index do |key, i|
            column(key)[] = values[i]
          end
        else
          column_selector.each do |key|
            column(key)[] = value
          end
        end
      end
    end
  end

end
@@ -0,0 +1,102 @@
1
class CADataFrame

  # Renders the data frame as HTML <table> markup.
  #
  # threshold::   when an Integer and the frame has more rows than this,
  #               only leading and trailing rows are emitted, separated
  #               by a row of "..." cells; a non-Integer disables this
  # time_format:: format handed to +time_strftime+ for CATimeIndex
  #               columns and row index; when nil, +time_format+ is
  #               called on the time values with "%F %T%:z" instead
  # index::       when true, a leading column headed " " holds the row
  #               index, or CArray.int(@row_number).seq if the frame has
  #               no row index
  #
  # Returns a String: a "DataFrame: ..." summary line followed by the
  # table markup, one element per line. Cell text is padded with +ljust+
  # so that columns line up in the raw markup; when any header or cell
  # is not ASCII-only, the padding is derived from +__strwidth__+.
  #
  # NOTE(review): nothing in this method HTML-escapes header or cell
  # text; confirm whether +__obj_to_string__+ does, or whether callers
  # are expected to pass trusted data only.
  def to_html (threshold = 8, time_format: nil, index: true)
    # Shallow copy: entries are replaced or added below without touching
    # the frame's own column table.
    columns = @column_data.clone
    @column_names.each do |name|
      if columns[name].is_a?(CATimeIndex)
        if time_format
          columns[name] = columns[name].time.time_strftime(time_format)
        else
          columns[name] = columns[name].time.time_format("%F %T%:z")
        end
      end
    end
    if index
      namelist = [" "] + @column_names
      if @row_index
        if @row_index.is_a?(CATimeIndex)
          if time_format
            row_index = @row_index.time.time_strftime(time_format)
          else
            row_index = @row_index.time.time_format("%F %T%:z")
          end
        else
          row_index = @row_index
        end
        tbl = CADFArray.new(namelist, columns.update(" " => row_index))
      else
        tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
      end
    else
      namelist = @column_names
      tbl = CADFArray.new(namelist, columns)
    end
    if threshold.is_a?(Integer) and @row_number > threshold
      list = tbl[0..(threshold/2),nil].to_a
      list.push namelist.map { "..." }
      list.push(*tbl[-threshold/2+1..-1,nil].to_a)
      tbl = list.to_ca
    end
    datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
    # mb flags the columns whose header or cells are not ASCII-only.
    datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
    namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
    mb      = datamb.or(namemb)
    namelen = namelist.map(&:length).to_ca
    datalen = datastr.convert(&:length)
    if mb.max == 0
      if datalen.size == 0
        lengths = namelen.to_a
      else
        lengths = datalen.max(0).pmax(namelen).to_a
      end
      table_in = "<table>"
      header = "<thead><tr>" +
               [namelist, lengths].transpose.map{|name, width|
                 "<th>#{name.to_s.ljust(width)}</th>"
               }.join() + "</tr></thead>"
      body_in = "<tbody>"
      ary = [table_in, header, body_in]
      if datalen.size > 0
        datastr[:i,nil].each_with_index do |blk, i|
          list = blk.flatten.to_a
          ary << "<tr>" + [list, lengths].transpose.map {|value, width|
            "<td>#{value.ljust(width)}</td>"
          }.join() + "</tr>"
        end
      end
      ary << "</tbody>"
      ary << "</table>"
      return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
    else
      namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
      if datalen.size == 0
        maxwidth = namewidth
      else
        datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
        maxwidth  = datawidth.max(0).pmax(namewidth)
        # Per-cell ljust length: column width minus the cell's own width
        # plus its character count. It is only defined (and only used
        # below) when there are data cells; computing it unconditionally
        # failed on an empty frame because datawidth was never assigned.
        padlen = maxwidth[:*,nil] - datawidth + datalen
      end
      table_in = "<table>"
      header = "<thead><tr>" +
               [namelist, maxwidth.to_a].transpose.map{|name, width|
                 "<th>#{name.to_s.ljust(width-__strwidth__(name.to_s)+name.to_s.length)}</th>"
               }.join() + "</tr></thead>"
      body_in = "<tbody>"
      ary = [table_in, header, body_in]
      if datalen.size > 0
        datastr[:i,nil].each_with_addr do |blk, i|
          list = blk.flatten.to_a
          ary << "<tr>" + list.map.with_index {|value, j|
            "<td>#{value.ljust(padlen[i,j])}</td>"
          }.join() + "</tr>"
        end
      end
      ary << "</tbody>"
      ary << "</table>"
      return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
    end
  end

end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carray-dataframe
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroki Motoyoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-02 00:00:00.000000000 Z
11
+ date: 2020-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: carray
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.1'
19
+ version: '1.5'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.1'
26
+ version: '1.5'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: axlsx
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,7 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.1'
55
- description: " Extension for realizing DataFrame of R in Ruby\n"
55
+ description: " DataFrame in Ruby\n"
56
56
  email: ''
57
57
  executables: []
58
58
  extensions: []
@@ -60,28 +60,24 @@ extra_rdoc_files: []
60
60
  files:
61
61
  - API.txt
62
62
  - README.md
63
+ - Rakefile
63
64
  - carray-dataframe.gemspec
64
- - examples/R/fit.rb
65
- - examples/R/iris.rb
66
- - examples/R/japan_area.rb
67
- - examples/R/kyaku.rb
68
- - examples/group_by.rb
69
- - examples/hist.rb
70
- - examples/iris.rb
71
- - examples/map.rb
72
- - examples/match.rb
73
- - examples/test.xlsx
74
- - examples/test1.rb
75
- - examples/test2.rb
76
- - examples/test3.db
77
- - examples/test3.rb
78
- - examples/test3.xlsx
79
- - examples/to_excel.rb
80
- - lib/R.rb
81
- - lib/carray/autoload/autoload_dataframe_dataframe.rb
82
- - lib/carray/dataframe/dataframe.rb
65
+ - lib/carray-dataframe.rb
66
+ - lib/carray-dataframe/arranger.rb
67
+ - lib/carray-dataframe/cadf_array.rb
68
+ - lib/carray-dataframe/converter.rb
69
+ - lib/carray-dataframe/dataframe.rb
70
+ - lib/carray-dataframe/group.rb
71
+ - lib/carray-dataframe/iloc_accessor.rb
72
+ - lib/carray-dataframe/io.rb
73
+ - lib/carray-dataframe/join.rb
74
+ - lib/carray-dataframe/loc_accessor.rb
75
+ - lib/carray-dataframe/pivot.rb
76
+ - lib/carray-dataframe/reference.rb
77
+ - lib/carray-dataframe/to_html.rb
83
78
  homepage: https://github.com/himotoyoshi/carray-dataframe
84
- licenses: []
79
+ licenses:
80
+ - MIT
85
81
  metadata: {}
86
82
  post_install_message:
87
83
  rdoc_options: []
@@ -98,9 +94,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
94
  - !ruby/object:Gem::Version
99
95
  version: '0'
100
96
  requirements: []
101
- rubyforge_project:
102
- rubygems_version: 2.6.13
97
+ rubygems_version: 3.1.2
103
98
  signing_key:
104
99
  specification_version: 4
105
- summary: Extension for realizing DataFrame of R in Ruby
100
+ summary: DataFrame in Ruby
106
101
  test_files: []