carray-dataframe 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7f93c348e3fd8e166ddba89d5ddc1f0fb86653a6
4
- data.tar.gz: cb16a824a9e0c2aaf40db3f26328176ef9eb882e
2
+ SHA256:
3
+ metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
4
+ data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
5
5
  SHA512:
6
- metadata.gz: 8f96c5cf91470a732a09aa1e3629b94963d29d786dbfa9768430c18e0ebe1b2219f7256ddc17c783ce8cac9253c19c4cbce9e16d005435f0f718f97c788b832d
7
- data.tar.gz: b982e8a4b8f162f69bb86fd31d0d92d1d86c7c06b663b9cc091974fe46fcdbf16635e3e793cc91b813fd54cff29e7135416c449ba0089985332dad354d2a18a9
6
+ metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
7
+ data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
data/API.txt CHANGED
@@ -24,16 +24,14 @@ df["AAA"] => 1 dimensional CArray
24
24
  df[["AAA"]].to_ca => 2 dimensional CArray with column_names
25
25
  df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
26
26
 
27
-
28
27
  df[dfmask]
29
28
  returns a new detached CADataFrame, masked where dfmask's value equals 1
30
29
 
31
-
32
30
  ### Iterators
33
31
 
34
32
  CADataFrame#each_column_name { |name| ... }
35
33
  CADataFrame#each_column { |name, column| ... }
36
- CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
34
+ CADataFrame#each_row(with: [Array|Hash|CArray]) { |row| ... }
37
35
  CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
38
36
 
39
37
  ### Transformation
@@ -43,7 +41,6 @@ CADataFrame#add_suffix(suffix_string) -> CADataFrame
43
41
 
44
42
  CADataFrame#transpose(column_names: )
45
43
 
46
-
47
44
  ### Conversion
48
45
 
49
46
  CADataFrame#ca -> CADFArray (Reference Array)
@@ -67,9 +64,6 @@ ex) df.columns_to_hash("bbb",["aaa","ccc"])
67
64
 
68
65
  {10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
69
66
 
70
-
71
-
72
-
73
67
  CADataFrame
74
68
 
75
69
  #append(name) { INSTANCE_CONTEXT } <- any carray
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  carray-dataframe
2
2
  ================
3
3
 
4
-
5
4
  gem install rsruby
5
+
6
+
7
+
@@ -0,0 +1,11 @@
1
# Gemspec file that the :install task builds and installs from.
GEMSPEC = "carray-dataframe.gemspec"

# Builds the gem described by GEMSPEC and installs the resulting .gem file.
task :install do
  # Gem::Specification.load evaluates the gemspec and returns the
  # specification object (nil when the file cannot be loaded).
  spec = Gem::Specification.load(GEMSPEC)
  raise "cannot load #{GEMSPEC}" unless spec
  # `&&` rather than `;` so that a failed build does not go on to install a
  # stale .gem file left over from an earlier build.
  system "gem build #{GEMSPEC} && gem install #{spec.full_name}.gem"
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
@@ -1,25 +1,27 @@
1
1
 
2
2
  Gem::Specification::new do |s|
3
- version = "1.0.0"
3
+ version = "1.1.1"
4
4
 
5
5
  files = Dir.glob("**/*") - [
6
- Dir.glob("carray*.gem"),
6
+ Dir.glob("carray-dataframe*.gem"),
7
+ Dir.glob("examples/**/*"),
8
+ Dir.glob("test/**/*"),
7
9
  ].flatten
8
10
 
9
11
  s.platform = Gem::Platform::RUBY
10
12
  s.name = "carray-dataframe"
11
- s.summary = "Extension for realizing DataFrame of R in Ruby"
13
+ s.summary = "DataFrame in Ruby"
12
14
  s.description = <<-HERE
13
- Extension for realizing DataFrame of R in Ruby
15
+ DataFrame in Ruby
14
16
  HERE
15
17
  s.version = version
16
18
  s.author = "Hiroki Motoyoshi"
17
19
  s.email = ""
18
20
  s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
21
+ s.license = 'MIT'
19
22
  s.files = files
20
- s.has_rdoc = false
21
23
  s.required_ruby_version = ">= 1.8.1"
22
- s.add_runtime_dependency 'carray', '~> 1.1'
24
+ s.add_runtime_dependency 'carray', '~> 1.5'
23
25
  s.add_runtime_dependency 'axlsx', '~> 2.0'
24
26
  s.add_runtime_dependency 'spreadsheet', '~> 1.1'
25
27
  end
@@ -0,0 +1,13 @@
1
+ require "carray-timeindex"
2
+ require "carray-dataframe/dataframe"
3
+ require "carray-dataframe/reference"
4
+ require "carray-dataframe/loc_accessor"
5
+ require "carray-dataframe/iloc_accessor"
6
+ require "carray-dataframe/arranger"
7
+ require "carray-dataframe/cadf_array"
8
+ require "carray-dataframe/group"
9
+ require "carray-dataframe/pivot"
10
+ require "carray-dataframe/join"
11
+ require "carray-dataframe/io"
12
+ require "carray-dataframe/converter"
13
+ require "carray-dataframe/to_html"
@@ -0,0 +1,209 @@
1
+ #############################################################
2
+ #
3
+ # ARRANGER
4
+ #
5
+ #############################################################
6
class CADataFrame
  # Evaluation context for blocks that restructure a data frame in place.
  #
  # The block given to #arrange is run with +instance_exec+, so the private
  # helpers below (+rename+, +drop+, +type+, ...) can be called without a
  # receiver, and a bare column name resolves to that column through
  # +method_missing+.
  class Arranger
    # dataframe:: the frame to operate on. The helpers read and modify its
    #             +columns+ (name => column) and +column_names+ (ordered
    #             list of names).
    def initialize(dataframe)
      @dataframe = dataframe
    end

    # Runs +block+ in the context of this arranger and returns the data
    # frame. A block declaring exactly one parameter also receives the data
    # frame as its argument.
    def arrange(&block)
      if block.arity == 1
        instance_exec(@dataframe, &block)
      else
        instance_exec(&block)
      end
      @dataframe
    end

    private

    # --- simple delegations to the data frame --------------------------

    def index
      @dataframe.index
    end

    def column_names
      @dataframe.column_names
    end

    def row_number
      @dataframe.row_number
    end

    def row_index
      @dataframe.row_index
    end

    # Forwards to the data frame's own +method+.
    # NOTE(review): this shadows Object#method inside the arranger --
    # presumably intentional for the DSL; confirm against CADataFrame#method.
    def method(hash)
      @dataframe.method(hash)
    end

    # --- time columns ---------------------------------------------------

    # Replaces column +name+ with a CATimeIndex built from it, using the
    # fixed unit "sec" and the given +format+.
    def timeseries(name, format)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], "sec", format: format)
    end

    # Replaces column +name+ with a CATimeIndex built from it; +unit+,
    # +since+, +offset+ and +format+ are passed through to
    # CATimeIndex.from_time_array.
    def timeindex(name, unit: "second", since: nil, offset: nil, format: nil)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], unit,
                                    since: since, format: format, offset: offset)
    end

    # --- type conversion and masking -----------------------------------

    # Converts column +name+ to +type+. When +mask+ is given, that value is
    # additionally masked out of the converted column.
    def type(type, name, mask = :novalue)
      key = name.to_s
      @dataframe.columns[key] = @dataframe.columns[key].to_type(type)
      # Fix: the original read the undefined `options[:maskout]` here, which
      # raised whenever a mask value was supplied.
      @dataframe.columns[key].maskout!(mask) unless mask == :novalue
    end

    # Converts each named column to :double. Raises for unknown names.
    def double(*names)
      __convert_columns__(:double, names)
    end

    # Converts each named column to :int. Raises for unknown names.
    def int(*names)
      __convert_columns__(:int, names)
    end

    # Obsolete: masks +value+ in every named column. Use #mask instead.
    def maskout(value, *names)
      warn "maskout is obsolete. use mask(name, *values)"
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].maskout!(value)
      end
    end

    # Masks each of +values+ in column +name+. An Array entry is splatted
    # into a single maskout! call; a nil entry is passed as an explicit nil.
    def mask(name, *values)
      column = @dataframe.columns[name.to_s]
      values.each do |args|
        if args.nil?
          column.maskout!(nil)
        else
          column.maskout!(*args)
        end
      end
    end

    # Calls unmask(value) on every named column.
    def unmask(value, *names)
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].unmask(value)
      end
    end

    # --- column management ---------------------------------------------

    # Removes the named columns from both +columns+ and +column_names+.
    # Names not present in the frame are ignored.
    def eliminate(*names)
      return self if names.empty?
      names = names.map(&:to_s)
      # Iterate over a copy because column_names is modified inside the loop.
      @dataframe.column_names.clone.each do |name|
        next unless names.include?(name)
        @dataframe.columns.delete(name)
        @dataframe.column_names.delete(name)
      end
    end

    alias drop eliminate

    # Builds a template array from the first column of the frame.
    def template(*args, &block)
      @dataframe.columns.first[1].template(*args, &block)
    end

    def col(name)
      @dataframe.col(name)
    end

    # Adds +new_column+ under +name+ as the last column. A nil/false column
    # is replaced by an :object template of the first column.
    def append(name, new_column)
      @dataframe.columns[name.to_s] = __new_column__(new_column)
      @dataframe.column_names.push(name.to_s)
    end

    # Same as #append, but the name is inserted at the front.
    def prepend(name, new_column)
      @dataframe.columns[name.to_s] = __new_column__(new_column)
      @dataframe.column_names.unshift(name.to_s)
    end

    alias lead prepend

    # Renames column +name1+ to +name2+, keeping its position.
    # Raises RuntimeError when +name1+ is not a column.
    def rename(name1, name2)
      from = name1.to_s
      to   = name2.to_s
      idx  = @dataframe.column_names.index(from)
      raise "unknown column name #{name1}" unless idx
      @dataframe.column_names[idx] = to
      @dataframe.columns[to] = @dataframe.columns.delete(from)
    end

    def downcase
      @dataframe.downcase
    end

    def classify(name, scale, opt = {})
      @dataframe.classify(name, scale, opt)
    end

    # Maps the values of a column through +mapper+ and returns the result.
    #
    # name_or_column:: a column name (String/Symbol), a CArray, or an Array.
    # mapper:: a Hash (value lookup), or a CArray/Array (used via +project+).
    #          Any other mapper yields nil.
    def map(mapper, name_or_column)
      column =
        case name_or_column
        when String, Symbol then @dataframe.columns[name_or_column.to_s]
        when CArray         then name_or_column
        when Array          then name_or_column.to_ca
        else raise "invalid argument"
        end
      case mapper
      when Hash
        # Fix: the original looked values up in `hash` (Object#hash, an
        # Integer) instead of the mapper that was passed in.
        column.convert(:object) { |v| mapper[v] }
      when CArray
        mapper.project(column)
      when Array
        mapper.to_ca.project(column)
      end
    end

    # --- dynamic column access -----------------------------------------

    # A zero-argument call whose name matches a column (or an entry of the
    # frame's __methods__ table) returns that column.
    def method_missing(name, *args)
      if args.size == 0
        key = name.to_s
        if @dataframe.column_names.include?(key)
          return @dataframe.columns[key]
        elsif @dataframe.__methods__.include?(key)
          return @dataframe.columns[@dataframe.__methods__[key]]
        end
      end
      super
    end

    # Keeps respond_to? consistent with the names method_missing resolves.
    def respond_to_missing?(name, include_private = false)
      key = name.to_s
      @dataframe.column_names.include?(key) ||
        @dataframe.__methods__.include?(key) ||
        super
    end

    # Shared body of #double and #int.
    def __convert_columns__(type_name, names)
      names.flatten.map(&:to_s).each do |name|
        raise "Unknown column name '#{name}'" unless @dataframe.column_names.include?(name)
        type(type_name, name)
      end
    end

    # Shared by #append and #prepend: supplies the default column and
    # coerces anything that is not a CArray with to_ca.
    def __new_column__(new_column)
      new_column ||= @dataframe.columns.first[1].template(:object)
      new_column.is_a?(CArray) ? new_column : new_column.to_ca
    end
  end
end
@@ -0,0 +1,106 @@
1
+ #############################################################
2
+ #
3
+ # CADFArray
4
+ #
5
+ #############################################################
6
# Read-only, object-typed, two-dimensional CAObject whose column +c+ is backed
# by the array stored under @column_names[c] in @column_data.
#
# NOTE(review): the *_index / *_data methods below look like the callbacks
# CAObject invokes for element access and bulk transfer -- confirm against
# the carray documentation.
class CADFArray < CAObject # :nodoc:

  attr_reader :column_names, :index

  # column_names:: ordered list of column names.
  # column_data::  hash-like container mapping each name to its column array.
  # index::        optional row index; defaults to a 0-based integer sequence
  #                as long as the first column.
  def initialize(column_names, column_data, index: nil)
    @column_names = column_names
    @column_data  = column_data
    @index = index || CArray.int(column_data.first[1].size).seq
    dim = [@column_data[@column_names.first].size, @column_names.size]
    extend CArray::TableMethods
    super(:object, dim, :read_only=>true)
    __create_mask__
  end

  # Reads element [row, column] from the backing column's value array.
  def fetch_index(idx)
    r, c = *idx
    @column_data[@column_names[c]].value[r]
  end

  # Writes +value+ to element [row, column] of the backing column.
  def store_index(idx, value)
    r, c = *idx
    @column_data[@column_names[c]][r] = value
  end

  # Copies every backing column into the matching column of +data+.
  def copy_data(data)
    @column_names.each_with_index do |name, i|
      data[nil,i] = @column_data[name].value
    end
  end

  # Writes every column of +data+ back into the backing columns.
  def sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].value[] = data[nil,i]
    end
  end

  # Fills every backing column with +value+.
  def fill_data(value)
    @column_names.each do |name|
      # Fix: the original assigned `@column_data[name] = value`, which
      # replaced the column itself with the scalar instead of filling it.
      # Written the same way sync_data stores into the columns.
      @column_data[name].value[] = value
    end
  end

  # Creates a mask on every backing column.
  def create_mask
    @column_names.each do |name|
      @column_data[name].instance_eval{ __create_mask__ }
    end
  end

  # Mask value of element [row, column]; 0 when the column has no mask.
  def mask_fetch_index(idx)
    r, c = *idx
    column = @column_data[@column_names[c]]
    column.has_mask? ? column.mask[r] : 0
  end

  # Sets the mask of element [row, column].
  def mask_store_index(idx, value)
    r, c = *idx
    # The original branched on has_mask? but performed this same assignment
    # in both branches, so a single statement is equivalent.
    @column_data[@column_names[c]].mask[r] = value
  end

  # Copies the mask of every masked backing column into +data+; columns
  # without a mask leave their part of +data+ untouched.
  def mask_copy_data(data)
    @column_names.each_with_index do |name, i|
      next unless @column_data[name].has_mask?
      data[nil,i] = @column_data[name].mask
    end
  end

  # Writes every column of +data+ into the backing columns' masks.
  def mask_sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].mask[] = data[nil,i]
    end
  end

  # Fills the mask of every backing column with +value+.
  def mask_fill_data(value)
    @column_names.each do |name|
      @column_data[name].mask[] = value
    end
  end

  # Converts via the superclass, then extends the result with
  # CArray::TableMethods and sets its column names.
  def to_ca
    obj = super
    obj.extend CArray::TableMethods
    obj.column_names = @column_names
    obj
  end
end
@@ -0,0 +1,97 @@
1
+
2
module CArray::TableMethods

  # Builds a CADataFrame from this table.
  #
  # Returns nil when the table has no elements. When the table has a @header
  # or a @note, both are copied onto the new frame, which then also gets a
  # +note+ reader and a +header+ method as singleton methods.
  def to_dataframe(index: nil, &block)
    return nil if size == 0

    frame = CADataFrame.new(self, index: index, &block)
    return frame unless @header || @note

    frame.instance_variable_set(:@header, @header)
    frame.instance_variable_set(:@note, @note)
    frame.singleton_class.class_eval do
      attr_reader :note

      # With a name: the header entry stored for that column.
      # Without a name: the frame's column names.
      def header(name = nil)
        name ? @header[name.to_s] : @column_names
      end
    end
    frame
  end

  alias to_df to_dataframe
end
28
+
29
class CADataFrame

  # Returns the frame as a nested Array. When the frame has a row index and
  # +with_index+ is true, the index is included as a column named "".
  def to_a(with_index: true)
    table =
      if @row_index && with_index
        CADFArray.new([""] + @column_names, @column_data.clone.update("" => index))
      else
        ca.to_ca
      end
    table.to_a
  end

  # Renders the frame through CADFArray#to_csv.
  #
  # io, rs, sep, fill and the block are passed on to CADFArray#to_csv.
  # with_index::  include the row index as a column named "index" (only when
  #               the frame has a row index).
  # time_format:: format for CATimeIndex columns; without it each time value
  #               is rendered with +to_s+.
  def to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
    names = @column_names
    data  = @column_data.clone   # work on a copy; the frame's own columns stay untouched
    if @row_index && with_index
      names = ["index"] + names
      data.update("index" => index)
    end
    data.each do |key, col|
      next unless col.is_a?(CATimeIndex)
      data[key] =
        if time_format
          col.time.time_format(time_format)
        else
          col.time.convert(:object) { |t| t.to_s }
        end
    end
    CADFArray.new(names, data).to_csv(io, rs: rs, sep: sep, fill: fill, &block)
  end

  # Converts the frame into a Daru::DataFrame (the daru library is loaded on
  # first use). Each column is passed as a plain Array after unmask(nil).
  def to_daru
    require "daru"
    data = {}
    each_column_name do |name|
      data[name] = column(name).object.unmask(nil).to_a
    end
    opts = {}
    opts[:index] = @row_index.to_a if @row_index
    opts[:order] = @column_names
    Daru::DataFrame.new(data, **opts)
  end

  # Writes the frame to an xlsx file (the axlsx library is loaded on first
  # use). Masked elements are written as "=NA()".
  #
  # filename::       path handed to Axlsx::Package#serialize.
  # sheet_name::     name of the worksheet.
  # with_row_index:: when true, prepend a column holding the row index.
  # A given block receives the worksheet before the file is written.
  def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
    require "axlsx"
    package = Axlsx::Package.new
    package.use_shared_strings = true
    sheet = package.workbook.add_worksheet(name: sheet_name)
    frame = to_df.objectify.unmask("=NA()")
    if with_row_index
      sheet.add_row([""] + column_names)
      frame.each_row_with_row_index(with: Array) { |cells, i| sheet.add_row([i] + cells) }
    else
      sheet.add_row(column_names)
      frame.each_row(with: Array) { |cells| sheet.add_row(cells) }
    end
    yield sheet if block_given?
    package.serialize(filename)
  end
end
+ end