carray-dataframe 1.0.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7f93c348e3fd8e166ddba89d5ddc1f0fb86653a6
4
- data.tar.gz: cb16a824a9e0c2aaf40db3f26328176ef9eb882e
2
+ SHA256:
3
+ metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
4
+ data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
5
5
  SHA512:
6
- metadata.gz: 8f96c5cf91470a732a09aa1e3629b94963d29d786dbfa9768430c18e0ebe1b2219f7256ddc17c783ce8cac9253c19c4cbce9e16d005435f0f718f97c788b832d
7
- data.tar.gz: b982e8a4b8f162f69bb86fd31d0d92d1d86c7c06b663b9cc091974fe46fcdbf16635e3e793cc91b813fd54cff29e7135416c449ba0089985332dad354d2a18a9
6
+ metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
7
+ data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
data/API.txt CHANGED
@@ -24,16 +24,14 @@ df["AAA"] => 1 dimensional CArray
24
24
  df[["AAA"]].to_ca => 2 dimensional CArray with column_names
25
25
  df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
26
26
 
27
-
28
27
  df[dfmask]
29
28
  returns a new detached CADataFrame, masked where dfmask's value equals 1
30
29
 
31
-
32
30
  ### Iterators
33
31
 
34
32
  CADataFrame#each_column_name { |name| ... }
35
33
  CADataFrame#each_column { |name, column| ... }
36
- CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
34
+ CADataFrame#each_row(with: [Array|Hash|CArray]) { |row| ... }
37
35
  CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
38
36
 
39
37
  ### Transformation
@@ -43,7 +41,6 @@ CADataFrame#add_suffix(suffix_string) -> CADataFrame
43
41
 
44
42
  CADataFrame#transpose(column_names: )
45
43
 
46
-
47
44
  ### Conversion
48
45
 
49
46
  CADataFrame#ca -> CADFArray (Reference Array)
@@ -67,9 +64,6 @@ ex) df.columns_to_hash("bbb",["aaa","ccc"])
67
64
 
68
65
  {10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
69
66
 
70
-
71
-
72
-
73
67
  CADataFrame
74
68
 
75
69
  #append(name) { INSTANCE_CONTEXT } <- any carray
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  carray-dataframe
2
2
  ================
3
3
 
4
-
5
4
  gem install rsruby
5
+
6
+
7
+
@@ -0,0 +1,11 @@
1
# Path of the gemspec that the :install task builds from.
GEMSPEC = "carray-dataframe.gemspec"

# Builds the gem described by GEMSPEC and installs the resulting package.
task :install do
  # Load the specification through RubyGems rather than eval-ing the raw
  # file text; Gem::Specification.load returns nil when the file cannot be
  # loaded, so fail early with a clear message in that case.
  spec = Gem::Specification.load(GEMSPEC)
  abort "cannot load #{GEMSPEC}" unless spec

  # Run the two steps as separate commands: `sh` raises when a command
  # fails, so a broken build no longer falls through to `gem install`.
  sh "gem", "build", GEMSPEC
  sh "gem", "install", "#{spec.full_name}.gem"
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
@@ -1,25 +1,27 @@
1
1
 
2
2
  Gem::Specification::new do |s|
3
- version = "1.0.0"
3
+ version = "1.1.1"
4
4
 
5
5
  files = Dir.glob("**/*") - [
6
- Dir.glob("carray*.gem"),
6
+ Dir.glob("carray-dataframe*.gem"),
7
+ Dir.glob("examples/**/*"),
8
+ Dir.glob("test/**/*"),
7
9
  ].flatten
8
10
 
9
11
  s.platform = Gem::Platform::RUBY
10
12
  s.name = "carray-dataframe"
11
- s.summary = "Extension for realizing DataFrame of R in Ruby"
13
+ s.summary = "DataFrame in Ruby"
12
14
  s.description = <<-HERE
13
- Extension for realizing DataFrame of R in Ruby
15
+ DataFrame in Ruby
14
16
  HERE
15
17
  s.version = version
16
18
  s.author = "Hiroki Motoyoshi"
17
19
  s.email = ""
18
20
  s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
21
+ s.license = 'MIT'
19
22
  s.files = files
20
- s.has_rdoc = false
21
23
  s.required_ruby_version = ">= 1.8.1"
22
- s.add_runtime_dependency 'carray', '~> 1.1'
24
+ s.add_runtime_dependency 'carray', '~> 1.5'
23
25
  s.add_runtime_dependency 'axlsx', '~> 2.0'
24
26
  s.add_runtime_dependency 'spreadsheet', '~> 1.1'
25
27
  end
@@ -0,0 +1,13 @@
1
+ require "carray-timeindex"
2
+ require "carray-dataframe/dataframe"
3
+ require "carray-dataframe/reference"
4
+ require "carray-dataframe/loc_accessor"
5
+ require "carray-dataframe/iloc_accessor"
6
+ require "carray-dataframe/arranger"
7
+ require "carray-dataframe/cadf_array"
8
+ require "carray-dataframe/group"
9
+ require "carray-dataframe/pivot"
10
+ require "carray-dataframe/join"
11
+ require "carray-dataframe/io"
12
+ require "carray-dataframe/converter"
13
+ require "carray-dataframe/to_html"
@@ -0,0 +1,209 @@
1
+ #############################################################
2
+ #
3
+ # ARRANGER
4
+ #
5
+ #############################################################
6
class CADataFrame
  # Evaluation context for "arrange" blocks.
  #
  # An Arranger wraps one data frame. The block handed to #arrange is run
  # with instance_exec on the Arranger, so the private helpers below
  # (type conversion, masking, renaming, adding/removing columns, ...) can
  # be called inside the block without an explicit receiver. All helpers
  # operate on the wrapped frame's +columns+ and +column_names+ in place.
  class Arranger
    # dataframe - the frame whose columns this Arranger edits.
    def initialize(dataframe)
      @dataframe = dataframe
    end

    # Runs +block+ with self set to this Arranger. A block declaring exactly
    # one parameter also receives the wrapped data frame as its argument.
    #
    # Returns the wrapped data frame.
    def arrange(&block)
      if block.arity == 1
        instance_exec(@dataframe, &block)
      else
        instance_exec(&block)
      end
      @dataframe
    end

    private

    # --- read-only delegations to the wrapped frame ---------------------

    def index
      @dataframe.index
    end

    def column_names
      @dataframe.column_names
    end

    def row_number
      @dataframe.row_number
    end

    def row_index
      @dataframe.row_index
    end

    # Forwards to the data frame's #method.
    # NOTE: inside the block this shadows Object#method.
    def method(hash)
      @dataframe.method(hash)
    end

    # --- time columns -----------------------------------------------------

    # Replaces column +name+ with CATimeIndex.from_time_array(column, "sec",
    # format: format).
    def timeseries(name, format)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], "sec", format: format)
    end

    # Replaces column +name+ with a CATimeIndex built from it; the keyword
    # arguments are passed through to CATimeIndex.from_time_array.
    def timeindex(name, unit: "second", since: nil, offset: nil, format: nil)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], unit,
                                    since: since, format: format, offset: offset)
    end

    # --- type conversion --------------------------------------------------

    # Replaces column +name+ with column.to_type(type). When +mask+ is given,
    # maskout!(mask) is then applied to the converted column.
    #
    # Fix: the original passed the undefined `options[:maskout]` here, which
    # raised as soon as a mask argument was supplied.
    def type(type, name, mask = :novalue)
      key = name.to_s
      @dataframe.columns[key] = @dataframe.columns[key].to_type(type)
      if mask != :novalue
        @dataframe.columns[key].maskout!(mask)
      end
    end

    # Converts each named column with type(:double, name).
    # Raises RuntimeError on the first name that is not a column.
    def double(*names)
      __retype__(:double, names)
    end

    # Converts each named column with type(:int, name).
    # Raises RuntimeError on the first name that is not a column.
    def int(*names)
      __retype__(:int, names)
    end

    # Shared body of #double and #int. Columns listed before an unknown name
    # have already been converted when the error is raised.
    def __retype__(type_name, names)
      names.flatten.map(&:to_s).each do |name|
        unless @dataframe.column_names.include?(name)
          raise "Unknown column name '#{name}'"
        end
        type(type_name, name)
      end
    end

    # --- removing columns -------------------------------------------------

    # Deletes the named columns from both +columns+ and +column_names+.
    # Names that are not present are ignored. Returns self when called
    # without names.
    def eliminate(*names)
      return self if names.empty?
      targets = names.map(&:to_s)
      # Iterate over a copy because column_names is modified in the loop.
      @dataframe.column_names.clone.each do |name|
        next unless targets.include?(name)
        @dataframe.columns.delete(name)
        @dataframe.column_names.delete(name)
      end
    end

    alias drop eliminate

    # Delegates to #template of the first entry in +columns+.
    def template(*args, &block)
      @dataframe.columns.first[1].template(*args, &block)
    end

    # --- masking ----------------------------------------------------------

    # Obsolete form of #mask: calls maskout!(value) on every named column.
    def maskout(value, *names)
      warn "maskout is obsolete. use mask(name, *values)"
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].maskout!(value)
      end
    end

    # Calls maskout! on column +name+ once per entry of +values+. An Array
    # entry is splatted into the call.
    def mask(name, *values)
      column = @dataframe.columns[name.to_s]
      values.each do |args|
        if args.nil?
          # `*nil` would expand to no arguments, so pass nil explicitly.
          column.maskout!(nil)
        else
          column.maskout!(*args)
        end
      end
    end

    # Calls unmask(value) on every named column.
    def unmask(value, *names)
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].unmask(value)
      end
    end

    # --- adding / renaming columns ----------------------------------------

    def col(name)
      @dataframe.col(name)
    end

    # Stores +new_column+ under +name+ and puts the name at the end of
    # +column_names+. When +new_column+ is nil/false, an :object template of
    # the first column is stored instead; values that are not CArray are
    # converted with #to_ca.
    def append(name, new_column = nil)
      __store_column__(name, new_column)
      @dataframe.column_names.push(name.to_s)
    end

    # Same as #append, but puts the name at the front of +column_names+.
    def prepend(name, new_column = nil)
      __store_column__(name, new_column)
      @dataframe.column_names.unshift(name.to_s)
    end

    alias lead prepend

    # Shared body of #append and #prepend: normalizes the value and writes it
    # into +columns+ (the caller updates +column_names+).
    def __store_column__(name, new_column)
      new_column ||= @dataframe.columns.first[1].template(:object)
      new_column = new_column.to_ca unless new_column.is_a?(CArray)
      @dataframe.columns[name.to_s] = new_column
    end

    # Renames column +name1+ to +name2+, keeping its position in
    # +column_names+. Raises RuntimeError when +name1+ is not a column.
    def rename(name1, name2)
      old_key = name1.to_s
      new_key = name2.to_s
      position = @dataframe.column_names.index(old_key)
      raise "unknown column name #{name1}" unless position
      @dataframe.column_names[position] = new_key
      column = @dataframe.columns[old_key]
      @dataframe.columns.delete(old_key)
      @dataframe.columns[new_key] = column
    end

    def downcase
      @dataframe.downcase
    end

    def classify(name, scale, opt = {})
      @dataframe.classify(name, scale, opt)
    end

    # Translates the values of a column through +mapper+.
    #
    # mapper         - Hash (each value v becomes mapper[v]), or a CArray /
    #                  Array on which #project(column) is called.
    # name_or_column - column name (String/Symbol), a CArray, or an Array.
    #
    # Returns the mapped array; returns nil for any other kind of mapper.
    # Raises RuntimeError ("invalid argument") for an unsupported
    # +name_or_column+.
    def map(mapper, name_or_column)
      column =
        case name_or_column
        when String, Symbol
          @dataframe.columns[name_or_column.to_s]
        when CArray
          name_or_column
        when Array
          name_or_column.to_ca
        else
          raise "invalid argument"
        end
      case mapper
      when Hash
        # Fix: the original looked values up in `hash` (Object#hash, an
        # Integer) instead of the mapper that was passed in.
        column.convert(:object) { |v| mapper[v] }
      when CArray
        mapper.project(column)
      when Array
        mapper.to_ca.project(column)
      end
    end

    # --- bare-name column access ------------------------------------------

    # A call without arguments whose name is a column name, or a key of the
    # frame's __methods__ table, returns the corresponding column.
    def method_missing(name, *args)
      if args.empty?
        key = name.to_s
        if @dataframe.column_names.include?(key)
          return @dataframe.columns[key]
        elsif @dataframe.__methods__.include?(key)
          return @dataframe.columns[@dataframe.__methods__[key]]
        end
      end
      super
    end

    # Keeps respond_to? consistent with the names method_missing resolves.
    def respond_to_missing?(name, include_private = false)
      key = name.to_s
      @dataframe.column_names.include?(key) ||
        @dataframe.__methods__.include?(key) ||
        super
    end
  end
end
@@ -0,0 +1,106 @@
1
+ #############################################################
2
+ #
3
+ # CADFArray
4
+ #
5
+ #############################################################
6
# Two-dimensional, object-typed array (rows x columns) presented over a set
# of named column arrays. It is created with :read_only => true and extended
# with CArray::TableMethods. The *_index / *_data methods below read from and
# write to the per-column arrays; presumably they are the callbacks CAObject
# invokes for element access -- TODO confirm against the carray documentation.
class CADFArray < CAObject # :nodoc:

  attr_reader :column_names, :index

  # column_names - ordered list of keys into +column_data+; defines the
  #                column order of this array.
  # column_data  - mapping of column name => column array.
  # index:       - row index; when omitted, an integer sequence sized after
  #                the first entry of +column_data+ is used.
  def initialize(column_names, column_data, index: nil)
    @column_names = column_names
    @column_data  = column_data
    @index = index || CArray.int(column_data.first[1].size).seq
    dim = [@column_data[@column_names.first].size, @column_names.size]
    extend CArray::TableMethods
    super(:object, dim, :read_only=>true)
    __create_mask__
  end

  # Reads the element at idx = [row, column position].
  def fetch_index(idx)
    r, c = *idx
    @column_data[@column_names[c]].value[r]
  end

  # Writes +value+ at idx = [row, column position].
  def store_index(idx, value)
    r, c = *idx
    @column_data[@column_names[c]][r] = value
  end

  # Copies every column's values into the matching column of +data+.
  def copy_data(data)
    @column_names.each_with_index do |name, i|
      data[nil, i] = @column_data[name].value
    end
  end

  # Writes each column of +data+ back into the matching column's values.
  def sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].value[] = data[nil, i]
    end
  end

  # Fills every column with +value+.
  #
  # Fix: the original assigned +value+ to the hash entry itself
  # (`@column_data[name] = value`), which replaced the column array with the
  # scalar. The fill now goes through the column's values, as sync_data and
  # mask_fill_data do.
  def fill_data(value)
    @column_names.each do |name|
      @column_data[name].value[] = value
    end
  end

  # Calls __create_mask__ on every column.
  def create_mask
    @column_names.each do |name|
      @column_data[name].instance_eval { __create_mask__ }
    end
  end

  # Reads the mask element at idx; 0 when the column has no mask.
  def mask_fetch_index(idx)
    r, c = *idx
    column = @column_data[@column_names[c]]
    column.has_mask? ? column.mask[r] : 0
  end

  # Writes the mask element at idx.
  # NOTE(review): the original branched on has_mask? but performed the same
  # assignment in both branches; that behavior is kept as is. Confirm whether
  # a column without a mask needs its mask created first.
  def mask_store_index(idx, value)
    r, c = *idx
    @column_data[@column_names[c]].mask[r] = value
  end

  # Copies the mask of every column that has one into +data+.
  def mask_copy_data(data)
    @column_names.each_with_index do |name, i|
      column = @column_data[name]
      data[nil, i] = column.mask if column.has_mask?
    end
  end

  # Writes each column of +data+ into the matching column's mask.
  def mask_sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].mask[] = data[nil, i]
    end
  end

  # Fills every column's mask with +value+.
  def mask_fill_data(value)
    @column_names.each do |name|
      @column_data[name].mask[] = value
    end
  end

  # Returns the result of the inherited to_ca, extended with
  # CArray::TableMethods and carrying this array's column names.
  def to_ca
    table = super
    table.extend CArray::TableMethods
    table.column_names = @column_names
    table
  end
end
@@ -0,0 +1,97 @@
1
+
2
module CArray::TableMethods

  # Builds a CADataFrame from this table.
  #
  # index: - passed through to CADataFrame.new
  # block  - passed through to CADataFrame.new
  #
  # Returns nil when the table has no elements. When the table carries
  # @header or @note, both are copied onto the new frame, which then also
  # gains a #note reader and a #header method on its singleton class.
  def to_dataframe(index: nil, &block)
    return nil if size == 0

    frame = CADataFrame.new(self, index: index, &block)
    return frame unless @header || @note

    frame.instance_variable_set(:@header, @header)
    frame.instance_variable_set(:@note, @note)
    frame.singleton_class.class_eval do
      attr_reader :note

      # With a name: the @header entry for that name.
      # Without a name: the frame's @column_names.
      def header(name = nil)
        name ? @header[name.to_s] : @column_names
      end
    end
    frame
  end

  alias to_df to_dataframe
end
28
+
29
class CADataFrame

  # Returns the frame as a nested Array (via #to_a of the underlying table).
  #
  # with_index: - when true and the frame has a row index, the index is
  #               included as an extra leading column named "".
  def to_a(with_index: true)
    table =
      if @row_index && with_index
        CADFArray.new([""] + @column_names, @column_data.clone.update("" => index))
      else
        ca.to_ca
      end
    table.to_a
  end

  # Writes the frame as CSV through CADFArray#to_csv and returns its result.
  #
  # io          - output target handed to CADFArray#to_csv
  # rs:, sep:, fill: - passed through to CADFArray#to_csv
  # with_index: - when true and the frame has a row index, the index is
  #               written as a leading column named "index"
  # time_format: - format applied to CATimeIndex columns; without it their
  #               times are converted with #to_s
  def to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
    labels  = @column_names
    columns = @column_data.clone
    if @row_index && with_index
      labels = ["index"] + labels
      columns.update("index" => index)
    end

    # Time-index columns are replaced by a textual rendering of their times.
    columns.each do |key, column|
      next unless column.is_a?(CATimeIndex)
      columns[key] =
        if time_format
          column.time.time_format(time_format)
        else
          column.time.convert(:object) { |t| t.to_s }
        end
    end

    CADFArray.new(labels, columns).to_csv(io, rs: rs, sep: sep, fill: fill, &block)
  end

  # Converts the frame to a Daru::DataFrame (the daru library is loaded on
  # demand). Each column is passed as column.object.unmask(nil).to_a; the row
  # index, when present, becomes the Daru index.
  def to_daru
    require "daru"
    data = {}
    each_column_name do |name|
      data[name] = column(name).object.unmask(nil).to_a
    end
    return Daru::DataFrame.new(data, order: @column_names) unless @row_index
    Daru::DataFrame.new(data, index: @row_index.to_a, order: @column_names)
  end

  # Writes the frame to an xlsx file using axlsx (loaded on demand).
  #
  # filename        - path handed to Axlsx::Package#serialize
  # sheet_name:     - name of the worksheet
  # with_row_index: - when true, each row is prefixed with its row index and
  #                   the header row with an empty cell
  # block           - optional; receives the worksheet after all rows have
  #                   been added and before the file is serialized
  def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
    require "axlsx"
    package = Axlsx::Package.new
    package.use_shared_strings = true
    sheet = package.workbook.add_worksheet(name: sheet_name)

    # Work on an objectified copy whose masked cells are unmasked to "=NA()".
    frame = to_df.objectify.unmask("=NA()")
    if with_row_index
      sheet.add_row([""] + column_names)
      frame.each_row_with_row_index(with: Array) do |cells, i|
        sheet.add_row([i] + cells)
      end
    else
      sheet.add_row(column_names)
      frame.each_row(with: Array) do |cells|
        sheet.add_row(cells)
      end
    end

    yield sheet if block_given?
    package.serialize(filename)
  end
end