daru_lite 0.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- data/.github/workflows/ci.yml +20 -0
- data/.rubocop_todo.yml +35 -33
- data/README.md +19 -115
- data/daru_lite.gemspec +1 -0
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +142 -2355
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3243
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +102 -3
- data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,144 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class DataFrame
|
3
|
+
module Filterable
|
4
|
+
# Return unique rows by vector specified or all vectors
|
5
|
+
#
|
6
|
+
# @param vtrs [Array<String, Symbol>] vector name(s) that should be considered
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
#
|
10
|
+
# => #<DaruLite::DataFrame(6x2)>
|
11
|
+
# a b
|
12
|
+
# 0 1 a
|
13
|
+
# 1 2 b
|
14
|
+
# 2 3 c
|
15
|
+
# 3 4 d
|
16
|
+
# 2 3 c
|
17
|
+
# 3 4 f
|
18
|
+
#
|
19
|
+
# 2.3.3 :> df.uniq
|
20
|
+
# => #<DaruLite::DataFrame(5x2)>
|
21
|
+
# a b
|
22
|
+
# 0 1 a
|
23
|
+
# 1 2 b
|
24
|
+
# 2 3 c
|
25
|
+
# 3 4 d
|
26
|
+
# 3 4 f
|
27
|
+
#
|
28
|
+
# 2.3.3 :> df.uniq(:a)
|
29
|
+
# => #<DaruLite::DataFrame(4x2)>
|
30
|
+
# a b
|
31
|
+
# 0 1 a
|
32
|
+
# 1 2 b
|
33
|
+
# 2 3 c
|
34
|
+
# 3 4 d
|
35
|
+
#
|
36
|
+
def uniq(*vtrs)
  # With no vector names given, every vector takes part in the comparison.
  key_vectors = vtrs.empty? ? vectors.to_a : vtrs
  # Keep only the first entry of each group of identical rows.
  first_entries = group_by(key_vectors).groups.each_value.map { |members| members[0] }
  row[*first_entries.sort]
end
|
42
|
+
|
43
|
+
# Retain vectors or rows if the block returns a truthy value.
|
44
|
+
#
|
45
|
+
# == Description
|
46
|
+
#
|
47
|
+
# For filtering out certain rows/vectors based on their values,
|
48
|
+
# use the #filter method. By default it iterates over vectors and
|
49
|
+
# keeps those vectors for which the block returns true. It accepts
|
50
|
+
# an optional axis argument which lets you specify whether you want
|
51
|
+
# to iterate over vectors or rows.
|
52
|
+
#
|
53
|
+
# == Arguments
|
54
|
+
#
|
55
|
+
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
56
|
+
# Default to :vector.
|
57
|
+
#
|
58
|
+
# == Usage
|
59
|
+
#
|
60
|
+
# # Filter vectors
|
61
|
+
#
|
62
|
+
# df.filter do |vector|
|
63
|
+
# vector.type == :numeric and vector.median < 50
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# # Filter rows
|
67
|
+
#
|
68
|
+
# df.filter(:row) do |row|
|
69
|
+
# row[:a] + row[:d] < 100
|
70
|
+
# end
|
71
|
+
def filter(axis = :vector, &block)
  # Hands the block over to the axis-specific implementation chosen by +axis+.
  dispatch_to_axis_pl(axis, :filter, &block)
end
|
74
|
+
|
75
|
+
# Returns a dataframe in which rows with any of the mentioned values
|
76
|
+
# are ignored.
|
77
|
+
# @param [Array] values to reject to form the new dataframe
|
78
|
+
# @return [DaruLite::DataFrame] DataFrame with only the rows that do not
|
79
|
+
# contain the mentioned values
|
80
|
+
# @example
|
81
|
+
# df = DaruLite::DataFrame.new({
|
82
|
+
# a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
|
83
|
+
# b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
|
84
|
+
# c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
85
|
+
# }, index: 11..18)
|
86
|
+
# df.reject_values nil, Float::NAN
|
87
|
+
# # => #<DaruLite::DataFrame(2x3)>
|
88
|
+
# # a b c
|
89
|
+
# # 11 1 a a
|
90
|
+
# # 18 7 8 7
|
91
|
+
def reject_values(*values)
  # Positions (across all vectors) that hold at least one of the rejected values.
  rejected = @data.flat_map { |vector| vector.positions(*values) }
  kept = (0...size).to_a - rejected

  # With a single position #row_at wouldn't return a DataFrame, so pass a
  # one-element range instead.
  return row_at(kept.first..kept.first) if kept.size == 1

  row_at(*kept)
end
|
102
|
+
|
103
|
+
# Deletes, in place, every row for which the block returns a falsy value.
# The block receives the row found at each position.
def keep_row_if
  unwanted = (0...@index.size).reject { |pos| yield(row_at(pos)) }
  # Walk from the highest position down so that a deletion does not shift
  # the positions that are still waiting to be removed.
  unwanted.reverse_each { |pos| delete_at_position(pos) }
end
|
108
|
+
|
109
|
+
# Deletes, in place, every vector for which the block returns a falsy value.
# The block receives the vector's data and its name.
#
# NOTE(review): delete_vector is called while @vectors is being iterated;
# presumably safe given how delete_vector updates @vectors - confirm.
def keep_vector_if
  @vectors.each do |name|
    wanted = yield(@data[@vectors[name]], name)
    delete_vector(name) unless wanted
  end
end
|
114
|
+
|
115
|
+
# Creates a new vector from the values of the given field, taken from the rows for which the block returns true
|
116
|
+
def filter_vector(vec, &block)
  # First pick the rows accepted by the block, then pull +vec+ out of each.
  accepted_rows = each_row.select(&block)
  values = accepted_rows.map { |accepted| accepted[vec] }
  DaruLite::Vector.new(values)
end
|
119
|
+
|
120
|
+
# Iterates over each row and retains it in a new DataFrame if the block returns
|
121
|
+
# true for that row.
|
122
|
+
def filter_rows
  return to_enum(:filter_rows) unless block_given?

  # One block result per index entry, handed to #where as the selection mask.
  mask = @index.map { |label| yield access_row(label) }
  where(mask)
end
|
129
|
+
|
130
|
+
# Iterates over each vector and retains it in a new DataFrame if the block returns
|
131
|
+
# true for that vector.
|
132
|
+
def filter_vectors(&block)
  return to_enum(:filter_vectors) unless block

  # Work on a copy so the receiver keeps all of its vectors.
  copy = dup
  copy.keep_vector_if(&block)
  copy
end
|
137
|
+
|
138
|
+
# Query a DataFrame by passing a DaruLite::Core::Query::BoolArray object.
|
139
|
+
def where(bool_array)
  # Delegates the selection to DaruLite::Core::Query.df_where.
  DaruLite::Core::Query.df_where(self, bool_array)
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class DataFrame
|
3
|
+
module IOAble
|
4
|
+
module ClassMethods
|
5
|
+
# Load data from a CSV file. Specify an optional block to grab the CSV
|
6
|
+
# object and pre-condition it (for example use the `convert` or
|
7
|
+
# `header_convert` methods).
|
8
|
+
#
|
9
|
+
# == Arguments
|
10
|
+
#
|
11
|
+
# * path - Local path / Remote URL of the file to load specified as a String.
|
12
|
+
#
|
13
|
+
# == Options
|
14
|
+
#
|
15
|
+
# Accepts the same options as the DaruLite::DataFrame constructor and CSV.open()
|
16
|
+
# and uses those to eventually construct the resulting DataFrame.
|
17
|
+
#
|
18
|
+
# == Verbose Description
|
19
|
+
#
|
20
|
+
# You can specify all the options to the `.from_csv` function that you
|
21
|
+
# do to the Ruby `CSV.read()` function, since this is what is used internally.
|
22
|
+
#
|
23
|
+
# For example, if the columns in your CSV file are separated by something
|
24
|
+
# other than commas, you can use the `:col_sep` option. If you want to
|
25
|
+
# convert numeric values to numbers and not keep them as strings, you can
|
26
|
+
# use the `:converters` option and set it to `:numeric`.
|
27
|
+
#
|
28
|
+
# The `.from_csv` function uses the following defaults for reading CSV files
|
29
|
+
# (that are passed into the `CSV.read()` function):
|
30
|
+
#
|
31
|
+
# {
|
32
|
+
# :col_sep => ',',
|
33
|
+
# :converters => :numeric
|
34
|
+
# }
|
35
|
+
def from_csv(path, opts = {}, &block)
  # Thin wrapper: the actual reading is done by DaruLite::IO.from_csv.
  DaruLite::IO.from_csv(path, opts, &block)
end
|
38
|
+
|
39
|
+
# Read data from an Excel file into a DataFrame.
|
40
|
+
#
|
41
|
+
# == Arguments
|
42
|
+
#
|
43
|
+
# * path - Path of the file to be read.
|
44
|
+
#
|
45
|
+
# == Options
|
46
|
+
#
|
47
|
+
# * :worksheet_id - ID of the worksheet that is to be read.
|
48
|
+
def from_excel(path, opts = {}, &block)
  # Thin wrapper: the actual reading is done by DaruLite::IO.from_excel.
  DaruLite::IO.from_excel(path, opts, &block)
end
|
51
|
+
|
52
|
+
# Execute a database query and return the result as a DataFrame
|
53
|
+
#
|
54
|
+
# @param dbh [DBI::DatabaseHandle, String] A DBI connection OR Path to a SQlite3 database.
|
55
|
+
# @param query [String] The query to be executed
|
56
|
+
#
|
57
|
+
# @return A dataframe containing the data resulting from the query
|
58
|
+
#
|
59
|
+
# USE:
|
60
|
+
#
|
61
|
+
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
62
|
+
# DaruLite::DataFrame.from_sql(dbh, "SELECT * FROM test")
|
63
|
+
#
|
64
|
+
# #Alternatively
|
65
|
+
#
|
66
|
+
# require 'dbi'
|
67
|
+
# DaruLite::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
|
68
|
+
def from_sql(dbh, query)
  # Thin wrapper: the query is run by DaruLite::IO.from_sql.
  DaruLite::IO.from_sql(dbh, query)
end
|
71
|
+
|
72
|
+
# Read a dataframe from AR::Relation
|
73
|
+
#
|
74
|
+
# @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
|
75
|
+
# @param fields [Array] Field names to be loaded (optional)
|
76
|
+
#
|
77
|
+
# @return A dataframe containing the data loaded from the relation
|
78
|
+
#
|
79
|
+
# USE:
|
80
|
+
#
|
81
|
+
# # When Post model is defined as:
|
82
|
+
# class Post < ActiveRecord::Base
|
83
|
+
# scope :active, -> { where.not(published_at: nil) }
|
84
|
+
# end
|
85
|
+
#
|
86
|
+
# # You can load active posts into a dataframe by:
|
87
|
+
# DaruLite::DataFrame.from_activerecord(Post.active, :title, :published_at)
|
88
|
+
def from_activerecord(relation, *fields)
  # Thin wrapper: loading is done by DaruLite::IO.from_activerecord.
  DaruLite::IO.from_activerecord(relation, *fields)
end
|
91
|
+
|
92
|
+
# Read the database from a plaintext file. For this method to work,
|
93
|
+
# the data should be present in a plain text file in columns. See
|
94
|
+
# spec/fixtures/bank2.dat for an example.
|
95
|
+
#
|
96
|
+
# == Arguments
|
97
|
+
#
|
98
|
+
# * path - Path of the file to be read.
|
99
|
+
# * fields - Vector names of the resulting database.
|
100
|
+
#
|
101
|
+
# == Usage
|
102
|
+
#
|
103
|
+
# df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
|
104
|
+
def from_plaintext(path, fields)
  # Thin wrapper: parsing is done by DaruLite::IO.from_plaintext.
  DaruLite::IO.from_plaintext(path, fields)
end
|
107
|
+
|
108
|
+
# Rebuilds a DataFrame from a marshalled hash with the keys :data, :index,
# :order and :name (the same keys the instance-level #_dump writes).
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only feed
# this method data that comes from a trusted source.
def _load(data)
  attrs = Marshal.load(data)
  DaruLite::DataFrame.new(attrs[:data], index: attrs[:index], order: attrs[:order], name: attrs[:name])
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Inclusion hook: extends the including class with ClassMethods so the
# reader methods become available on the class itself.
def self.included(base)
  base.extend(ClassMethods)
end
|
122
|
+
|
123
|
+
# Write this DataFrame to a CSV file.
|
124
|
+
#
|
125
|
+
# == Arguments
|
126
|
+
#
|
127
|
+
# * filename - Path of CSV file where the DataFrame is to be saved.
|
128
|
+
#
|
129
|
+
# == Options
|
130
|
+
#
|
131
|
+
# * convert_comma - If set to *true*, will convert any commas in any
|
132
|
+
# of the data to full stops ('.').
|
133
|
+
# All the options accepted by CSV.read() can also be passed into this
|
134
|
+
# function.
|
135
|
+
def write_csv(filename, opts = {})
  # Thin wrapper: writing is done by DaruLite::IO.dataframe_write_csv.
  DaruLite::IO.dataframe_write_csv(self, filename, opts)
end
|
138
|
+
|
139
|
+
# Write this dataframe to an Excel Spreadsheet
|
140
|
+
#
|
141
|
+
# == Arguments
|
142
|
+
#
|
143
|
+
# * filename - The path of the file where the DataFrame should be written.
|
144
|
+
def write_excel(filename, opts = {})
  # Thin wrapper: writing is done by DaruLite::IO.dataframe_write_excel.
  DaruLite::IO.dataframe_write_excel(self, filename, opts)
end
|
147
|
+
|
148
|
+
# Insert each row of the DataFrame into the selected table
|
149
|
+
#
|
150
|
+
# == Arguments
|
151
|
+
#
|
152
|
+
# * dbh - DBI database connection object.
|
153
|
+
# * table - Name of the table to insert into.
|
154
|
+
#
|
155
|
+
# == Usage
|
156
|
+
#
|
157
|
+
# ds = DaruLite::DataFrame.new({:id=>DaruLite::Vector.new([1,2,3]), :name=>DaruLite::Vector.new(["a","b","c"])})
|
158
|
+
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
159
|
+
# ds.write_sql(dbh,"test")
|
160
|
+
def write_sql(dbh, table)
  # Thin wrapper: insertion is done by DaruLite::IO.dataframe_write_sql.
  DaruLite::IO.dataframe_write_sql(self, dbh, table)
end
|
163
|
+
|
164
|
+
# Use marshalling to save dataframe to a file.
|
165
|
+
def save(filename)
  # Thin wrapper: persistence is done by DaruLite::IO.save.
  DaruLite::IO.save(self, filename)
end
|
168
|
+
|
169
|
+
def _dump(_depth)
|
170
|
+
Marshal.dump(
|
171
|
+
data: @data,
|
172
|
+
index: @index.to_a,
|
173
|
+
order: @vectors.to_a,
|
174
|
+
name: @name
|
175
|
+
)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class DataFrame
|
3
|
+
module Indexable
|
4
|
+
# Strategy picked by #set_index when one column becomes a plain Index.
module SetSingleIndexStrategy
  class << self
    # Number of distinct values held by column +col+ of +df+.
    def uniq_size(df, col)
      df[col].uniq.size
    end

    # Builds the replacement index from the values of column +col+.
    def new_index(df, col)
      DaruLite::Index.new(df[col].to_a)
    end

    # Removes the column that was promoted to the index.
    def delete_vector(df, col)
      df.delete_vector(col)
    end
  end
end
|
17
|
+
|
18
|
+
# Strategy picked by #set_index when +categorical+ is truthy. It has no
# uniq_size because #set_index skips the uniqueness check in that case.
module SetCategoricalIndexStrategy
  class << self
    # Builds a CategoricalIndex from the values of column +col+.
    def new_index(df, col)
      DaruLite::CategoricalIndex.new(df[col].to_a)
    end

    # Removes the column that was promoted to the index.
    def delete_vector(df, col)
      df.delete_vector(col)
    end
  end
end
|
27
|
+
|
28
|
+
# Strategy picked by #set_index when several columns form a MultiIndex.
module SetMultiIndexStrategy
  class << self
    # Number of distinct rows in the sub-frame made of +cols+.
    def uniq_size(df, cols)
      df[*cols].uniq.size
    end

    # Builds a MultiIndex from the selected columns and names it after them.
    def new_index(df, cols)
      arrays = df[*cols].map_vectors(&:to_a)
      multi_index = DaruLite::MultiIndex.from_arrays(arrays)
      multi_index.name = cols
      multi_index
    end

    # Removes every column that was promoted to the index.
    def delete_vector(df, cols)
      df.delete_vectors(*cols)
    end
  end
end
|
43
|
+
|
44
|
+
# Set a particular column (or several columns) as the new index of the DataFrame
|
45
|
+
def set_index(new_index_col, keep: false, categorical: false)
  strategy =
    if categorical
      SetCategoricalIndexStrategy
    elsif new_index_col.respond_to?(:to_a)
      # Anything array-like is treated as a list of columns for a MultiIndex.
      new_index_col = new_index_col.to_a
      SetMultiIndexStrategy
    else
      SetSingleIndexStrategy
    end

  # Uniqueness is only enforced for non-categorical indexes.
  if !categorical && @size != strategy.uniq_size(self, new_index_col)
    raise ArgumentError, 'All elements in new index must be unique.'
  end

  self.index = strategy.new_index(self, new_index_col)
  # The source column(s) are dropped unless the caller asked to keep them.
  strategy.delete_vector(self, new_index_col) unless keep
  self
end
|
64
|
+
|
65
|
+
# Change the index of the DataFrame and preserve the labels of the previous
|
66
|
+
# indexing. New index can be DaruLite::Index or any of its subclasses.
|
67
|
+
#
|
68
|
+
# @param [DaruLite::Index] new_index The new Index for reindexing the DataFrame.
|
69
|
+
# @example Reindexing DataFrame
|
70
|
+
# df = DaruLite::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]},
|
71
|
+
# index: ['a','b','c','d'])
|
72
|
+
# #=>
|
73
|
+
# ##<DaruLite::DataFrame:83278130 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
74
|
+
# # a b
|
75
|
+
# # a 1 11
|
76
|
+
# # b 2 22
|
77
|
+
# # c 3 33
|
78
|
+
# # d 4 44
|
79
|
+
# df.reindex DaruLite::Index.new(['b', 0, 'a', 'g'])
|
80
|
+
# #=>
|
81
|
+
# ##<DaruLite::DataFrame:83177070 @name = b19277b8-c548-41da-ad9a-2ad8c060e273 @size = 4>
|
82
|
+
# # a b
|
83
|
+
# # b 2 22
|
84
|
+
# # 0 nil nil
|
85
|
+
# # a 1 11
|
86
|
+
# # g nil nil
|
87
|
+
def reindex(new_index)
  unless new_index.is_a?(DaruLite::Index)
    raise ArgumentError,
          "Must pass the new index of type Index or its subclasses, not #{new_index.class}"
  end

  reindexed = DaruLite::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
  new_index.each do |label|
    # Labels missing from the current index get a row filled with nils.
    reindexed.row[label] =
      if @index.include?(label)
        row[label]
      else
        Array.new(ncols)
      end
  end
  reindexed
end
|
98
|
+
|
99
|
+
# Moves the current index back into the frame as regular vector(s), placed
# ahead of the existing vectors, and installs the index of index.to_df as
# the new row index. Mutates and returns the receiver.
def reset_index
  frame_from_index = index.to_df
  index_names = index.name
  index_names = [index_names] unless index_names.instance_of?(Array)
  # The final order is captured before any vector is added below.
  final_order = index_names + vectors.to_a

  self.index = frame_from_index.index
  index_names.each { |index_name| self[index_name] = frame_from_index[index_name] }
  self.order = final_order
  self
end
|
111
|
+
|
112
|
+
# Reassign index with a new index of type DaruLite::Index or any of its subclasses.
|
113
|
+
#
|
114
|
+
# @param [DaruLite::Index] idx New index object on which the rows of the dataframe
|
115
|
+
# are to be indexed.
|
116
|
+
# @example Reassigning index of a DataFrame
|
117
|
+
# df = DaruLite::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
|
118
|
+
# df.index.to_a #=> [0,1,2,3]
|
119
|
+
#
|
120
|
+
# df.index = DaruLite::Index.new(['a','b','c','d'])
|
121
|
+
# df.index.to_a #=> ['a','b','c','d']
|
122
|
+
# df.row['a'].to_a #=> [1,11]
|
123
|
+
def index=(idx)
  coerced = Index.coerce(idx)
  @index = coerced
  # Every vector is pointed at the very same index object.
  @data.each do |vector|
    vector.index = coerced
  end

  self
end
|
129
|
+
|
130
|
+
# Returns a new DataFrame whose vectors follow +new_vectors+ (a
# DaruLite::Index). Names unknown to the receiver become all-nil vectors.
# Raises ArgumentError when +new_vectors+ is not a DaruLite::Index.
def reindex_vectors(new_vectors)
  unless new_vectors.is_a?(DaruLite::Index)
    raise ArgumentError,
          "Must pass the new index of type Index or its subclasses, not #{new_vectors.class}"
  end

  reindexed = DaruLite::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
  new_vectors.each do |vector_name|
    reindexed[vector_name] =
      if @vectors.include?(vector_name)
        self[vector_name]
      else
        Array.new(nrows)
      end
  end
  reindexed
end
|
141
|
+
|
142
|
+
# Reassign vectors with a new index of type DaruLite::Index or any of its subclasses.
|
143
|
+
#
|
144
|
+
# @param new_index [DaruLite::Index] The new index object on which the vectors are to
|
145
|
+
# be indexed. Must be of the same size as ncols.
|
146
|
+
# @example Reassigning vectors of a DataFrame
|
147
|
+
# df = DaruLite::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
|
148
|
+
# df.vectors.to_a #=> [:a, :b, :c]
|
149
|
+
#
|
150
|
+
# df.vectors = DaruLite::Index.new([:foo, :bar, :baz])
|
151
|
+
# df.vectors.to_a #=> [:foo, :bar, :baz]
|
152
|
+
# Replaces the vector index and renames each stored vector accordingly.
#
# @raise [ArgumentError] if +new_index+ is not a DaruLite::Index, or if its
#   size differs from the number of columns.
def vectors=(new_index)
  raise ArgumentError, 'Can only reindex with Index and its subclasses' unless new_index.is_a?(DaruLite::Index)

  if new_index.size != ncols
    # Adjacent string literals are joined verbatim, so the separating space
    # has to be part of the first literal.
    raise ArgumentError, "Specified index length #{new_index.size} not equal to " \
                         "dataframe size #{ncols}"
  end

  @vectors = new_index
  # Names are assigned positionally: the n-th vector gets the n-th label.
  @data.zip(new_index.to_a).each do |vect, name|
    vect.name = name
  end
  self
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|