daru 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -5
- data/CONTRIBUTING.md +2 -11
- data/History.md +18 -0
- data/README.md +109 -11
- data/daru.gemspec +11 -6
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/images/plot0.png +0 -0
- data/lib/daru.rb +5 -2
- data/lib/daru/core/group_by.rb +45 -45
- data/lib/daru/core/merge.rb +59 -1
- data/lib/daru/dataframe.rb +255 -226
- data/lib/daru/exceptions.rb +2 -0
- data/lib/daru/io/io.rb +41 -19
- data/lib/daru/io/sql_data_source.rb +116 -0
- data/lib/daru/vector.rb +124 -104
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +12 -2
- data/spec/core/merge_spec.rb +14 -1
- data/spec/dataframe_spec.rb +189 -158
- data/spec/io/io_spec.rb +80 -2
- data/spec/io/sql_data_source_spec.rb +67 -0
- data/spec/spec_helper.rb +4 -2
- data/spec/support/database_helper.rb +30 -0
- data/spec/vector_spec.rb +45 -46
- metadata +104 -16
- data/.build.sh +0 -14
data/lib/daru/io/io.rb
CHANGED
@@ -120,24 +120,16 @@ module Daru
|
|
120
120
|
writer.close
|
121
121
|
end
|
122
122
|
|
123
|
-
#
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
fields.push(c[:name].to_sym)
|
134
|
-
end
|
135
|
-
ds=Daru::DataFrame.new(vectors,order: fields)
|
136
|
-
sth.fetch do |row|
|
137
|
-
ds.add_row(row.to_a)
|
138
|
-
end
|
139
|
-
ds.update
|
140
|
-
ds
|
123
|
+
# Execute a query and create a data frame from the result
|
124
|
+
#
|
125
|
+
# @param db [DBI::DatabaseHandle, ActiveRecord::ConnectionAdapters::AbstractAdapter] A DBI or ActiveRecord connection to be used to run the query
|
126
|
+
# @param query [String] The query to be executed
|
127
|
+
#
|
128
|
+
# @return A dataframe containing the data resulting from the query
|
129
|
+
|
130
|
+
def from_sql(db, query)
|
131
|
+
require 'daru/io/sql_data_source'
|
132
|
+
SqlDataSource.make_dataframe(db, query)
|
141
133
|
end
|
142
134
|
|
143
135
|
def dataframe_write_sql ds, dbh, table
|
@@ -148,6 +140,36 @@ module Daru
|
|
148
140
|
return true
|
149
141
|
end
|
150
142
|
|
143
|
+
# Load dataframe from AR::Relation
|
144
|
+
#
|
145
|
+
# @param relation [ActiveRecord::Relation] A relation to be used to load the contents of dataframe
|
146
|
+
#
|
147
|
+
# @return A dataframe containing the data in the given relation
|
148
|
+
def from_activerecord(relation, *fields)
|
149
|
+
if fields.empty?
|
150
|
+
records = relation.map do |record|
|
151
|
+
record.attributes.symbolize_keys
|
152
|
+
end
|
153
|
+
return Daru::DataFrame.new(records)
|
154
|
+
else
|
155
|
+
fields = fields.map(&:to_sym)
|
156
|
+
end
|
157
|
+
|
158
|
+
vectors = Hash[*fields.map { |name|
|
159
|
+
[
|
160
|
+
name,
|
161
|
+
Daru::Vector.new([]).tap {|v| v.rename name }
|
162
|
+
]
|
163
|
+
}.flatten]
|
164
|
+
|
165
|
+
Daru::DataFrame.new(vectors, order: fields).tap do |df|
|
166
|
+
relation.pluck(*fields).each do |record|
|
167
|
+
df.add_row(Array(record))
|
168
|
+
end
|
169
|
+
df.update
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
151
173
|
# Loading data from plain text files
|
152
174
|
|
153
175
|
def from_plaintext filename, fields
|
@@ -181,4 +203,4 @@ module Daru
|
|
181
203
|
end
|
182
204
|
end
|
183
205
|
end
|
184
|
-
end
|
206
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
module Daru
|
2
|
+
module IO
|
3
|
+
class SqlDataSource
|
4
|
+
# Private adapter class for DBI::DatabaseHandle
|
5
|
+
# @private
|
6
|
+
class DbiAdapter
|
7
|
+
def initialize(dbh, query)
|
8
|
+
@dbh = dbh
|
9
|
+
@query = query
|
10
|
+
end
|
11
|
+
|
12
|
+
def each_column_name(&block)
|
13
|
+
result.column_names.each do |column_name|
|
14
|
+
block.(column_name.to_sym)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def each_row(&block)
|
19
|
+
result.fetch do |row|
|
20
|
+
block.(row.to_a)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def result
|
27
|
+
@result ||= @dbh.execute(@query)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Private adapter class for connections of ActiveRecord
|
32
|
+
# @private
|
33
|
+
class ActiveRecordConnectionAdapter
|
34
|
+
def initialize(conn, query)
|
35
|
+
@conn = conn
|
36
|
+
@query = query
|
37
|
+
end
|
38
|
+
|
39
|
+
def each_column_name(&block)
|
40
|
+
result.columns.each do |column_name|
|
41
|
+
block.(column_name.to_sym)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def each_row(&block)
|
46
|
+
result.each do |row|
|
47
|
+
block.(row.values)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def result
|
54
|
+
@result ||= @conn.exec_query(@query)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
private_constant :DbiAdapter
|
59
|
+
private_constant :ActiveRecordConnectionAdapter
|
60
|
+
|
61
|
+
def self.make_dataframe(db, query)
|
62
|
+
self.new(db, query).make_dataframe
|
63
|
+
end
|
64
|
+
|
65
|
+
def initialize(db, query)
|
66
|
+
@adapter = init_adapter(db, query)
|
67
|
+
end
|
68
|
+
|
69
|
+
def make_dataframe
|
70
|
+
vectors = {}
|
71
|
+
fields = []
|
72
|
+
@adapter.each_column_name do |column_name|
|
73
|
+
vectors[column_name] = Daru::Vector.new([])
|
74
|
+
vectors[column_name].rename column_name
|
75
|
+
fields.push column_name
|
76
|
+
end
|
77
|
+
|
78
|
+
df = Daru::DataFrame.new(vectors, order: fields)
|
79
|
+
@adapter.each_row do |row|
|
80
|
+
df.add_row(row)
|
81
|
+
end
|
82
|
+
|
83
|
+
df.update
|
84
|
+
|
85
|
+
df
|
86
|
+
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
def init_adapter(db, query)
|
91
|
+
begin
|
92
|
+
query = query.to_str
|
93
|
+
rescue
|
94
|
+
raise ArgumentError, 'query must be a string'
|
95
|
+
end
|
96
|
+
|
97
|
+
case
|
98
|
+
when check_dbi(db)
|
99
|
+
DbiAdapter.new(db, query)
|
100
|
+
when check_active_record_connection(db)
|
101
|
+
ActiveRecordConnectionAdapter.new(db, query)
|
102
|
+
else
|
103
|
+
raise ArgumentError, 'unknown database type'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def check_dbi(obj)
|
108
|
+
DBI::DatabaseHandle === obj rescue false
|
109
|
+
end
|
110
|
+
|
111
|
+
def check_active_record_connection(obj)
|
112
|
+
ActiveRecord::ConnectionAdapters::AbstractAdapter === obj rescue false
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
data/lib/daru/vector.rb
CHANGED
@@ -16,7 +16,7 @@ module Daru
|
|
16
16
|
|
17
17
|
def each(&block)
|
18
18
|
return to_enum(:each) unless block_given?
|
19
|
-
|
19
|
+
|
20
20
|
@data.each(&block)
|
21
21
|
self
|
22
22
|
end
|
@@ -61,33 +61,33 @@ module Daru
|
|
61
61
|
attr_accessor :labels
|
62
62
|
|
63
63
|
# Create a Vector object.
|
64
|
-
#
|
64
|
+
#
|
65
65
|
# == Arguments
|
66
|
-
#
|
67
|
-
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
68
|
-
# Hash. If Array, a numeric index will be created if not supplied in the
|
69
|
-
# options. Specifying more index elements than actual values in *source*
|
70
|
-
# will insert *nil* into the surplus index elements. When a Hash is specified,
|
71
|
-
# the keys of the Hash are taken as the index elements and the corresponding
|
66
|
+
#
|
67
|
+
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
68
|
+
# Hash. If Array, a numeric index will be created if not supplied in the
|
69
|
+
# options. Specifying more index elements than actual values in *source*
|
70
|
+
# will insert *nil* into the surplus index elements. When a Hash is specified,
|
71
|
+
# the keys of the Hash are taken as the index elements and the corresponding
|
72
72
|
# values as the values that populate the vector.
|
73
|
-
#
|
73
|
+
#
|
74
74
|
# == Options
|
75
|
-
#
|
75
|
+
#
|
76
76
|
# * +:name+ - Name of the vector
|
77
|
-
#
|
77
|
+
#
|
78
78
|
# * +:index+ - Index of the vector
|
79
|
-
#
|
80
|
-
# * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
|
79
|
+
#
|
80
|
+
# * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
|
81
81
|
# Default :array.
|
82
|
-
#
|
82
|
+
#
|
83
83
|
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
|
84
84
|
# further information on supported data type.
|
85
|
-
#
|
85
|
+
#
|
86
86
|
# * +:missing_values+ - An Array of the values that are to be treated as 'missing'.
|
87
87
|
# nil is the default missing value.
|
88
|
-
#
|
88
|
+
#
|
89
89
|
# == Usage
|
90
|
-
#
|
90
|
+
#
|
91
91
|
# vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
|
92
92
|
# vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
|
93
93
|
def initialize source, opts={}
|
@@ -104,7 +104,7 @@ module Daru
|
|
104
104
|
|
105
105
|
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
106
106
|
@index = try_create_index(index || @data.size)
|
107
|
-
|
107
|
+
|
108
108
|
if @index.size > @data.size
|
109
109
|
cast(dtype: :array) # NM with nils seg faults
|
110
110
|
(@index.size - @data.size).times { @data << nil }
|
@@ -120,11 +120,11 @@ module Daru
|
|
120
120
|
|
121
121
|
# Create a new vector by specifying the size and an optional value
|
122
122
|
# and block to generate values.
|
123
|
-
#
|
123
|
+
#
|
124
124
|
# == Description
|
125
125
|
#
|
126
126
|
# The *new_with_size* class method lets you create a Daru::Vector
|
127
|
-
# by specifying the size as the argument. The optional block, if
|
127
|
+
# by specifying the size as the argument. The optional block, if
|
128
128
|
# supplied, is run once for populating each element in the Vector.
|
129
129
|
#
|
130
130
|
# The result of each run of the block is the value that is ultimately
|
@@ -149,15 +149,15 @@ module Daru
|
|
149
149
|
# * Range: transformed using to_a
|
150
150
|
# * Daru::Vector
|
151
151
|
# * Numeric and string values
|
152
|
-
#
|
152
|
+
#
|
153
153
|
# == Description
|
154
154
|
#
|
155
155
|
# The `Vector.[]` class method creates a vector from almost any
|
156
156
|
# object that has a `#to_a` method defined on it. It is similar
|
157
157
|
# to R's `c` method.
|
158
|
-
#
|
158
|
+
#
|
159
159
|
# == Usage
|
160
|
-
#
|
160
|
+
#
|
161
161
|
# a = Daru::Vector[1,2,3,4,6..10]
|
162
162
|
# #=>
|
163
163
|
# # <Daru::Vector:99448510 @name = nil @size = 9 >
|
@@ -189,21 +189,21 @@ module Daru
|
|
189
189
|
end
|
190
190
|
|
191
191
|
# Get one or more elements with specified index or a range.
|
192
|
-
#
|
192
|
+
#
|
193
193
|
# == Usage
|
194
194
|
# # For vectors employing single layer Index
|
195
|
-
#
|
195
|
+
#
|
196
196
|
# v[:one, :two] # => Daru::Vector with indexes :one and :two
|
197
197
|
# v[:one] # => Single element
|
198
198
|
# v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
|
199
|
-
#
|
199
|
+
#
|
200
200
|
# # For vectors employing hierarchical multi index
|
201
|
-
#
|
201
|
+
#
|
202
202
|
def [](*indexes)
|
203
203
|
location = indexes[0]
|
204
204
|
if @index.is_a?(MultiIndex)
|
205
205
|
sub_index = @index[indexes]
|
206
|
-
result =
|
206
|
+
result =
|
207
207
|
if sub_index.is_a?(Integer)
|
208
208
|
@data[sub_index]
|
209
209
|
else
|
@@ -220,6 +220,9 @@ module Daru
|
|
220
220
|
|
221
221
|
return result
|
222
222
|
else
|
223
|
+
raise TypeError, "Invalid index type #{location.inspect}.\
|
224
|
+
\nUsage: v[:a, :b] gives elements with keys :a and :b for vector v." if location.is_a? Array
|
225
|
+
|
223
226
|
unless indexes[1]
|
224
227
|
case location
|
225
228
|
when Range
|
@@ -238,20 +241,24 @@ module Daru
|
|
238
241
|
indexes = indexes.map { |e| named_index_for(e) }
|
239
242
|
end
|
240
243
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
+
begin
|
245
|
+
Daru::Vector.new(
|
246
|
+
indexes.map { |loc| @data[@index[loc]] },
|
247
|
+
name: @name, index: indexes, dtype: @dtype)
|
248
|
+
rescue NoMethodError
|
249
|
+
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
250
|
+
end
|
244
251
|
end
|
245
252
|
end
|
246
253
|
|
247
254
|
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
248
255
|
# and assign an element at that place in the Daru::Vector.
|
249
|
-
#
|
256
|
+
#
|
250
257
|
# == Usage
|
251
|
-
#
|
258
|
+
#
|
252
259
|
# v = Daru::Vector.new([1,2,3], index: [:a, :b, :c])
|
253
260
|
# v[:a] = 999
|
254
|
-
# #=>
|
261
|
+
# #=>
|
255
262
|
# ##<Daru::Vector:90257920 @name = nil @size = 3 >
|
256
263
|
# # nil
|
257
264
|
# # a 999
|
@@ -260,7 +267,7 @@ module Daru
|
|
260
267
|
def []=(*location, value)
|
261
268
|
cast(dtype: :array) if value.nil? and dtype != :array
|
262
269
|
|
263
|
-
@possibly_changed_type = true if @type == :object and (value.nil? or
|
270
|
+
@possibly_changed_type = true if @type == :object and (value.nil? or
|
264
271
|
value.is_a?(Numeric))
|
265
272
|
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
|
266
273
|
!value.nil?)
|
@@ -271,7 +278,11 @@ module Daru
|
|
271
278
|
if pos.is_a?(Numeric)
|
272
279
|
@data[pos] = value
|
273
280
|
else
|
274
|
-
|
281
|
+
begin
|
282
|
+
pos.each { |tuple| self[tuple] = value }
|
283
|
+
rescue NoMethodError
|
284
|
+
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
285
|
+
end
|
275
286
|
end
|
276
287
|
|
277
288
|
set_size
|
@@ -281,17 +292,17 @@ module Daru
|
|
281
292
|
# The values to be treated as 'missing'. *nil* is the default missing
|
282
293
|
# type. To set missing values see the missing_values= method.
|
283
294
|
def missing_values
|
284
|
-
@missing_values.keys
|
295
|
+
@missing_values.keys
|
285
296
|
end
|
286
297
|
|
287
298
|
# Assign an Array to treat certain values as 'missing'.
|
288
|
-
#
|
299
|
+
#
|
289
300
|
# == Usage
|
290
|
-
#
|
301
|
+
#
|
291
302
|
# v = Daru::Vector.new [1,2,3,4,5]
|
292
303
|
# v.missing_values = [3]
|
293
304
|
# v.update
|
294
|
-
# v.missing_positions
|
305
|
+
# v.missing_positions
|
295
306
|
# #=> [2]
|
296
307
|
def missing_values= values
|
297
308
|
set_missing_values values
|
@@ -370,12 +381,12 @@ module Daru
|
|
370
381
|
if other.is_a?(Daru::Vector)
|
371
382
|
mod.apply_vector_operator operator, self, other
|
372
383
|
else
|
373
|
-
mod.apply_scalar_operator operator, @data,other
|
384
|
+
mod.apply_scalar_operator operator, @data,other
|
374
385
|
end
|
375
386
|
end
|
376
387
|
end
|
377
388
|
alias :gt :mt
|
378
|
-
alias :gteq :mteq
|
389
|
+
alias :gteq :mteq
|
379
390
|
|
380
391
|
# Comparator for checking if any of the elements in *other* exist in self.
|
381
392
|
#
|
@@ -384,7 +395,7 @@ module Daru
|
|
384
395
|
# @example Usage of `in`.
|
385
396
|
# vector = Daru::Vector.new([1,2,3,4,5])
|
386
397
|
# vector.where(vector.in([3,5]))
|
387
|
-
# #=>
|
398
|
+
# #=>
|
388
399
|
# ##<Daru::Vector:82215960 @name = nil @size = 2 >
|
389
400
|
# # nil
|
390
401
|
# # 2 3
|
@@ -400,10 +411,10 @@ module Daru
|
|
400
411
|
end
|
401
412
|
|
402
413
|
# Return a new vector based on the contents of a boolean array. Use with the
|
403
|
-
# comparator methods to obtain meaningful results. See this notebook for
|
414
|
+
# comparator methods to obtain meaningful results. See this notebook for
|
404
415
|
# a good overview of using #where.
|
405
416
|
#
|
406
|
-
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
|
417
|
+
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
|
407
418
|
# collection containing the true or false values. Each element in the Vector
|
408
419
|
# corresponding to a `true` in the bool_arry will be returned along with its
|
409
420
|
# index.
|
@@ -412,7 +423,7 @@ module Daru
|
|
412
423
|
#
|
413
424
|
# # Simple logic statement passed to #where.
|
414
425
|
# vector.where(vector.eq(5).or(vector.eq(1)))
|
415
|
-
# # =>
|
426
|
+
# # =>
|
416
427
|
# ##<Daru::Vector:77626210 @name = nil @size = 7 >
|
417
428
|
# # nil
|
418
429
|
# # 2 5
|
@@ -422,10 +433,10 @@ module Daru
|
|
422
433
|
# # 11 5
|
423
434
|
# # 13 5
|
424
435
|
# # 15 1
|
425
|
-
#
|
436
|
+
#
|
426
437
|
# # A somewhat more complex logic statement
|
427
438
|
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
428
|
-
# #=>
|
439
|
+
# #=>
|
429
440
|
# ##<Daru::Vector:81072310 @name = nil @size = 7 >
|
430
441
|
# # nil
|
431
442
|
# # 2 5
|
@@ -464,17 +475,17 @@ module Daru
|
|
464
475
|
set_size
|
465
476
|
set_missing_positions unless Daru.lazy_update
|
466
477
|
end
|
467
|
-
alias :push :concat
|
478
|
+
alias :push :concat
|
468
479
|
alias :<< :concat
|
469
480
|
|
470
481
|
# Cast a vector to a new data type.
|
471
|
-
#
|
482
|
+
#
|
472
483
|
# == Options
|
473
|
-
#
|
484
|
+
#
|
474
485
|
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
475
486
|
def cast opts={}
|
476
487
|
dt = opts[:dtype]
|
477
|
-
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
488
|
+
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
478
489
|
dt == :array or dt == :nmatrix or dt == :gsl
|
479
490
|
|
480
491
|
@data = cast_vector_to dt unless @dtype == dt
|
@@ -482,7 +493,7 @@ module Daru
|
|
482
493
|
|
483
494
|
# Delete an element by value
|
484
495
|
def delete element
|
485
|
-
self.delete_at index_of(element)
|
496
|
+
self.delete_at index_of(element)
|
486
497
|
end
|
487
498
|
|
488
499
|
# Delete element by index
|
@@ -497,9 +508,9 @@ module Daru
|
|
497
508
|
# The type of data contained in the vector. Can be :object or :numeric. If
|
498
509
|
# the underlying dtype is an NMatrix, this method will return the data type
|
499
510
|
# of the NMatrix object.
|
500
|
-
#
|
511
|
+
#
|
501
512
|
# Running through the data to figure out the kind of data is delayed to the
|
502
|
-
# last possible moment.
|
513
|
+
# last possible moment.
|
503
514
|
def type
|
504
515
|
return @data.nm_dtype if dtype == :nmatrix
|
505
516
|
|
@@ -527,8 +538,8 @@ module Daru
|
|
527
538
|
# Keep only unique elements of the vector along with their indexes.
|
528
539
|
def uniq
|
529
540
|
uniq_vector = @data.uniq
|
530
|
-
new_index = uniq_vector.inject([]) do |acc, element|
|
531
|
-
acc << index_of(element)
|
541
|
+
new_index = uniq_vector.inject([]) do |acc, element|
|
542
|
+
acc << index_of(element)
|
532
543
|
acc
|
533
544
|
end
|
534
545
|
|
@@ -544,18 +555,18 @@ module Daru
|
|
544
555
|
end
|
545
556
|
|
546
557
|
# Sorts a vector according to its values. If a block is specified, the contents
|
547
|
-
# will be evaluated and data will be swapped whenever the block evaluates
|
558
|
+
# will be evaluated and data will be swapped whenever the block evaluates
|
548
559
|
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
549
560
|
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
550
561
|
# quick sort.
|
551
|
-
#
|
562
|
+
#
|
552
563
|
# == Options
|
553
|
-
#
|
564
|
+
#
|
554
565
|
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
|
555
|
-
#
|
566
|
+
#
|
556
567
|
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
|
557
568
|
# == Usage
|
558
|
-
#
|
569
|
+
#
|
559
570
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
560
571
|
# # Say you want to sort these strings by length.
|
561
572
|
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
@@ -565,8 +576,18 @@ module Daru
|
|
565
576
|
type: :quick_sort
|
566
577
|
}.merge(opts)
|
567
578
|
|
568
|
-
block = lambda { |a,b|
|
569
|
-
|
579
|
+
block = lambda { |a,b|
|
580
|
+
return a <=> b if !(a.nil? || b.nil?)
|
581
|
+
|
582
|
+
if a.nil? && b.nil?
|
583
|
+
0
|
584
|
+
elsif a.nil?
|
585
|
+
-1
|
586
|
+
else
|
587
|
+
1
|
588
|
+
end
|
589
|
+
} unless block
|
590
|
+
|
570
591
|
order = opts[:ascending] ? :ascending : :descending
|
571
592
|
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
|
572
593
|
index = Daru::Index.new index
|
@@ -574,7 +595,7 @@ module Daru
|
|
574
595
|
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
575
596
|
end
|
576
597
|
|
577
|
-
# Just sort the data and get an Array in return using Enumerable#sort.
|
598
|
+
# Just sort the data and get an Array in return using Enumerable#sort.
|
578
599
|
# Non-destructive.
|
579
600
|
def sorted_data &block
|
580
601
|
@data.to_a.sort(&block)
|
@@ -728,12 +749,12 @@ module Daru
|
|
728
749
|
|
729
750
|
# Returns a vector which has *true* in the position where the element in self
|
730
751
|
# is nil, and false otherwise.
|
731
|
-
#
|
752
|
+
#
|
732
753
|
# == Usage
|
733
|
-
#
|
754
|
+
#
|
734
755
|
# v = Daru::Vector.new([1,2,4,nil])
|
735
756
|
# v.is_nil?
|
736
|
-
# # =>
|
757
|
+
# # =>
|
737
758
|
# #<Daru::Vector:89421000 @name = nil @size = 4 >
|
738
759
|
# # nil
|
739
760
|
# # 0 false
|
@@ -761,9 +782,9 @@ module Daru
|
|
761
782
|
|
762
783
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
763
784
|
# See #replace_nils for non-destructive version
|
764
|
-
#
|
785
|
+
#
|
765
786
|
# == Arguments
|
766
|
-
#
|
787
|
+
#
|
767
788
|
# * +replacement+ - The value which should replace all nils
|
768
789
|
def replace_nils! replacement
|
769
790
|
missing_positions.each do |idx|
|
@@ -819,9 +840,9 @@ module Daru
|
|
819
840
|
end
|
820
841
|
|
821
842
|
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
822
|
-
#
|
843
|
+
#
|
823
844
|
# == Arguments
|
824
|
-
#
|
845
|
+
#
|
825
846
|
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
826
847
|
def to_matrix axis=:horizontal
|
827
848
|
if axis == :horizontal
|
@@ -861,28 +882,28 @@ module Daru
|
|
861
882
|
end
|
862
883
|
|
863
884
|
# Convert the hash from to_hash to json
|
864
|
-
def to_json *args
|
885
|
+
def to_json *args
|
865
886
|
self.to_hash.to_json
|
866
887
|
end
|
867
888
|
|
868
889
|
# Convert to html for iruby
|
869
890
|
def to_html threshold=30
|
870
891
|
name = @name || 'nil'
|
871
|
-
html = "<table>" +
|
892
|
+
html = "<table>" +
|
872
893
|
"<tr>" +
|
873
|
-
"<th colspan=\"2\">" +
|
874
|
-
"Daru::Vector:#{self.object_id} " + " size: #{size}" +
|
894
|
+
"<th colspan=\"2\">" +
|
895
|
+
"Daru::Vector:#{self.object_id} " + " size: #{size}" +
|
875
896
|
"</th>" +
|
876
897
|
"</tr>"
|
877
898
|
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
878
899
|
@index.each_with_index do |index, num|
|
879
900
|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
880
|
-
|
901
|
+
|
881
902
|
if num > threshold
|
882
903
|
html += '<tr><td>...</td><td>...</td></tr>'
|
883
904
|
|
884
905
|
last_index = @index.to_a.last
|
885
|
-
html += '<tr>' +
|
906
|
+
html += '<tr>' +
|
886
907
|
'<td>' + last_index.to_s + '</td>' +
|
887
908
|
'<td>' + self[last_index].to_s + '</td>' +
|
888
909
|
'</tr>'
|
@@ -935,7 +956,7 @@ module Daru
|
|
935
956
|
# Overrides the original inspect for pretty printing in irb
|
936
957
|
def inspect spacing=20, threshold=15
|
937
958
|
longest = [@name.to_s.size,
|
938
|
-
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
959
|
+
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
939
960
|
(@data .map(&:to_s).map(&:size).max || 0),
|
940
961
|
'nil'.size].max
|
941
962
|
|
@@ -975,30 +996,30 @@ module Daru
|
|
975
996
|
end
|
976
997
|
|
977
998
|
def index= idx
|
978
|
-
raise ArgumentError,
|
979
|
-
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
999
|
+
raise ArgumentError,
|
1000
|
+
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
980
1001
|
idx.size != self.size
|
981
|
-
raise ArgumentError, "Can only assign type Index and its subclasses." unless
|
1002
|
+
raise ArgumentError, "Can only assign type Index and its subclasses." unless
|
982
1003
|
idx.kind_of?(Daru::Index)
|
983
|
-
|
1004
|
+
|
984
1005
|
@index = idx
|
985
1006
|
self
|
986
1007
|
end
|
987
1008
|
|
988
1009
|
# Give the vector a new name
|
989
|
-
#
|
1010
|
+
#
|
990
1011
|
# @param new_name [Symbol] The new name.
|
991
1012
|
def rename new_name
|
992
1013
|
if new_name.is_a?(Numeric)
|
993
|
-
@name = new_name
|
1014
|
+
@name = new_name
|
994
1015
|
return
|
995
1016
|
end
|
996
|
-
|
1017
|
+
|
997
1018
|
@name = new_name
|
998
1019
|
end
|
999
1020
|
|
1000
1021
|
# Duplicate elements and indexes
|
1001
|
-
def dup
|
1022
|
+
def dup
|
1002
1023
|
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
1003
1024
|
end
|
1004
1025
|
|
@@ -1082,15 +1103,15 @@ module Daru
|
|
1082
1103
|
end
|
1083
1104
|
|
1084
1105
|
# Creates a new vector consisting only of non-nil data
|
1085
|
-
#
|
1106
|
+
#
|
1086
1107
|
# == Arguments
|
1087
|
-
#
|
1108
|
+
#
|
1088
1109
|
# @as_a [Symbol] Passing :array will return only the elements
|
1089
1110
|
# as an Array. Otherwise will return a Daru::Vector.
|
1090
|
-
#
|
1111
|
+
#
|
1091
1112
|
# @duplicate [Symbol] In case no missing data is found in the
|
1092
1113
|
# vector, setting this to false will return the same vector.
|
1093
|
-
# Otherwise, a duplicate will be returned irrespective of
|
1114
|
+
# Otherwise, a duplicate will be returned irrespective of
|
1094
1115
|
# presence of missing data.
|
1095
1116
|
def only_valid as_a=:vector, duplicate=true
|
1096
1117
|
return self.dup if !has_missing_data? and as_a == :vector and duplicate
|
@@ -1103,7 +1124,7 @@ module Daru
|
|
1103
1124
|
end
|
1104
1125
|
|
1105
1126
|
return new_vector if as_a != :vector
|
1106
|
-
|
1127
|
+
|
1107
1128
|
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1108
1129
|
end
|
1109
1130
|
|
@@ -1149,9 +1170,9 @@ module Daru
|
|
1149
1170
|
end
|
1150
1171
|
|
1151
1172
|
# Save the vector to a file
|
1152
|
-
#
|
1173
|
+
#
|
1153
1174
|
# == Arguments
|
1154
|
-
#
|
1175
|
+
#
|
1155
1176
|
# * filename - Path of file where the vector is to be saved
|
1156
1177
|
def save filename
|
1157
1178
|
Daru::IO.save self, filename
|
@@ -1159,16 +1180,16 @@ module Daru
|
|
1159
1180
|
|
1160
1181
|
def _dump(depth) # :nodoc:
|
1161
1182
|
Marshal.dump({
|
1162
|
-
data: @data.to_a,
|
1163
|
-
dtype: @dtype,
|
1164
|
-
name: @name,
|
1183
|
+
data: @data.to_a,
|
1184
|
+
dtype: @dtype,
|
1185
|
+
name: @name,
|
1165
1186
|
index: @index,
|
1166
1187
|
missing_values: @missing_values})
|
1167
1188
|
end
|
1168
1189
|
|
1169
1190
|
def self._load(data) # :nodoc:
|
1170
1191
|
h = Marshal.load(data)
|
1171
|
-
Daru::Vector.new(h[:data], index: h[:index],
|
1192
|
+
Daru::Vector.new(h[:data], index: h[:index],
|
1172
1193
|
name: h[:name], dtype: h[:dtype], missing_values: h[:missing_values])
|
1173
1194
|
end
|
1174
1195
|
|
@@ -1261,9 +1282,8 @@ module Daru
|
|
1261
1282
|
end
|
1262
1283
|
|
1263
1284
|
def keep? a, b, order, &block
|
1264
|
-
return false if a.nil? or b.nil?
|
1265
1285
|
eval = block.call(a,b)
|
1266
|
-
if order == :ascending
|
1286
|
+
if order == :ascending
|
1267
1287
|
return true if eval == -1
|
1268
1288
|
return false if eval == 1
|
1269
1289
|
elsif order == :descending
|
@@ -1278,7 +1298,7 @@ module Daru
|
|
1278
1298
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
1279
1299
|
source = @data.to_a if source.nil?
|
1280
1300
|
|
1281
|
-
new_vector =
|
1301
|
+
new_vector =
|
1282
1302
|
case dtype
|
1283
1303
|
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
|
1284
1304
|
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
@@ -1314,8 +1334,8 @@ module Daru
|
|
1314
1334
|
end
|
1315
1335
|
|
1316
1336
|
def set_name name
|
1317
|
-
@name =
|
1318
|
-
if name.is_a?(Numeric) then name
|
1337
|
+
@name =
|
1338
|
+
if name.is_a?(Numeric) then name
|
1319
1339
|
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
|
1320
1340
|
elsif name then name # anything but Numeric or nil
|
1321
1341
|
else
|
@@ -1355,4 +1375,4 @@ module Daru
|
|
1355
1375
|
end
|
1356
1376
|
end
|
1357
1377
|
end
|
1358
|
-
end
|
1378
|
+
end
|