daru_lite 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,301 @@
module DaruLite
  class DataFrame
    module Fetchable
      # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
      # Defaults to *:vector*. Use of this method is not recommended for accessing
      # rows. Use df.row[:a] for accessing row with index ':a'.
      def [](*names)
        axis = extract_axis(names, :vector)
        dispatch_to_axis axis, :access, *names
      end

      # Retrieve rows by positions
      # @param [Array<Integer>] positions of rows to retrieve
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.row_at 1, 2
      #   # => #<DaruLite::DataFrame(2x2)>
      #   #       a   b
      #   #   1   2   b
      #   #   2   3   c
      def row_at(*positions)
        # Keep the caller's arguments: the multi-row branch hands them on
        # untouched, only the single-position branch uses the coerced value.
        original_positions = positions
        positions = coerce_positions(*positions, nrows)
        validate_positions(*positions, nrows)

        if positions.is_a? Integer
          row = get_rows_for([positions])
          DaruLite::Vector.new(row, index: @vectors, name: @index.at(positions))
        else
          new_rows = get_rows_for(original_positions)
          DaruLite::DataFrame.new(
            new_rows,
            index: @index.at(*original_positions),
            order: @vectors,
            name: @name
          )
        end
      end

      # Retrieve vectors by positions
      # @param [Array<Integer>] positions of vectors to retrieve; a trailing
      #   axis (:row) redirects the call to #row_at
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.at 0
      #   # => #<DaruLite::Vector(3)>
      #   #       a
      #   #   0   1
      #   #   1   2
      #   #   2   3
      def at(*positions)
        if AXES.include? positions.last
          axis = positions.pop
          return row_at(*positions) if axis == :row
        end

        original_positions = positions
        positions = coerce_positions(*positions, ncols)
        validate_positions(*positions, ncols)

        if positions.is_a? Integer
          @data[positions].dup
        else
          DaruLite::DataFrame.new positions.map { |pos| @data[pos].dup },
                                  index: @index,
                                  order: @vectors.at(*original_positions),
                                  name: @name
        end
      end

      # The first +quantity+ rows of the DataFrame (ten by default)
      #
      # NOTE(review): quantity = 0 builds the range 0..-1 - confirm that the
      # row accessor treats this as intended.
      #
      # @param quantity [Integer] (10) The number of elements to display from the top.
      def head(quantity = 10)
        row.at 0..(quantity - 1)
      end
      alias first head

      # The last +quantity+ rows of the DataFrame (ten by default)
      #
      # @param quantity [Integer] (10) The number of elements to display from the bottom.
      def tail(quantity = 10)
        # Clamp so that asking for more rows than exist starts at the first row.
        start = [-quantity, -size].max
        row.at start..-1
      end
      alias last tail

      # Extract a dataframe given row indexes or positions
      # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
      # @return [DaruLite::DataFrame]
      def get_sub_dataframe(keys, by_position: true)
        return DaruLite::DataFrame.new({}) if keys == []

        keys = @index.pos(*keys) unless by_position

        sub_df = row_at(*keys)
        # A single row comes back as a Vector; turn it into a one-row frame.
        sub_df = sub_df.to_df.transpose if sub_df.is_a?(DaruLite::Vector)

        sub_df
      end

      # Values of vector +v+ as an Array. When no vector of that name exists,
      # an Array of +size+ nils is returned instead.
      def get_vector_anyways(v)
        @vectors.include?(v) ? self[v].to_a : Array.new(size)
      end

      # @param indexes [Array] index(s) at which row tuples are retrieved
      # @return [Array] returns array of row tuples at given index(s)
      # @example Using DaruLite::Index
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #
      #   df.access_row_tuples_by_indexs(1,2)
      #   # => [[2, "a"], [3, "b"]]
      #
      #   df.index = DaruLite::Index.new([:one,:two,:three])
      #   df.access_row_tuples_by_indexs(:one,:three)
      #   # => [[1, "a"], [3, "b"]]
      #
      # @example Using DaruLite::MultiIndex
      #   mi_idx = DaruLite::MultiIndex.from_tuples [
      #     [:a,:one,:bar],
      #     [:a,:one,:baz],
      #     [:b,:two,:bar],
      #     [:a,:two,:baz],
      #   ]
      #   df_mi = DaruLite::DataFrame.new({
      #     a: 1..4,
      #     b: 'a'..'d'
      #   }, index: mi_idx )
      #
      #   df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
      #   # => [[3, "c"]]
      #   df_mi.access_row_tuples_by_indexs(:a)
      #   # => [[1, "a"], [2, "b"], [4, "d"]]
      def access_row_tuples_by_indexs(*indexes)
        return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if
          @index.is_a?(DaruLite::MultiIndex)

        positions = @index.pos(*indexes)
        if positions.is_a? Numeric
          row = get_rows_for([positions])
          row.first.is_a?(Array) ? row : [row]
        else
          new_rows = get_rows_for(indexes, by_position: false)
          indexes.map { |index| new_rows.map { |r| r[index] } }
        end
      end

      # Split the dataframe into many dataframes based on category vector
      # @param [object] cat_name name of category vector to split the dataframe
      # @return [Array] array of dataframes split by category with category vector
      #   used to split not included
      # @raise [ArgumentError] if the named vector is not a category vector
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #   df.to_category :b
      #   df.split_by_category :b
      #   # => [#<DaruLite::DataFrame: a (2x1)>
      #   #       a
      #   #   0   1
      #   #   1   2,
      #   # #<DaruLite::DataFrame: b (1x1)>
      #   #       a
      #   #   2   3]
      def split_by_category(cat_name)
        cat_dv = self[cat_name]
        raise ArgumentError, "#{cat_name} is not a category vector" unless
          cat_dv.category?

        cat_dv.categories.map do |cat|
          where(cat_dv.eq cat)
            .rename(cat)
            .delete_vector cat_name
        end
      end

      # Return the indexes of all the numeric vectors. Will include vectors with nils
      # along with numbers.
      def numeric_vectors
        # FIXME: Why _with_index ?..
        each_vector_with_index
          .select { |vec, _i| vec.numeric? }
          .map(&:last)
      end

      # Names from @vectors whose vector answers true to #numeric?.
      def numeric_vector_names
        @vectors.select { |v| self[v].numeric? }
      end

      # Return a DataFrame of only the numerical Vectors. If clone: false
      # is specified as option, only a *view* of the Vectors will be
      # returned. Defaults to clone: true.
      def only_numerics(opts = {})
        cln = opts[:clone] != false
        # Scan the vectors once and reuse the result for both data and order.
        names = numeric_vectors
        arry = names.map { |v| self[v] }

        order = Index.new(names)
        DaruLite::DataFrame.new(arry, clone: cln, order: order, index: @index)
      end

      private

      # Vector lookup behind #[]: a Range selects a subset of vectors, otherwise
      # the lookup is delegated according to the kind of vector index in use.
      def access_vector(*names)
        if names.first.is_a?(Range)
          dup(@vectors.subset(names.first))
        elsif @vectors.is_a?(MultiIndex)
          access_vector_multi_index(*names)
        else
          access_vector_single_index(*names)
        end
      end

      # Vector lookup when @vectors is a MultiIndex. Returns the stored vector
      # when the names resolve to a single Integer position, otherwise a
      # DataFrame of the matching vectors.
      def access_vector_multi_index(*names)
        pos = @vectors[names]

        return @data[pos] if pos.is_a?(Integer)

        new_vectors = pos.map { |tuple| @data[@vectors[tuple]] }

        # For a partial key, drop the levels the caller already specified.
        pos = pos.drop_left_level(names.size) if names.size < @vectors.width

        DaruLite::DataFrame.new(new_vectors, index: @index, order: pos)
      end

      # Vector lookup for a non-MultiIndex @vectors. A single name resolving to
      # one numeric position returns the stored vector; otherwise a DataFrame
      # of the selected vectors is built.
      # @raise [IndexError] if a single requested name does not exist
      def access_vector_single_index(*names)
        if names.count < 2
          begin
            pos = @vectors.is_a?(DaruLite::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
          rescue IndexError
            raise IndexError, "Specified vector #{names.first} does not exist"
          end
          return @data[pos] if pos.is_a?(Numeric)

          names = pos
        end

        new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h

        order = names.is_a?(Array) ? DaruLite::Index.new(names) : names
        DaruLite::DataFrame.new(new_vectors, order: order, index: @index, name: @name)
      end

      # Row lookup behind #[] with the :row axis. One numeric position gives a
      # Vector named after the first index; anything else gives a DataFrame.
      def access_row(*indexes)
        positions = @index.pos(*indexes)

        if positions.is_a? Numeric
          row = get_rows_for([positions])
          DaruLite::Vector.new row, index: @vectors, name: indexes.first
        else
          new_rows = get_rows_for(indexes, by_position: false)
          DaruLite::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
        end
      end

      # @param keys [Array] can be an array of positions (if by_position is true) or indexes (if by_position is false)
      # because of coercion by DaruLite::Vector#at and DaruLite::Vector#[], can return either an Array of
      # values (representing a row) or an array of Vectors (that can be seen as rows)
      # @raise [RuntimeError] if keys is not an Array
      def get_rows_for(keys, by_position: true)
        # An explicit message is used rather than a bare `raise`: with no
        # arguments, `raise` re-raises the exception currently in `$!` when
        # this is reached from inside a rescue clause.
        raise "keys must be an Array, got #{keys.class}" unless keys.is_a?(Array)

        if by_position
          pos = keys
          @data.map { |vector| vector.at(*pos) }
        else
          # TODO: for now (2018-07-27), it is different than using
          #    get_rows_for(@index.pos(*keys))
          #    because DaruLite::Vector#at and DaruLite::Vector#[] don't handle DaruLite::MultiIndex the same way
          indexes = keys
          @data.map { |vec| vec[*indexes] }
        end
      end

      # coerce ranges, integers and array in appropriate ways
      #
      # A lone Integer is returned as is, a lone Range is expanded against
      # 0...size, and several positions are returned unchanged as an Array.
      # @raise [ArgumentError] if the only position is neither Integer nor Range
      def coerce_positions(*positions, size)
        if positions.size == 1
          case positions.first
          when Integer
            positions.first
          when Range
            size.times.to_a[positions.first]
          else
            raise ArgumentError, 'Unknown position type.'
          end
        else
          positions
        end
      end
    end
  end
end
@@ -0,0 +1,144 @@
module DaruLite
  class DataFrame
    module Filterable
      # Return unique rows, judged on the given vectors or on all vectors
      # when none are given. The first row of every group is the one kept.
      #
      # @param vtrs [String][Symbol] vector names(s) that should be considered
      #
      # @example
      #
      #    => #<DaruLite::DataFrame(6x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #     2   3   c
      #     3   4   f
      #
      #    2.3.3 :> df.uniq
      #    => #<DaruLite::DataFrame(5x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #     3   4   f
      #
      #    2.3.3 :> df.uniq(:a)
      #    => #<DaruLite::DataFrame(4x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #
      def uniq(*vtrs)
        columns = vtrs.empty? ? vectors.to_a : vtrs
        first_of_each_group = group_by(columns).groups.values.map(&:first)
        row[*first_of_each_group.sort]
      end

      # Retain vectors or rows if the block returns a truthy value.
      #
      # == Description
      #
      # For filtering out certain rows/vectors based on their values,
      # use the #filter method. By default it iterates over vectors and
      # keeps those vectors for which the block returns true. It accepts
      # an optional axis argument which lets you specify whether you want
      # to iterate over vectors or rows.
      #
      # == Arguments
      #
      # * +axis+ - The axis to filter over. Can be :vector (or :column) or :row.
      #   Default to :vector.
      #
      # == Usage
      #
      #   # Filter vectors
      #
      #   df.filter do |vector|
      #     vector.type == :numeric and vector.median < 50
      #   end
      #
      #   # Filter rows
      #
      #   df.filter(:row) do |row|
      #     row[:a] + row[:d] < 100
      #   end
      def filter(axis = :vector, &block)
        dispatch_to_axis_pl(axis, :filter, &block)
      end

      # Returns a dataframe in which rows with any of the mentioned values
      # are ignored.
      # @param [Array] values to reject to form the new dataframe
      # @return [DaruLite::DataFrame] Data Frame with only the rows which don't
      #   contain the mentioned values
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
      #     b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   8],
      #     c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
      #   }, index: 11..18)
      #   df.reject_values nil, Float::NAN
      #   # => #<DaruLite::DataFrame(2x3)>
      #   #       a   b   c
      #   #   11   1   a   a
      #   #   18   7   8   7
      def reject_values(*values)
        every_position = size.times.to_a
        rejected = @data.flat_map { |vec| vec.positions(*values) }
        kept = every_position - rejected

        # A lone position is passed as a one-element range, because #row_at
        # wouldn't return a df for a single position.
        return row_at(kept.first..kept.first) if kept.size == 1

        row_at(*kept)
      end

      # Deletes, in place, every row for which the block returns a falsy value.
      def keep_row_if
        unwanted = @index.size.times.reject { |position| yield(row_at(position)) }
        # Walk backwards so the positions still to be deleted stay valid.
        unwanted.reverse_each { |position| delete_at_position(position) }
      end

      # Deletes, in place, every vector for which the block (given the vector
      # and its name) returns a falsy value.
      def keep_vector_if
        @vectors.each do |name|
          next if yield(@data[@vectors[name]], name)

          delete_vector(name)
        end
      end

      # creates a new vector with the data of a given field for the rows
      # where the block returns true
      def filter_vector(vec, &block)
        matching_rows = each_row.select(&block)
        DaruLite::Vector.new(matching_rows.map { |matched| matched[vec] })
      end

      # Iterates over each row and retains it in a new DataFrame if the block returns
      # true for that row. Returns an enumerator when no block is given.
      def filter_rows
        return to_enum(:filter_rows) unless block_given?

        mask = @index.map { |label| yield access_row(label) }
        where(mask)
      end

      # Iterates over each vector and retains it in a new DataFrame if the block returns
      # true for that vector. Returns an enumerator when no block is given.
      def filter_vectors(&block)
        return to_enum(:filter_vectors) unless block

        copy = dup
        copy.keep_vector_if(&block)
        copy
      end

      # Query a DataFrame by passing a DaruLite::Core::Query::BoolArray object.
      def where(bool_array)
        DaruLite::Core::Query.df_where(self, bool_array)
      end
    end
  end
end
@@ -0,0 +1,179 @@
module DaruLite
  class DataFrame
    module IOAble
      module ClassMethods
        # Load data from a CSV file. Specify an optional block to grab the CSV
        # object and pre-condition it (for example use the `convert` or
        # `header_convert` methods).
        #
        # == Arguments
        #
        # * path - Local path / Remote URL of the file to load specified as a String.
        #
        # == Options
        #
        # Accepts the same options as the DaruLite::DataFrame constructor and CSV.open()
        # and uses those to eventually construct the resulting DataFrame.
        #
        # == Verbose Description
        #
        # You can specify all the options to the `.from_csv` function that you
        # do to the Ruby `CSV.read()` function, since this is what is used internally.
        #
        # For example, if the columns in your CSV file are separated by something
        # other than commas, you can use the `:col_sep` option. If you want to
        # convert numeric values to numbers and not keep them as strings, you can
        # use the `:converters` option and set it to `:numeric`.
        #
        # The `.from_csv` function uses the following defaults for reading CSV files
        # (that are passed into the `CSV.read()` function):
        #
        #   {
        #     :col_sep           => ',',
        #     :converters        => :numeric
        #   }
        def from_csv(path, opts = {}, &block)
          DaruLite::IO.from_csv(path, opts, &block)
        end

        # Read data from an Excel file into a DataFrame.
        #
        # == Arguments
        #
        # * path - Path of the file to be read.
        #
        # == Options
        #
        # * :worksheet_id - ID of the worksheet that is to be read.
        def from_excel(path, opts = {}, &block)
          DaruLite::IO.from_excel(path, opts, &block)
        end

        # Run a database query and return the result as a dataframe
        #
        # @param dbh [DBI::DatabaseHandle, String] A DBI connection OR Path to a SQLite3 database.
        # @param query [String] The query to be executed
        #
        # @return A dataframe containing the data resulting from the query
        #
        # USE:
        #
        #   dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
        #   DaruLite::DataFrame.from_sql(dbh, "SELECT * FROM test")
        #
        #   #Alternatively
        #
        #   require 'dbi'
        #   DaruLite::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
        def from_sql(dbh, query)
          DaruLite::IO.from_sql(dbh, query)
        end

        # Read a dataframe from AR::Relation
        #
        # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
        # @param fields [Array] Field names to be loaded (optional)
        #
        # @return A dataframe containing the data loaded from the relation
        #
        # USE:
        #
        #   # When Post model is defined as:
        #   class Post < ActiveRecord::Base
        #     scope :active, -> { where.not(published_at: nil) }
        #   end
        #
        #   # You can load active posts into a dataframe by:
        #   DaruLite::DataFrame.from_activerecord(Post.active, :title, :published_at)
        def from_activerecord(relation, *fields)
          DaruLite::IO.from_activerecord(relation, *fields)
        end

        # Read the database from a plaintext file. For this method to work,
        # the data should be present in a plain text file in columns. See
        # spec/fixtures/bank2.dat for an example.
        #
        # == Arguments
        #
        # * path - Path of the file to be read.
        # * fields - Vector names of the resulting database.
        #
        # == Usage
        #
        #   df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
        def from_plaintext(path, fields)
          DaruLite::IO.from_plaintext(path, fields)
        end

        # Marshal hook: rebuilds a DataFrame from the string written by #_dump.
        def _load(data)
          attrs = Marshal.load(data)
          DaruLite::DataFrame.new(
            attrs[:data],
            index: attrs[:index],
            order: attrs[:order],
            name: attrs[:name]
          )
        end
      end

      # Adds the ClassMethods above to whatever includes this module.
      def self.included(base)
        base.extend(ClassMethods)
      end

      # Write this DataFrame to a CSV file.
      #
      # == Arguments
      #
      # * filename - Path of CSV file where the DataFrame is to be saved.
      #
      # == Options
      #
      # * convert_comma - If set to *true*, will convert any commas in any
      #   of the data to full stops ('.').
      # All the options accepted by CSV.read() can also be passed into this
      # function.
      def write_csv(filename, opts = {})
        DaruLite::IO.dataframe_write_csv(self, filename, opts)
      end

      # Write this dataframe to an Excel Spreadsheet
      #
      # == Arguments
      #
      # * filename - The path of the file where the DataFrame should be written.
      def write_excel(filename, opts = {})
        DaruLite::IO.dataframe_write_excel(self, filename, opts)
      end

      # Insert each row of the dataframe into the selected table
      #
      # == Arguments
      #
      # * dbh - DBI database connection object.
      # * table - Name of the table to write to.
      #
      # == Usage
      #
      #   ds = DaruLite::DataFrame.new({:id=>DaruLite::Vector.new([1,2,3]), :name=>DaruLite::Vector.new(["a","b","c"])})
      #   dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
      #   ds.write_sql(dbh,"test")
      def write_sql(dbh, table)
        DaruLite::IO.dataframe_write_sql(self, dbh, table)
      end

      # Use marshalling to save dataframe to a file.
      def save(filename)
        DaruLite::IO.save(self, filename)
      end

      # Marshal hook: serialises the data, the index and vector names (as
      # plain arrays) and the name of this dataframe.
      def _dump(_depth)
        payload = {
          data: @data,
          index: @index.to_a,
          order: @vectors.to_a,
          name: @name
        }
        Marshal.dump(payload)
      end
    end
  end
end