daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,301 @@
module DaruLite
  class DataFrame
    # Read-only accessors for rows, vectors and sub-frames of a DataFrame.
    module Fetchable
      # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
      # Defaults to *:vector*. Use of this method is not recommended for accessing
      # rows. Use df.row[:a] for accessing row with index ':a'.
      def [](*names)
        axis = extract_axis(names, :vector)
        dispatch_to_axis axis, :access, *names
      end

      # Retrieve rows by positions
      # @param [Array<Integer>] positions of rows to retrieve
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.row_at 1, 2
      #   # => #<DaruLite::DataFrame(2x2)>
      #   #       a   b
      #   #   1   2   b
      #   #   2   3   c
      def row_at(*positions)
        original_positions = positions
        positions = coerce_positions(*positions, nrows)
        validate_positions(*positions, nrows)

        if positions.is_a? Integer
          row = get_rows_for([positions])
          DaruLite::Vector.new(row, index: @vectors, name: @index.at(positions))
        else
          # The un-coerced arguments are forwarded so that a Range is handed
          # to Vector#at / Index#at as a Range rather than as an expanded list.
          new_rows = get_rows_for(original_positions)
          DaruLite::DataFrame.new(
            new_rows,
            index: @index.at(*original_positions),
            order: @vectors,
            name: @name
          )
        end
      end

      # Retrieve vectors by positions. When the last argument is one of AXES and
      # equals :row, the call is forwarded to #row_at instead.
      # @param [Array<Integer>] positions of vectors to retrieve
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.at 0
      #   # => #<DaruLite::Vector(3)>
      #   #       a
      #   #   0   1
      #   #   1   2
      #   #   2   3
      def at(*positions)
        if AXES.include? positions.last
          axis = positions.pop
          return row_at(*positions) if axis == :row
        end

        original_positions = positions
        positions = coerce_positions(*positions, ncols)
        validate_positions(*positions, ncols)

        if positions.is_a? Integer
          @data[positions].dup
        else
          DaruLite::DataFrame.new positions.map { |pos| @data[pos].dup },
                                  index: @index,
                                  order: @vectors.at(*original_positions),
                                  name: @name
        end
      end

      # The first ten elements of the DataFrame
      #
      # NOTE(review): a quantity of 0 yields the range 0..-1; if row.at slices
      # ranges the way #coerce_positions does, that selects every row rather
      # than none -- confirm the intended behaviour.
      #
      # @param [Integer] quantity (10) The number of elements to display from the top.
      def head(quantity = 10)
        row.at 0..(quantity - 1)
      end
      alias first head

      # The last ten elements of the DataFrame
      #
      # The start offset is clamped to -size so that asking for more rows than
      # exist does not produce an out-of-range negative index.
      #
      # NOTE(review): a quantity of 0 yields the range 0..-1 (see #head) -- confirm.
      #
      # @param [Integer] quantity (10) The number of elements to display from the bottom.
      def tail(quantity = 10)
        start = [-quantity, -size].max
        row.at start..-1
      end
      alias last tail

      # Extract a dataframe given row indexes or positions
      # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
      # @param by_position [Boolean] when false, keys are first translated to positions with @index.pos
      # @return [DaruLite::DataFrame] an empty dataframe when keys is []
      def get_sub_dataframe(keys, by_position: true)
        return DaruLite::DataFrame.new({}) if keys == []

        keys = @index.pos(*keys) unless by_position

        sub_df = row_at(*keys)
        # #row_at returns a Vector for a single position; turn it back into a
        # one-row dataframe so the return type is always a DataFrame.
        sub_df = sub_df.to_df.transpose if sub_df.is_a?(DaruLite::Vector)

        sub_df
      end

      # Values of vector +v+ as an Array, or an Array of +size+ nils when the
      # dataframe has no vector with that name.
      # @param v [Object] vector name
      # @return [Array]
      def get_vector_anyways(v)
        @vectors.include?(v) ? self[v].to_a : Array.new(size)
      end

      # @param indexes [Array] index(s) at which row tuples are retrieved
      # @return [Array] returns array of row tuples at given index(s)
      # @example Using DaruLite::Index
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #
      #   df.access_row_tuples_by_indexs(1,2)
      #   # => [[2, "a"], [3, "b"]]
      #
      #   df.index = DaruLite::Index.new([:one,:two,:three])
      #   df.access_row_tuples_by_indexs(:one,:three)
      #   # => [[1, "a"], [3, "b"]]
      #
      # @example Using DaruLite::MultiIndex
      #   mi_idx = DaruLite::MultiIndex.from_tuples [
      #     [:a,:one,:bar],
      #     [:a,:one,:baz],
      #     [:b,:two,:bar],
      #     [:a,:two,:baz],
      #   ]
      #   df_mi = DaruLite::DataFrame.new({
      #     a: 1..4,
      #     b: 'a'..'d'
      #   }, index: mi_idx )
      #
      #   df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
      #   # => [[3, "c"]]
      #   df_mi.access_row_tuples_by_indexs(:a)
      #   # => [[1, "a"], [2, "b"], [4, "d"]]
      def access_row_tuples_by_indexs(*indexes)
        return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if
          @index.is_a?(DaruLite::MultiIndex)

        positions = @index.pos(*indexes)
        if positions.is_a? Numeric
          row = get_rows_for([positions])
          # Always hand back an array of tuples, even for a single row.
          row.first.is_a?(Array) ? row : [row]
        else
          new_rows = get_rows_for(indexes, by_position: false)
          indexes.map { |index| new_rows.map { |r| r[index] } }
        end
      end

      # Split the dataframe into many dataframes based on category vector
      # @param [object] cat_name name of category vector to split the dataframe
      # @return [Array] array of dataframes split by category with category vector
      #   used to split not included
      # @raise [ArgumentError] when the named vector is not a category vector
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #   df.to_category :b
      #   df.split_by_category :b
      #   # => [#<DaruLite::DataFrame: a (2x1)>
      #   #       a
      #   #   0   1
      #   #   1   2,
      #   # #<DaruLite::DataFrame: b (1x1)>
      #   #       a
      #   #   2   3]
      def split_by_category(cat_name)
        cat_dv = self[cat_name]
        raise ArgumentError, "#{cat_name} is not a category vector" unless
          cat_dv.category?

        cat_dv.categories.map do |cat|
          where(cat_dv.eq cat)
            .rename(cat)
            .delete_vector cat_name
        end
      end

      # Return the indexes of all the numeric vectors. Will include vectors with nils
      # along with numbers.
      def numeric_vectors
        # FIXME: Why _with_index ?..
        each_vector_with_index
          .select { |vec, _i| vec.numeric? }
          .map(&:last)
      end

      # Names in @vectors whose vector reports itself as numeric.
      def numeric_vector_names
        @vectors.select { |v| self[v].numeric? }
      end

      # Return a DataFrame of only the numerical Vectors. If clone: false
      # is specified as option, only a *view* of the Vectors will be
      # returned. Defaults to clone: true.
      def only_numerics(opts = {})
        cln = opts[:clone] != false
        # Computed once: each call to #numeric_vectors scans every vector.
        names = numeric_vectors
        arry = names.map { |v| self[v] }

        order = Index.new(names)
        DaruLite::DataFrame.new(arry, clone: cln, order: order, index: @index)
      end

      private

      # Vector-axis lookup: a Range selects a subset of vectors, otherwise the
      # lookup is delegated according to whether @vectors is a MultiIndex.
      def access_vector(*names)
        if names.first.is_a?(Range)
          dup(@vectors.subset(names.first))
        elsif @vectors.is_a?(MultiIndex)
          access_vector_multi_index(*names)
        else
          access_vector_single_index(*names)
        end
      end

      # Lookup for a MultiIndex of vectors. Returns the stored vector when the
      # names resolve to a single Integer position, otherwise a DataFrame of
      # all matching vectors.
      def access_vector_multi_index(*names)
        pos = @vectors[names]

        return @data[pos] if pos.is_a?(Integer)

        new_vectors = pos.map { |tuple| @data[@vectors[tuple]] }

        # A partial tuple was given: drop the levels the caller already fixed.
        pos = pos.drop_left_level(names.size) if names.size < @vectors.width

        DaruLite::DataFrame.new(new_vectors, index: @index, order: pos)
      end

      # Lookup for a single-level index of vectors. One name resolving to a
      # numeric position returns the stored vector; anything else returns a
      # DataFrame of the selected vectors.
      # @raise [IndexError] when a single requested name does not exist
      def access_vector_single_index(*names)
        if names.count < 2
          begin
            pos = @vectors.is_a?(DaruLite::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
          rescue IndexError
            raise IndexError, "Specified vector #{names.first} does not exist"
          end
          return @data[pos] if pos.is_a?(Numeric)

          names = pos
        end

        new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h

        order = names.is_a?(Array) ? DaruLite::Index.new(names) : names
        DaruLite::DataFrame.new(new_vectors, order: order, index: @index, name: @name)
      end

      # Row-axis lookup by index label(s): a Vector for a single numeric
      # position, a DataFrame otherwise.
      def access_row(*indexes)
        positions = @index.pos(*indexes)

        if positions.is_a? Numeric
          row = get_rows_for([positions])
          DaruLite::Vector.new row, index: @vectors, name: indexes.first
        else
          new_rows = get_rows_for(indexes, by_position: false)
          DaruLite::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
        end
      end

      # @param keys [Array] can be an array of positions (if by_position is true) or indexes (if by_position is false)
      # because of coercion by DaruLite::Vector#at and DaruLite::Vector#[], can return either an Array of
      # values (representing a row) or an array of Vectors (that can be seen as rows)
      # @raise [RuntimeError] when keys is not an Array
      def get_rows_for(keys, by_position: true)
        # Explicit message: a bare `raise` here would surface as "unhandled exception".
        raise "keys must be an Array, got #{keys.class}" unless keys.is_a?(Array)

        if by_position
          pos = keys
          @data.map { |vector| vector.at(*pos) }
        else
          # TODO: for now (2018-07-27), it is different than using
          #    get_rows_for(@index.pos(*keys))
          #    because DaruLite::Vector#at and DaruLite::Vector#[] don't handle DaruLite::MultiIndex the same way
          indexes = keys
          @data.map { |vec| vec[*indexes] }
        end
      end

      # coerce ranges, integers and array in appropriate ways
      #
      # A single Integer is returned as-is, a single Range is expanded against
      # 0...size, and two or more positions are returned as the given Array.
      # @raise [ArgumentError] when the single position is neither Integer nor Range
      def coerce_positions(*positions, size)
        if positions.size == 1
          case positions.first
          when Integer
            positions.first
          when Range
            size.times.to_a[positions.first]
          else
            raise ArgumentError, 'Unknown position type.'
          end
        else
          positions
        end
      end
    end
  end
end
@@ -0,0 +1,144 @@
module DaruLite
  class DataFrame
    # Row and vector filtering for a DataFrame.
    module Filterable
      # Return unique rows by vector specified or all vectors
      #
      # @param vtrs [String][Symbol] vector names(s) that should be considered
      #
      # @example
      #
      #    => #<DaruLite::DataFrame(6x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #     2   3   c
      #     3   4   f
      #
      #    2.3.3 :> df.uniq
      #    => #<DaruLite::DataFrame(5x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #     3   4   f
      #
      #    2.3.3 :> df.uniq(:a)
      #    => #<DaruLite::DataFrame(4x2)>
      #         a   b
      #     0   1   a
      #     1   2   b
      #     2   3   c
      #     3   4   d
      #
      def uniq(*vtrs)
        vecs = vtrs.empty? ? vectors.to_a : vtrs
        grouped = group_by(vecs)
        # Keep the first index recorded for every group, in sorted order.
        indexes = grouped.groups.values.map { |v| v[0] }.sort
        row[*indexes]
      end

      # Retain vectors or rows if the block returns a truthy value.
      #
      # == Description
      #
      # For filtering out certain rows/vectors based on their values,
      # use the #filter method. By default it iterates over vectors and
      # keeps those vectors for which the block returns true. It accepts
      # an optional axis argument which lets you specify whether you want
      # to iterate over vectors or rows.
      #
      # == Arguments
      #
      # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
      # Default to :vector.
      #
      # == Usage
      #
      #   # Filter vectors
      #
      #   df.filter do |vector|
      #     vector.type == :numeric and vector.median < 50
      #   end
      #
      #   # Filter rows
      #
      #   df.filter(:row) do |row|
      #     row[:a] + row[:d] < 100
      #   end
      def filter(axis = :vector, &block)
        dispatch_to_axis_pl axis, :filter, &block
      end

      # Returns a dataframe in which rows with any of the mentioned values
      # are ignored.
      # @param [Array] values to reject to form the new dataframe
      # @return [DaruLite::DataFrame] Data Frame with only rows which doesn't
      #   contain the mentioned values
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
      #     b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   8],
      #     c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
      #   }, index: 11..18)
      #   df.reject_values nil, Float::NAN
      #   # => #<DaruLite::DataFrame(2x3)>
      #   #       a   b   c
      #   #   11   1   a   a
      #   #   18   7   8   7
      #
      # NOTE(review): when every row contains a rejected value, #row_at is
      # called with no positions; what that returns is not visible here -- confirm.
      def reject_values(*values)
        positions =
          size.times.to_a - @data.flat_map { |vec| vec.positions(*values) }
        # Handle the case when positions size is 1 and #row_at wouldn't return a df
        if positions.size == 1
          pos = positions.first
          row_at(pos..pos)
        else
          row_at(*positions)
        end
      end

      # Removes (via #delete_at_position) every row for which the block returns
      # a falsy value. Positions are deleted from the highest downwards so the
      # positions still to be deleted are not shifted by earlier deletions.
      #
      # @yield [row] the result of #row_at for each position
      # @return [Enumerator] when no block is given
      def keep_row_if
        return to_enum(:keep_row_if) unless block_given?

        @index.size.times
              .reject { |position| yield(row_at(position)) }
              .reverse_each { |position| delete_at_position(position) }
      end

      # Removes (via #delete_vector) every vector for which the block returns a
      # falsy value.
      #
      # @yield [vector, name] the stored vector and its name
      # @return [Enumerator] when no block is given
      def keep_vector_if
        return to_enum(:keep_vector_if) unless block_given?

        @vectors.each do |vector|
          delete_vector(vector) unless yield(@data[@vectors[vector]], vector)
        end
      end

      # creates a new vector with the data of a given field which the block returns true
      def filter_vector(vec, &block)
        DaruLite::Vector.new(each_row.select(&block).map { |row| row[vec] })
      end

      # Iterates over each row and retains it in a new DataFrame if the block returns
      # true for that row.
      def filter_rows
        return to_enum(:filter_rows) unless block_given?

        keep_rows = @index.map { |index| yield access_row(index) }

        where keep_rows
      end

      # Iterates over each vector and retains it in a new DataFrame if the block returns
      # true for that vector.
      def filter_vectors(&block)
        return to_enum(:filter_vectors) unless block

        dup.tap { |df| df.keep_vector_if(&block) }
      end

      # Query a DataFrame by passing a DaruLite::Core::Query::BoolArray object.
      def where(bool_array)
        DaruLite::Core::Query.df_where self, bool_array
      end
    end
  end
end
@@ -0,0 +1,179 @@
module DaruLite
  class DataFrame
    # Import/export entry points for DataFrame. Every method delegates to
    # DaruLite::IO, except the Marshal hooks (_dump / _load).
    module IOAble
      # Class-level constructors; added to the including class by .included.
      module ClassMethods
        # Load data from a CSV file. Specify an optional block to grab the CSV
        # object and pre-condition it (for example use the `convert` or
        # `header_convert` methods).
        #
        # == Arguments
        #
        # * path - Local path / Remote URL of the file to load specified as a String.
        #
        # == Options
        #
        # Accepts the same options as the DaruLite::DataFrame constructor and CSV.open()
        # and uses those to eventually construct the resulting DataFrame.
        #
        # == Verbose Description
        #
        # You can specify all the options to the `.from_csv` function that you
        # do to the Ruby `CSV.read()` function, since this is what is used internally.
        #
        # For example, if the columns in your CSV file are separated by something
        # other than commas, you can use the `:col_sep` option. If you want to
        # convert numeric values to numbers and not keep them as strings, you can
        # use the `:converters` option and set it to `:numeric`.
        #
        # The `.from_csv` function uses the following defaults for reading CSV files
        # (that are passed into the `CSV.read()` function):
        #
        #   {
        #     :col_sep           => ',',
        #     :converters        => :numeric
        #   }
        def from_csv(path, opts = {}, &block)
          DaruLite::IO.from_csv path, opts, &block
        end

        # Read data from an Excel file into a DataFrame.
        #
        # == Arguments
        #
        # * path - Path of the file to be read.
        #
        # == Options
        #
        # * :worksheet_id - ID of the worksheet that is to be read.
        def from_excel(path, opts = {}, &block)
          DaruLite::IO.from_excel path, opts, &block
        end

        # Read a database query and returns a DataFrame
        #
        # @param dbh [DBI::DatabaseHandle, String] A DBI connection OR Path to a SQLite3 database.
        # @param query [String] The query to be executed
        #
        # @return A dataframe containing the data resulting from the query
        #
        # USE:
        #
        #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
        #  DaruLite::DataFrame.from_sql(dbh, "SELECT * FROM test")
        #
        #  #Alternatively
        #
        #  require 'dbi'
        #  DaruLite::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
        def from_sql(dbh, query)
          DaruLite::IO.from_sql dbh, query
        end

        # Read a dataframe from AR::Relation
        #
        # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
        # @param fields [Array] Field names to be loaded (optional)
        #
        # @return A dataframe containing the data loaded from the relation
        #
        # USE:
        #
        #   # When Post model is defined as:
        #   class Post < ActiveRecord::Base
        #     scope :active, -> { where.not(published_at: nil) }
        #   end
        #
        #   # You can load active posts into a dataframe by:
        #   DaruLite::DataFrame.from_activerecord(Post.active, :title, :published_at)
        def from_activerecord(relation, *fields)
          DaruLite::IO.from_activerecord relation, *fields
        end

        # Read the database from a plaintext file. For this method to work,
        # the data should be present in a plain text file in columns. See
        # spec/fixtures/bank2.dat for an example.
        #
        # == Arguments
        #
        # * path - Path of the file to be read.
        # * fields - Vector names of the resulting database.
        #
        # == Usage
        #
        #   df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
        def from_plaintext(path, fields)
          DaruLite::IO.from_plaintext path, fields
        end

        # Marshal hook: rebuild a DataFrame from the String produced by #_dump.
        #
        # SECURITY(review): Marshal.load can instantiate arbitrary objects and
        # must never be fed untrusted input. Only load dumps that this
        # application produced itself.
        #
        # @param data [String] marshalled Hash with :data, :index, :order and :name
        # @return [DaruLite::DataFrame]
        def _load(data)
          h = Marshal.load data
          DaruLite::DataFrame.new(
            h[:data],
            index: h[:index],
            order: h[:order],
            name: h[:name]
          )
        end
      end

      # Mixin hook: also extend the including class with ClassMethods so the
      # constructors above are available as class methods.
      def self.included(base)
        base.extend ClassMethods
      end

      # Write this DataFrame to a CSV file.
      #
      # == Arguments
      #
      # * filename - Path of CSV file where the DataFrame is to be saved.
      #
      # == Options
      #
      # * convert_comma - If set to *true*, will convert any commas in any
      # of the data to full stops ('.').
      # All the options accepted by CSV.read() can also be passed into this
      # function.
      def write_csv(filename, opts = {})
        DaruLite::IO.dataframe_write_csv self, filename, opts
      end

      # Write this dataframe to an Excel Spreadsheet
      #
      # == Arguments
      #
      # * filename - The path of the file where the DataFrame should be written.
      def write_excel(filename, opts = {})
        DaruLite::IO.dataframe_write_excel self, filename, opts
      end

      # Insert each case of the DataFrame on the selected table
      #
      # == Arguments
      #
      # * dbh - DBI database connection object.
      # * table - The table to insert into.
      #
      # == Usage
      #
      #  ds = DaruLite::DataFrame.new({:id=>DaruLite::Vector.new([1,2,3]), :name=>DaruLite::Vector.new(["a","b","c"])})
      #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
      #  ds.write_sql(dbh,"test")
      def write_sql(dbh, table)
        DaruLite::IO.dataframe_write_sql self, dbh, table
      end

      # Use marshalling to save dataframe to a file.
      def save(filename)
        DaruLite::IO.save self, filename
      end

      # Marshal hook: serialise the frame as a Hash of its data, index labels,
      # vector order and name. Index and order are flattened with #to_a.
      #
      # @param _depth [Integer] supplied by Marshal, unused
      # @return [String]
      def _dump(_depth)
        Marshal.dump(
          data: @data,
          index: @index.to_a,
          order: @vectors.to_a,
          name: @name
        )
      end
    end
  end
end