daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,374 @@
1
+ module DaruLite
2
+ class MultiIndex < Index # rubocop:disable Metrics/ClassLength
3
+ def each(&block)
4
+ to_a.each(&block)
5
+ end
6
+
7
+ def map(&block)
8
+ to_a.map(&block)
9
+ end
10
+
11
+ attr_reader :labels, :name
12
+
13
+ def levels
14
+ @levels.map(&:keys)
15
+ end
16
+
17
+ # names and levels should be of same size. If size of Array `name` is less
18
+ # or greater than size of array `levels` then it raises `SizeError`.
19
+ # If the user doesn't want to name a particular level, they must put an
20
+ # empty string at that position of Array `name`.
21
+ # For example, for a multi_index of 3 levels where level 0 is left unnamed,
22
+ # use multi_index.name = ['', 'level1_name', 'level2_name']
23
+ #
24
+ # @example
25
+ #
26
+ # # set the name during initialization
27
+ #
28
+ # mi = DaruLite::MultiIndex.new(
29
+ # levels: [[:a,:b,:c], [:one, :two]],
30
+ # labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2'])
31
+ #
32
+ # # =>
33
+ # # <DaruLite::MultiIndex(6x2)>
34
+ # # s1 s2
35
+ # # a one
36
+ # # two
37
+ # # b one
38
+ # # two
39
+ # # c one
40
+ # # two
41
+ #
42
+ # # set new name
43
+ #
44
+ # mi.name = ['k1', 'k2']
45
+ # => ["k1", "k2"]
46
+ #
47
+ # mi
48
+ # =>
49
+ # # #<DaruLite::MultiIndex(6x2)>
50
+ # # k1 k2
51
+ # # a one
52
+ # # two
53
+ # # b one
54
+ # # two
55
+ # # c one
56
+ # # two
57
+ #
58
+ # # access the name
59
+ #
60
+ # mi.name
61
+ # => ["k1", "k2"]
62
+ #
63
+ # # If you don't want to name level 0
64
+ #
65
+ # mi.name = ['', 'k2']
66
+ # => ["", "k2"]
67
+ #
68
+ # mi
69
+ # =>
70
+ # #<DaruLite::MultiIndex(6x2)>
71
+ # # k2
72
+ # # a one
73
+ # # two
74
+ # # b one
75
+ # # two
76
+ # # c one
77
+ # # two
78
+ #
79
+ def initialize(opts = {})
80
+ labels = opts[:labels]
81
+ levels = opts[:levels]
82
+
83
+ raise ArgumentError, 'Must specify both labels and levels' unless labels && levels
84
+ raise ArgumentError, 'Labels and levels should be same size' if labels.size != levels.size
85
+ raise ArgumentError, 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
86
+
87
+ @labels = labels
88
+ @levels = levels.map { |e| e.map.with_index.to_h }
89
+ self.name = opts[:name] unless opts[:name].nil?
90
+ end
91
+
92
+ def name=(names)
93
+ validate_name names, @labels
94
+ @name = names
95
+ end
96
+
97
+ def incorrect_fields?(_labels, levels)
98
+ levels[0].size # FIXME: without this exact call some specs are failing
99
+
100
+ levels.any? { |e| e.uniq.size != e.size }
101
+ end
102
+
103
+ private :incorrect_fields?
104
+
105
# Builds a MultiIndex from one array of keys per level.
#
# Each level's distinct keys (ordered by their `to_s`) become that level's
# `levels` entry; every key is then replaced by its position in that list
# to form `labels`.
#
# @param arrays [Array<Array>] one array per level
# @return [MultiIndex]
def self.from_arrays(arrays)
  levels = arrays.map { |level_values| level_values.uniq.sort_by(&:to_s) }

  labels = arrays.each_with_index.map do |level_values, depth|
    # One Hash lookup per value instead of a linear `Array#index` scan.
    # Hash also uses the same eql?/hash semantics as `uniq` above, so keys
    # that are `==` but not `eql?` (1 and 1.0) keep their own label.
    positions = levels[depth].each_with_index.to_h
    level_values.map { |value| positions[value] }
  end

  MultiIndex.new labels: labels, levels: levels
end
115
+
116
+ def self.from_tuples(tuples)
117
+ from_arrays tuples.transpose
118
+ end
119
+
120
+ def self.try_from_tuples(tuples)
121
+ from_tuples(tuples) if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
122
+ end
123
+
124
+ def [](*key)
125
+ key.flatten!
126
+ if key[0].is_a?(Range)
127
+ retrieve_from_range(key[0])
128
+ elsif key[0].is_a?(Integer) && key.size == 1
129
+ try_retrieve_from_integer(key[0])
130
+ else
131
+ begin
132
+ retrieve_from_tuples key
133
+ rescue NoMethodError
134
+ raise IndexError, "Specified index #{key.inspect} do not exist"
135
+ end
136
+ end
137
+ end
138
+
139
+ def valid?(*indexes)
140
+ # FIXME: This is perhaps not a good method
141
+ pos(*indexes)
142
+ true
143
+ rescue IndexError
144
+ false
145
+ end
146
+
147
+ # Returns positions given indexes or positions
148
+ # @note If the argument is both a valid index and a valid position,
149
+ # it will be treated as a valid index
150
+ # @param indexes [Array<object>] indexes or positions
151
+ # @example
152
+ # idx = DaruLite::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
153
+ # idx.pos :a
154
+ # # => [0, 1]
155
+ def pos(*indexes)
156
+ if indexes.first.is_a? Integer
157
+ return indexes.first if indexes.size == 1
158
+
159
+ return indexes
160
+ end
161
+ res = self[indexes]
162
+ return res if res.is_a? Integer
163
+
164
+ res.map { |i| self[i] }
165
+ end
166
+
167
+ def subset(*indexes)
168
+ if indexes.first.is_a? Integer
169
+ MultiIndex.from_tuples(indexes.map { |index| key(index) })
170
+ else
171
+ self[indexes].conform indexes
172
+ end
173
+ end
174
+
175
+ # Takes positional values and returns subset of the self
176
+ # capturing the indexes at mentioned positions
177
+ # @param positions [Array<Integer>] positional values
178
+ # @return [object] index object
179
+ # @example
180
+ # idx = DaruLite::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
181
+ # idx.at 0, 1
182
+ # # => #<DaruLite::MultiIndex(2x2)>
183
+ # # a one
184
+ # # two
185
+ def at(*positions)
186
+ positions = preprocess_positions(*positions)
187
+ validate_positions(*positions)
188
+ if positions.is_a? Integer
189
+ key(positions)
190
+ else
191
+ DaruLite::MultiIndex.from_tuples(positions.map { |v| key(v) })
192
+ end
193
+ end
194
+
195
+ def add(*indexes)
196
+ DaruLite::MultiIndex.from_tuples(to_a + [indexes])
197
+ end
198
+
199
+ def reorder(new_order)
200
+ from = to_a
201
+ MultiIndex.from_tuples(new_order.map { |i| from[i] })
202
+ end
203
+
204
+ def try_retrieve_from_integer(int)
205
+ @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
206
+ end
207
+
208
+ def retrieve_from_range(range)
209
+ MultiIndex.from_tuples(range.map { |index| key(index) })
210
+ end
211
+
212
+ def retrieve_from_tuples(key)
213
+ chosen = []
214
+
215
+ key.each_with_index do |k, depth|
216
+ level_index = @levels[depth][k]
217
+ raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
218
+
219
+ label = @labels[depth]
220
+ chosen = find_all_indexes label, level_index, chosen
221
+ end
222
+
223
+ return chosen[0] if chosen.size == 1 && key.size == @levels.size
224
+
225
+ multi_index_from_multiple_selections(chosen)
226
+ end
227
+
228
+ def multi_index_from_multiple_selections(chosen)
229
+ MultiIndex.from_tuples(chosen.map { |e| key(e) })
230
+ end
231
+
232
+ def find_all_indexes(label, level_index, chosen)
233
+ if chosen.empty?
234
+ label.each_with_index
235
+ .select { |lbl, _| lbl == level_index }.map(&:last)
236
+ else
237
+ chosen.keep_if { |c| label[c] == level_index }
238
+ end
239
+ end
240
+
241
+ def remove_layer(layer_index)
242
+ @levels.delete_at(layer_index)
243
+ @labels.delete_at(layer_index)
244
+ @name&.delete_at(layer_index)
245
+
246
+ coerce_index
247
+ end
248
+
249
+ def coerce_index
250
+ if @levels.size == 1
251
+ elements = to_a.flatten
252
+
253
+ if elements.uniq.length == elements.length
254
+ DaruLite::Index.new(elements)
255
+ else
256
+ DaruLite::CategoricalIndex.new(elements)
257
+ end
258
+ else
259
+ self
260
+ end
261
+ end
262
+
263
+ # Array `name` must have same length as levels and labels.
264
+ def validate_name(names, levels)
265
+ error_msg = "'names' and 'levels' should be of same size. Size of the " \
266
+ "'name' array is #{names.size} and size of the MultiIndex 'levels' and " \
267
+ "'labels' is #{labels.size}."
268
+ suggestion_msg = 'If you do not want to set name for particular level ' \
269
+ "(say level 'i') then put empty string on index 'i' of the 'name' Array."
270
+
271
+ raise SizeError, error_msg if names.size > levels.size
272
+ raise SizeError, [error_msg, suggestion_msg].join("\n") if names.size < levels.size
273
+ end
274
+
275
+ private :find_all_indexes, :multi_index_from_multiple_selections,
276
+ :retrieve_from_range, :retrieve_from_tuples, :validate_name
277
+
278
+ def key(index)
279
+ raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
280
+
281
+ @labels
282
+ .each_with_index
283
+ .map { |label, i| @levels[i].keys[label[index]] }
284
+ end
285
+
286
+ def dup
287
+ MultiIndex.new levels: levels.dup, labels: labels.dup, name: @name&.dup
288
+ end
289
+
290
+ def drop_left_level(by = 1)
291
+ MultiIndex.from_arrays to_a.transpose[by..]
292
+ end
293
+
294
+ def |(other)
295
+ MultiIndex.from_tuples(to_a | other.to_a)
296
+ end
297
+
298
+ def &(other)
299
+ MultiIndex.from_tuples(to_a & other.to_a)
300
+ end
301
+
302
+ def empty?
303
+ @labels.flatten.empty? && @levels.all?(&:empty?)
304
+ end
305
+
306
# Tells whether the given (possibly partial) tuple occurs in this index.
#
# A partial tuple is matched against the leftmost levels only.
#
# @param tuple [Enumerable] keys, one per level starting from level 0
# @return [Boolean] false for non-Enumerable input, for unknown keys and
#   for tuples with more keys than the index has levels
def include?(tuple)
  return false unless tuple.is_a? Enumerable

  # `to_a` first: not every Enumerable (e.g. Range) implements #flatten
  keys = tuple.to_a.flatten
  # A tuple deeper than the index cannot match; without this guard
  # `@levels[depth]` would be nil below and raise NoMethodError.
  return false if keys.size > @levels.size

  positions = keys.each_with_index.map { |k, depth| @levels[depth][k] }
  @labels[0...keys.size].transpose.include?(positions)
end
313
+
314
+ def size
315
+ @labels[0].size
316
+ end
317
+
318
+ def width
319
+ @levels.size
320
+ end
321
+
322
+ def ==(other)
323
+ self.class == other.class &&
324
+ labels == other.labels &&
325
+ levels == other.levels
326
+ end
327
+
328
# Materialises the index as an array of tuples, one per row.
#
# @return [Array<Array>] each tuple holds the key of every level for that row
def to_a
  # Build each level's key list once; going through `key` per row would
  # rebuild it (Hash#keys allocates a new array) for every single cell.
  level_keys = @levels.map(&:keys)

  Array.new(size) do |row|
    @labels.each_with_index.map { |label, depth| level_keys[depth][label[row]] }
  end
end
331
+
332
+ def values
333
+ Array.new(size) { |i| i }
334
+ end
335
+
336
+ def inspect(threshold = 20)
337
+ "#<DaruLite::MultiIndex(#{size}x#{width})>\n" +
338
+ Formatters::Table.format([], headers: @name, row_headers: sparse_tuples, threshold: threshold)
339
+ end
340
+
341
+ def to_html
342
+ path = File.expand_path('../iruby/templates/multi_index.html.erb', __dir__)
343
+ ERB.new(File.read(path).strip).result(binding)
344
+ end
345
+
346
+ # Provide a MultiIndex for sub vector produced
347
+ #
348
+ # @param input_indexes [Array] the input by user to index the vector
349
+ # @return [Object] the MultiIndex object for sub vector produced
350
+ def conform(input_indexes)
351
+ return self if input_indexes[0].is_a? Range
352
+
353
+ drop_left_level input_indexes.size
354
+ end
355
+
356
# Return tuples with nils in place of repeating values, like this:
#
#   [:a , :bar, :one]
#   [nil, nil , :two]
#   [nil, :foo, :one]
#
# The first tuple is always returned in full; in each later tuple the
# leading values equal to the previous tuple's are replaced with nil.
#
# @return [Array<Array>]
def sparse_tuples
  tuples = to_a

  # Iterate the tuples already built; `each_cons` on self would go through
  # `each` and build the whole tuple list a second time.
  sparse_rest = tuples.each_cons(2).map do |previous, current|
    changed = current.zip(previous).drop_while { |cur, prev| cur == prev }
    Array.new(current.size - changed.size) + changed.map(&:first)
  end

  [tuples.first] + sparse_rest
end
369
+
370
+ def to_df
371
+ DaruLite::DataFrame.new(@name.zip(to_a.transpose).to_h)
372
+ end
373
+ end
374
+ end
@@ -0,0 +1,21 @@
1
module DaruLite
  module IO
    module CSV
      # Extra CSV field converters, looked up by name in addition to the
      # ones shipped with Ruby's CSV library.
      #
      # Each converter takes the field and a second argument it ignores.
      CONVERTERS = {
        # Turns 'true'/'false' (any case, surrounding whitespace ignored)
        # into booleans; every other value is returned unchanged.
        boolean: lambda { |field, _info|
          # Only Strings can be interpreted; a nil (empty cell) or an
          # already converted value would raise on #downcase.
          next field unless field.is_a?(String)

          case field.downcase.strip
          when 'true'
            true
          when 'false'
            false
          else
            field
          end
        },
        # Identity converter: keeps the field exactly as read.
        string: lambda { |field, _info|
          field
        }
      }.freeze
    end
  end
end
@@ -0,0 +1,294 @@
1
+ module DaruLite
2
+ require_relative 'csv/converters'
3
+ module IOHelpers
4
+ class << self
5
+ def process_row(row, empty)
6
+ row.to_a.map do |c|
7
+ if empty.include?(c)
8
+ # FIXME: As far as I can guess, it will never work.
9
+ # It is called only inside `from_plaintext`, and there
10
+ # data is split on `\s+` -- there is no chance that
11
+ # "empty" (currently just '') will be between data?..
12
+ nil
13
+ else
14
+ try_string_to_number(c)
15
+ end
16
+ end
17
+ end
18
+
19
# Opens `path` for reading, either as an HTTP(S) URL or as a local file.
#
# @param path [String] URL or local file path
# @return [IO] an open handle; the caller is responsible for closing it
def open_local_or_remote_file(path)
  uri =
    begin
      URI.parse(path)
    rescue URI::InvalidURIError
      # Not a valid URI (e.g. a file name containing spaces): treat as local
      nil
    end

  # URI::HTTPS is a subclass of URI::HTTP, so one check covers both
  return uri.open if uri.is_a?(URI::HTTP)
  # Plain paths are opened verbatim; `uri.path` would cut them at '#' or '?'
  return File.open(path) if uri.nil? || uri.scheme.nil?

  File.open(uri.path)
end
23
+
24
+ private
25
+
26
+ INT_PATTERN = /^[-+]?\d+$/.freeze
27
+ FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/.freeze
28
+
29
+ def try_string_to_number(s)
30
+ case s
31
+ when INT_PATTERN
32
+ s.to_i
33
+ when FLOAT_PATTERN
34
+ s.tr(',', '.').to_f
35
+ else
36
+ s
37
+ end
38
+ end
39
+ end
40
+ end
41
+
42
+ module IO
43
+ class << self
44
+ # Functions for loading/writing Excel files.
45
+
46
+ def from_excel(path, opts = {})
47
+ opts = {
48
+ worksheet_id: 0,
49
+ row_id: 0
50
+ }.merge opts
51
+
52
+ worksheet, headers = read_from_excel(path, opts)
53
+ df = DaruLite::DataFrame.new({})
54
+ headers.each_with_index do |h, i|
55
+ col = worksheet.column(i).to_a
56
+ col.delete_at 0
57
+ df[h] = col
58
+ end
59
+
60
+ df
61
+ end
62
+
63
+ def read_from_excel(path, opts)
64
+ optional_gem 'spreadsheet', '~>1.3.0'
65
+
66
+ worksheet_id = opts[:worksheet_id]
67
+ row_id = opts[:row_id]
68
+ book = Spreadsheet.open path
69
+ worksheet = book.worksheet worksheet_id
70
+ headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)
71
+
72
+ [worksheet, headers]
73
+ end
74
+
75
# Writes a dataframe to an Excel file: a bold blue header row holding the
# vector names, followed by one sheet row per dataframe row.
#
# @param dataframe [DaruLite::DataFrame] the data to write
# @param path [String] destination file
# @param _opts [Hash] currently unused
def dataframe_write_excel(dataframe, path, _opts = {})
  # Load the optional dependency here as well, as `read_from_excel` does;
  # otherwise `Spreadsheet` is only defined if something else loaded it.
  optional_gem 'spreadsheet', '~>1.3.0'

  book = Spreadsheet::Workbook.new
  sheet = book.create_worksheet
  format = Spreadsheet::Format.new color: :blue, weight: :bold

  sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
  sheet.row(0).default_format = format
  i = 1 # row 0 holds the header
  dataframe.each_row do |row|
    sheet.row(i).concat(row.to_a)
    i += 1
  end

  book.write(path)
end
90
+
91
+ # Functions for loading/writing CSV files
92
+ def from_csv(path, opts = {})
93
+ daru_options, opts = from_csv_prepare_opts opts
94
+ # Preprocess headers for detecting and correcting repetition in
95
+ # case the :headers option is not specified.
96
+ hsh =
97
+ if opts[:headers]
98
+ from_csv_hash_with_headers(path, opts)
99
+ else
100
+ from_csv_hash(path, opts)
101
+ .tap { |hash| daru_options[:order] = hash.keys }
102
+ end
103
+ DaruLite::DataFrame.new(hsh, daru_options)
104
+ end
105
+
106
# Writes a dataframe to a CSV file.
#
# @param dataframe [DaruLite::DataFrame] the data to write
# @param path [String] destination file
# @param opts [Hash] options forwarded to `CSV.open`, plus:
#   - `:convert_comma` — when truthy, every value is written via `to_s`
#     with '.' replaced by ','
#   - `:headers` — pass `false` to skip the header row
# @return [nil]
def dataframe_write_csv(dataframe, path, opts = {})
  options = { converters: :numeric }.merge(opts)
  # :convert_comma is our own switch, not a CSV option, so it must not be
  # forwarded to CSV.open along with the real CSV options.
  convert_comma = options.delete(:convert_comma)
  write_headers = options[:headers] != false

  # Block form closes the file even when writing a row raises
  ::CSV.open(path, 'w', **options) do |writer|
    writer << dataframe.vectors.to_a if write_headers

    dataframe.each_row do |row|
      writer << if convert_comma
                  row.map { |v| v.to_s.tr('.', ',') }
                else
                  row.to_a
                end
    end
  end

  nil
end
124
+
125
+ # Execute a query and create a data frame from the result
126
+ #
127
+ # @param db [DBI::DatabaseHandle, String] A DBI connection OR Path to a SQlite3 database.
128
+ # @param query [String] The query to be executed
129
+ #
130
+ # @return A dataframe containing the data resulting from the query
131
+ def from_sql(db, query)
132
+ require 'daru_lite/io/sql_data_source'
133
+ SqlDataSource.make_dataframe(db, query)
134
+ end
135
+
136
# Inserts every row of a dataframe into a database table.
#
# NOTE(review): the table and column names are interpolated into the SQL
# text as-is (only the values are bound), so they must come from a trusted
# source.
#
# @param ds [DaruLite::DataFrame] the data to insert
# @param dbh [DBI::DatabaseHandle] handle used to prepare the statement
# @param table [String] target table name
# @return [true]
def dataframe_write_sql(ds, dbh, table)
  require 'dbi'
  columns = ds.vectors.to_a.join(',')
  placeholders = (['?'] * ds.vectors.size).join(',')
  query = "INSERT INTO #{table} (#{columns}) VALUES (#{placeholders})"
  sth = dbh.prepare(query)
  begin
    ds.each_row { |row| sth.execute(*row.to_a) }
  ensure
    # Release the prepared statement even when an insert fails
    sth.finish if sth.respond_to?(:finish)
  end
  true
end
143
+
144
+ # Load dataframe from AR::Relation
145
+ #
146
+ # @param relation [ActiveRecord::Relation] A relation to be used to load the contents of dataframe
147
+ #
148
+ # @return A dataframe containing the data in the given relation
149
+ def from_activerecord(relation, *fields)
150
+ fields = relation.klass.column_names if fields.empty?
151
+ fields = fields.map(&:to_sym)
152
+
153
+ result = relation.pluck(*fields).transpose
154
+ DaruLite::DataFrame.new(result, order: fields).tap(&:update)
155
+ end
156
+
157
+ # Loading data from plain text files
158
+
159
# Builds a dataframe from a whitespace-separated plain text file, one row
# per line.
#
# @param filename [String] path of the file to read
# @param fields [Array] vector names, used as the dataframe order
# @return [DaruLite::DataFrame]
def from_plaintext(filename, fields)
  ds = DaruLite::DataFrame.new({}, order: fields)
  # File.foreach closes the file once iteration finishes or raises
  File.foreach(filename) do |line|
    row = DaruLite::IOHelpers.process_row(line.strip.split(/\s+/), [''])
    # Skip a line holding only the 0x1A (SUB) character
    next if row == ["\x1A"]

    ds.add_row(row)
  end
  ds.update
  fields.each { |f| ds[f].rename f }
  ds
end
172
+
173
+ # Loading and writing Marshalled DataFrame/Vector
174
# Serialises an object to a file with Marshal.
#
# @param klass [Object] the object to dump (e.g. a DataFrame or Vector)
# @param filename [String] destination file
# @return [nil]
def save(klass, filename)
  # Marshal output is binary, so write in binary mode ('w' may translate
  # line endings on some platforms); the block form closes the file even
  # if dumping raises.
  File.open(filename, 'wb') { |fp| Marshal.dump(klass, fp) }
  nil
end
179
+
180
# Restores an object previously written with Marshal.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only use
# this on files from a trusted source.
#
# @param filename [String] file to read
# @return [Object, false] the restored object, or false if the file does
#   not exist
def load(filename)
  return false unless File.exist?(filename)

  # Marshal data is binary, so read in binary mode
  File.open(filename, 'rb') { |fp| Marshal.load(fp) }
end
189
+
190
+ private
191
+
192
+ def optional_gem(name, version)
193
+ gem name, version
194
+ require name
195
+ rescue LoadError
196
+ DaruLite.error "\nInstall the #{name} gem version #{version} for using #{name} functions."
197
+ end
198
+
199
+ DARU_OPT_KEYS = %i[clone order index name].freeze
200
+
201
# Applies the CSV defaults and splits the options into those meant for the
# DataFrame constructor and those meant for the CSV parser.
#
# @param opts [Hash] options given to `from_csv`; left unmodified
# @return [Array(Hash, Hash)] `[daru_options, csv_options]`
def from_csv_prepare_opts(opts)
  # Work on a copy so the caller's hash is not altered
  csv_options = opts.dup
  csv_options[:col_sep] ||= ','
  # `||=` here would turn an explicit `skip_blanks: false` back into true
  csv_options[:skip_blanks] = true if csv_options[:skip_blanks].nil?
  csv_options[:converters] = from_csv_prepare_converters(csv_options[:converters] || [:numeric])

  daru_options = csv_options.select { |k, _| DARU_OPT_KEYS.include?(k) }
  csv_options = csv_options.reject { |k, _| DARU_OPT_KEYS.include?(k) }
  [daru_options, csv_options]
end
213
+
214
+ def from_csv_prepare_converters(converters)
215
+ Array(converters).flat_map do |c|
216
+ if ::CSV::Converters[c]
217
+ ::CSV::Converters[c]
218
+ elsif DaruLite::IO::CSV::CONVERTERS[c]
219
+ DaruLite::IO::CSV::CONVERTERS[c]
220
+ else
221
+ c
222
+ end
223
+ end
224
+ end
225
+
226
# Parses a CSV source using the CSV library's own header handling and
# returns its columns.
#
# @param path [String] local path or HTTP(S) URL
# @param opts [Hash] CSV options; `:header_converters` defaults to `:symbol`
# @yield [table] optionally yields the parsed table before conversion
# @return [Hash] column name => array of that column's values
def from_csv_hash_with_headers(path, opts)
  opts[:header_converters] ||= :symbol
  io = DaruLite::IOHelpers.open_local_or_remote_file(path)
  begin
    ::CSV
      .parse(io, **opts)
      .tap { |c| yield c if block_given? }
      .by_col.to_h { |col_name, values| [col_name, values] }
  ensure
    # The handle is opened here, so release it here
    io.close
  end
end
233
+
234
# Parses a CSV source whose first row is the header and returns its
# columns. The header row is passed through `ArrayHelper.recode_repeated`
# before being used as the keys.
#
# @param path [String] local path or HTTP(S) URL
# @param opts [Hash] CSV options
# @yield [rows] optionally yields the parsed CSV before conversion
# @return [Hash] header => array of that column's values
def from_csv_hash(path, opts)
  io = DaruLite::IOHelpers.open_local_or_remote_file(path)
  rows =
    begin
      ::CSV
        .parse(io, **opts)
        .tap { |c| yield c if block_given? }
        .to_a
    ensure
      # The handle is opened here, so release it here
      io.close
    end

  headers = ArrayHelper.recode_repeated(rows.shift)
  columns = rows.transpose
  headers.each_with_index.to_h { |h, i| [h, columns[i]] }
end
244
+
245
+ def html_parse_table(table)
246
+ headers, headers_size = html_scrape_tag(table, 'th')
247
+ data, size = html_scrape_tag(table, 'td')
248
+ data = data.keep_if { |x| x.count == size }
249
+ order, indice = html_parse_hash(headers, size, headers_size) if headers_size >= size
250
+ return unless (indice.nil? || indice.count == data.count) && !order.nil? && order.count.positive?
251
+
252
+ { data: data.compact, index: indice, order: order }
253
+ end
254
+
255
+ def html_scrape_tag(table, tag)
256
+ arr = table.search('tr').map { |row| row.search(tag).map { |val| val.text.strip } }
257
+ size = arr.map(&:count).max
258
+ [arr, size]
259
+ end
260
+
261
+ # Splits headers (all th tags) into order and index. Wherein,
262
+ # Order : All <th> tags on first proper row of HTML table
263
+ # index : All <th> tags on first proper column of HTML table
264
+ def html_parse_hash(headers, size, headers_size)
265
+ headers_index = headers.find_index { |x| x.count == headers_size }
266
+ order = headers[headers_index]
267
+ order_index = order.count - size
268
+ order = order[order_index..]
269
+ indice = headers[headers_index + 1..].flatten
270
+ indice = nil if indice.to_a.empty?
271
+ [order, indice]
272
+ end
273
+
274
+ def html_search(table, match = nil)
275
+ match.nil? ? true : (table.to_s.include? match)
276
+ end
277
+
278
+ # Allows user to override the scraped order / index / data
279
+ def html_decide_values(scraped_val = {}, user_val = {})
280
+ %I[data index name order].each do |key|
281
+ user_val[key] ||= scraped_val[key]
282
+ end
283
+ user_val
284
+ end
285
+
286
+ def html_table_to_dataframe(table)
287
+ DaruLite::DataFrame.rows table[:data],
288
+ index: table[:index],
289
+ order: table[:order],
290
+ name: table[:name]
291
+ end
292
+ end
293
+ end
294
+ end