daru 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -7
- data/CONTRIBUTING.md +1 -1
- data/History.md +48 -1
- data/README.md +3 -3
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +3 -2
- data/lib/daru.rb +14 -6
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +1 -1
- data/lib/daru/core/group_by.rb +32 -15
- data/lib/daru/core/query.rb +4 -4
- data/lib/daru/dataframe.rb +196 -48
- data/lib/daru/date_time/index.rb +7 -5
- data/lib/daru/formatters/table.rb +1 -0
- data/lib/daru/index/index.rb +121 -33
- data/lib/daru/index/multi_index.rb +83 -3
- data/lib/daru/io/csv/converters.rb +18 -0
- data/lib/daru/io/io.rb +80 -11
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/statistics/dataframe.rb +9 -11
- data/lib/daru/maths/statistics/vector.rb +139 -32
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +169 -80
- data/lib/daru/version.rb +1 -1
- data/spec/category_spec.rb +19 -19
- data/spec/core/group_by_spec.rb +47 -0
- data/spec/core/query_spec.rb +55 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +118 -6
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +134 -41
- data/spec/index/multi_index_spec.rb +115 -31
- data/spec/io/io_spec.rb +201 -0
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/statistics/vector_spec.rb +136 -14
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +21 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +222 -72
- metadata +68 -23
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/index/index.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
-
class Index
|
2
|
+
class Index # rubocop:disable Metrics/ClassLength
|
3
3
|
include Enumerable
|
4
4
|
# It so happens that overriding the .new method in a superclass also
|
5
5
|
# tampers with the default .new method for classes that inherit from the
|
@@ -44,24 +44,31 @@ module Daru
|
|
44
44
|
end
|
45
45
|
|
46
46
|
attr_reader :relation_hash, :size
|
47
|
+
attr_accessor :name
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
49
|
+
# @example
|
50
|
+
#
|
51
|
+
# idx = Daru::Index.new [:one, 'one', 1, 2, :two]
|
52
|
+
# => #<Daru::Index(5): {one, one, 1, 2, two}>
|
53
|
+
#
|
54
|
+
# # set the name
|
55
|
+
#
|
56
|
+
# idx.name = "index_name"
|
57
|
+
# => "index_name"
|
58
|
+
#
|
59
|
+
# idx
|
60
|
+
# => #<Daru::Index(5): index_name {one, one, 1, 2, two}>
|
61
|
+
#
|
62
|
+
# # set the name during initialization
|
63
|
+
#
|
64
|
+
# idx = Daru::Index.new [:one, 'one', 1, 2, :two], name: "index_name"
|
65
|
+
# => #<Daru::Index(5): index_name {one, one, 1, 2, two}>
|
66
|
+
def initialize index, opts={}
|
67
|
+
index = guess_index index
|
62
68
|
@relation_hash = index.each_with_index.to_h.freeze
|
63
69
|
@keys = @relation_hash.keys
|
64
70
|
@size = @relation_hash.size
|
71
|
+
@name = opts[:name]
|
65
72
|
end
|
66
73
|
|
67
74
|
def ==(other)
|
@@ -106,21 +113,24 @@ module Daru
|
|
106
113
|
indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range
|
107
114
|
|
108
115
|
if indexes.size == 1
|
109
|
-
|
116
|
+
numeric_pos indexes.first
|
110
117
|
else
|
111
|
-
indexes.map { |index|
|
118
|
+
indexes.map { |index| numeric_pos index }
|
112
119
|
end
|
113
120
|
end
|
114
121
|
|
115
122
|
def subset *indexes
|
116
123
|
if indexes.first.is_a? Range
|
117
|
-
|
124
|
+
start = indexes.first.begin
|
125
|
+
en = indexes.first.end
|
126
|
+
|
127
|
+
subset_slice start, en
|
118
128
|
elsif include? indexes.first
|
119
129
|
# Assume 'indexes' contain indexes not positions
|
120
130
|
Daru::Index.new indexes
|
121
131
|
else
|
122
132
|
# Assume 'indexes' contain positions not indexes
|
123
|
-
Daru::Index.new
|
133
|
+
Daru::Index.new(indexes.map { |k| key k })
|
124
134
|
end
|
125
135
|
end
|
126
136
|
|
@@ -143,10 +153,11 @@ module Daru
|
|
143
153
|
end
|
144
154
|
|
145
155
|
def inspect threshold=20
|
156
|
+
name_part = @name ? "#{@name} " : ''
|
146
157
|
if size <= threshold
|
147
|
-
"#<#{self.class}(#{size}): {#{to_a.join(', ')}}>"
|
158
|
+
"#<#{self.class}(#{size}): #{name_part}{#{to_a.join(', ')}}>"
|
148
159
|
else
|
149
|
-
"#<#{self.class}(#{size}): {#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
|
160
|
+
"#<#{self.class}(#{size}): #{name_part}{#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
|
150
161
|
end
|
151
162
|
end
|
152
163
|
|
@@ -154,12 +165,27 @@ module Daru
|
|
154
165
|
start = args[0]
|
155
166
|
en = args[1]
|
156
167
|
|
168
|
+
start_idx = @relation_hash[start]
|
169
|
+
en_idx = @relation_hash[en]
|
170
|
+
|
171
|
+
if start_idx.nil?
|
172
|
+
nil
|
173
|
+
elsif en_idx.nil?
|
174
|
+
Array(start_idx..size-1)
|
175
|
+
else
|
176
|
+
Array(start_idx..en_idx)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
def subset_slice *args
|
181
|
+
start = args[0]
|
182
|
+
en = args[1]
|
183
|
+
|
157
184
|
if start.is_a?(Integer) && en.is_a?(Integer)
|
158
185
|
Index.new @keys[start..en]
|
159
186
|
else
|
160
187
|
start_idx = @relation_hash[start]
|
161
188
|
en_idx = @relation_hash[en]
|
162
|
-
|
163
189
|
Index.new @keys[start_idx..en_idx]
|
164
190
|
end
|
165
191
|
end
|
@@ -187,6 +213,27 @@ module Daru
|
|
187
213
|
@relation_hash.key? index
|
188
214
|
end
|
189
215
|
|
216
|
+
# @note Do not use it to check for Float::NAN as
|
217
|
+
# Float::NAN == Float::NAN is false
|
218
|
+
# Return a vector of booleans whose value at the ith position is either
|
219
|
+
# true or false depending upon whether index value at position i is equal to
|
220
|
+
# any of the values passed in the argument or not
|
221
|
+
# @param [Array] *indexes values to equate with
|
222
|
+
# @return [Daru::Vector] vector of boolean values
|
223
|
+
# @example
|
224
|
+
# dv = Daru::Index.new [1, 2, 3, :one, 'one']
|
225
|
+
# dv.is_values 1, 'one'
|
226
|
+
# # => #<Daru::Vector(5)>
|
227
|
+
# # 0 true
|
228
|
+
# # 1 false
|
229
|
+
# # 2 false
|
230
|
+
# # 3 false
|
231
|
+
# # 4 true
|
232
|
+
def is_values(*indexes) # rubocop:disable Style/PredicateName
|
233
|
+
bool_array = @relation_hash.keys.map { |r| indexes.include?(r) }
|
234
|
+
Daru::Vector.new(bool_array)
|
235
|
+
end
|
236
|
+
|
190
237
|
def empty?
|
191
238
|
@relation_hash.empty?
|
192
239
|
end
|
@@ -222,8 +269,47 @@ module Daru
|
|
222
269
|
self.class.new(new_order.map { |i| from[i] })
|
223
270
|
end
|
224
271
|
|
272
|
+
# Sorts an `Index` according to its values. Defaults to ascending order
|
273
|
+
# sorting.
|
274
|
+
#
|
275
|
+
# @param [Hash] opts the options for sort method.
|
276
|
+
# @option opts [Boolean] :ascending False, to get descending order.
|
277
|
+
#
|
278
|
+
# @return [Index] sorted `Index` according to its values.
|
279
|
+
#
|
280
|
+
# @example
|
281
|
+
# di = Daru::Index.new [100, 99, 101, 1, 2]
|
282
|
+
# # Say you want to sort in descending order
|
283
|
+
# di.sort(ascending: false) #=> Daru::Index.new [101, 100, 99, 2, 1]
|
284
|
+
# # Say you want to sort in ascending order
|
285
|
+
# di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
|
286
|
+
def sort opts={}
|
287
|
+
opts = {ascending: true}.merge(opts)
|
288
|
+
if opts[:ascending]
|
289
|
+
new_index, = @relation_hash.sort.transpose
|
290
|
+
else
|
291
|
+
new_index, = @relation_hash.sort.reverse.transpose
|
292
|
+
end
|
293
|
+
|
294
|
+
self.class.new(new_index)
|
295
|
+
end
|
296
|
+
|
225
297
|
private
|
226
298
|
|
299
|
+
def guess_index index
|
300
|
+
case index
|
301
|
+
when nil
|
302
|
+
[]
|
303
|
+
when Integer
|
304
|
+
index.times.to_a
|
305
|
+
when Enumerable
|
306
|
+
index.to_a
|
307
|
+
else
|
308
|
+
raise ArgumentError,
|
309
|
+
"Cannot create index from #{index.class} #{index.inspect}"
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
227
313
|
def preprocess_range rng
|
228
314
|
start = rng.begin
|
229
315
|
en = rng.end
|
@@ -243,22 +329,14 @@ module Daru
|
|
243
329
|
end
|
244
330
|
|
245
331
|
def by_multi_key *key
|
246
|
-
|
247
|
-
Daru::Index.new key.map { |k| k }
|
248
|
-
else
|
249
|
-
# Assume the user is specifing values for index not keys
|
250
|
-
# Return index object having keys corresponding to values provided
|
251
|
-
Daru::Index.new key.map { |k| key k }
|
252
|
-
end
|
332
|
+
key.map { |k| by_single_key k }
|
253
333
|
end
|
254
334
|
|
255
335
|
def by_single_key key
|
256
336
|
if @relation_hash.key?(key)
|
257
337
|
@relation_hash[key]
|
258
|
-
elsif key.is_a?(Numeric) && key < size
|
259
|
-
key
|
260
338
|
else
|
261
|
-
|
339
|
+
nil
|
262
340
|
end
|
263
341
|
end
|
264
342
|
|
@@ -266,7 +344,7 @@ module Daru
|
|
266
344
|
def validate_positions *positions
|
267
345
|
positions = [positions] if positions.is_a? Integer
|
268
346
|
positions.each do |pos|
|
269
|
-
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
347
|
+
raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
|
270
348
|
end
|
271
349
|
end
|
272
350
|
|
@@ -285,5 +363,15 @@ module Daru
|
|
285
363
|
positions
|
286
364
|
end
|
287
365
|
end
|
366
|
+
|
367
|
+
def numeric_pos key
|
368
|
+
if @relation_hash.key?(key)
|
369
|
+
@relation_hash[key]
|
370
|
+
elsif key.is_a?(Numeric) && (key < size && key >= -size)
|
371
|
+
key
|
372
|
+
else
|
373
|
+
raise IndexError, "Specified index #{key.inspect} does not exist"
|
374
|
+
end
|
375
|
+
end
|
288
376
|
end
|
289
377
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
-
class MultiIndex < Index
|
2
|
+
class MultiIndex < Index # rubocop:disable Metrics/ClassLength
|
3
3
|
def each(&block)
|
4
4
|
to_a.each(&block)
|
5
5
|
end
|
@@ -14,6 +14,68 @@ module Daru
|
|
14
14
|
@levels.map(&:keys)
|
15
15
|
end
|
16
16
|
|
17
|
+
# names and levels should be of same size. If size of Array `name` is less
|
18
|
+
# or greater than size of array `levels` then it raises `SizeError`.
|
19
|
+
# If the user doesn't want to name a particular level then the user must put
|
20
|
+
# an empty string at that index of Array `name`.
|
21
|
+
# For example, if there is a multi_index of 3 levels and the user doesn't want to name
|
22
|
+
# level 0, then do multi_index.name = ['', 'level1_name1', 'level2_name']
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
#
|
26
|
+
# # set the name during initialization
|
27
|
+
#
|
28
|
+
# mi = Daru::MultiIndex.new(
|
29
|
+
# levels: [[:a,:b,:c], [:one, :two]],
|
30
|
+
# labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2'])
|
31
|
+
#
|
32
|
+
# # =>
|
33
|
+
# # <Daru::MultiIndex(6x2)>
|
34
|
+
# # s1 s2
|
35
|
+
# # a one
|
36
|
+
# # two
|
37
|
+
# # b one
|
38
|
+
# # two
|
39
|
+
# # c one
|
40
|
+
# # two
|
41
|
+
#
|
42
|
+
# # set new name
|
43
|
+
#
|
44
|
+
# mi.name = ['k1', 'k2']
|
45
|
+
# => ["k1", "k2"]
|
46
|
+
#
|
47
|
+
# mi
|
48
|
+
# =>
|
49
|
+
# # #<Daru::MultiIndex(6x2)>
|
50
|
+
# # k1 k2
|
51
|
+
# # a one
|
52
|
+
# # two
|
53
|
+
# # b one
|
54
|
+
# # two
|
55
|
+
# # c one
|
56
|
+
# # two
|
57
|
+
#
|
58
|
+
# # access the name
|
59
|
+
#
|
60
|
+
# mi.name
|
61
|
+
# => ["k1", "k2"]
|
62
|
+
#
|
63
|
+
# # If you don't want to name level 0
|
64
|
+
#
|
65
|
+
# mi.name = ['', 'k2']
|
66
|
+
# => ["", "k2"]
|
67
|
+
#
|
68
|
+
# mi
|
69
|
+
# =>
|
70
|
+
# #<Daru::MultiIndex(6x2)>
|
71
|
+
# # k2
|
72
|
+
# # a one
|
73
|
+
# # two
|
74
|
+
# # b one
|
75
|
+
# # two
|
76
|
+
# # c one
|
77
|
+
# # two
|
78
|
+
#
|
17
79
|
def initialize opts={}
|
18
80
|
labels = opts[:labels]
|
19
81
|
levels = opts[:levels]
|
@@ -24,6 +86,12 @@ module Daru
|
|
24
86
|
|
25
87
|
@labels = labels
|
26
88
|
@levels = levels.map { |e| e.map.with_index.to_h }
|
89
|
+
self.name = opts[:name] unless opts[:name].nil?
|
90
|
+
end
|
91
|
+
|
92
|
+
def name=(names)
|
93
|
+
validate_name names, @labels
|
94
|
+
@name = names
|
27
95
|
end
|
28
96
|
|
29
97
|
def incorrect_fields?(_labels, levels)
|
@@ -171,8 +239,20 @@ module Daru
|
|
171
239
|
end
|
172
240
|
end
|
173
241
|
|
242
|
+
# Array `name` must have same length as levels and labels.
|
243
|
+
def validate_name names, levels
|
244
|
+
error_msg = "'names' and 'levels' should be of same size. Size of the "\
|
245
|
+
"'name' array is #{names.size} and size of the MultiIndex 'levels' and "\
|
246
|
+
"'labels' is #{labels.size}."
|
247
|
+
suggestion_msg = "If you don\'t want to set name for particular level " \
|
248
|
+
"(say level 'i') then put empty string on index 'i' of the 'name' Array."
|
249
|
+
|
250
|
+
raise SizeError, error_msg if names.size > levels.size
|
251
|
+
raise SizeError, [error_msg, suggestion_msg].join("\n") if names.size < levels.size
|
252
|
+
end
|
253
|
+
|
174
254
|
private :find_all_indexes, :multi_index_from_multiple_selections,
|
175
|
-
:retrieve_from_range, :retrieve_from_tuples
|
255
|
+
:retrieve_from_range, :retrieve_from_tuples, :validate_name
|
176
256
|
|
177
257
|
def key index
|
178
258
|
raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
|
@@ -232,7 +312,7 @@ module Daru
|
|
232
312
|
|
233
313
|
def inspect threshold=20
|
234
314
|
"#<Daru::MultiIndex(#{size}x#{width})>\n" +
|
235
|
-
Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
|
315
|
+
Formatters::Table.format([], headers: @name, row_headers: sparse_tuples, threshold: threshold)
|
236
316
|
end
|
237
317
|
|
238
318
|
def to_html
|
data/lib/daru/io/io.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
+
require_relative 'csv/converters.rb'
|
2
3
|
module IOHelpers
|
3
4
|
class << self
|
4
5
|
def process_row(row,empty)
|
@@ -76,7 +77,6 @@ module Daru
|
|
76
77
|
# Functions for loading/writing CSV files
|
77
78
|
def from_csv path, opts={}
|
78
79
|
daru_options, opts = from_csv_prepare_opts opts
|
79
|
-
|
80
80
|
# Preprocess headers for detecting and correcting repetition in
|
81
81
|
# case the :headers option is not specified.
|
82
82
|
hsh =
|
@@ -86,7 +86,6 @@ module Daru
|
|
86
86
|
from_csv_hash(path, opts)
|
87
87
|
.tap { |hash| daru_options[:order] = hash.keys }
|
88
88
|
end
|
89
|
-
|
90
89
|
Daru::DataFrame.new(hsh,daru_options)
|
91
90
|
end
|
92
91
|
|
@@ -111,11 +110,10 @@ module Daru
|
|
111
110
|
|
112
111
|
# Execute a query and create a data frame from the result
|
113
112
|
#
|
114
|
-
# @param dbh [DBI::DatabaseHandle] A DBI connection
|
113
|
+
# @param db [DBI::DatabaseHandle, String] A DBI connection OR path to a SQLite3 database.
|
115
114
|
# @param query [String] The query to be executed
|
116
115
|
#
|
117
116
|
# @return A dataframe containing the data resulting from the query
|
118
|
-
|
119
117
|
def from_sql(db, query)
|
120
118
|
require 'daru/io/sql_data_source'
|
121
119
|
SqlDataSource.make_dataframe(db, query)
|
@@ -186,13 +184,28 @@ module Daru
|
|
186
184
|
end
|
187
185
|
end
|
188
186
|
|
187
|
+
def from_html path, opts
|
188
|
+
page = Mechanize.new.get(path)
|
189
|
+
page.search('table').map { |table| html_parse_table table }
|
190
|
+
.keep_if { |table| html_search table, opts[:match] }
|
191
|
+
.compact
|
192
|
+
.map { |table| html_decide_values table, opts }
|
193
|
+
.map { |table| html_table_to_dataframe table }
|
194
|
+
rescue LoadError
|
195
|
+
raise 'Install the mechanize gem version 2.7.5 with `gem install mechanize`,'\
|
196
|
+
' for using the from_html function.'
|
197
|
+
end
|
198
|
+
|
189
199
|
private
|
190
200
|
|
191
|
-
DARU_OPT_KEYS = [
|
201
|
+
DARU_OPT_KEYS = %i[clone order index name].freeze
|
192
202
|
|
193
203
|
def from_csv_prepare_opts opts
|
194
204
|
opts[:col_sep] ||= ','
|
195
|
-
opts[:
|
205
|
+
opts[:skip_blanks] ||= true
|
206
|
+
opts[:converters] ||= [:numeric]
|
207
|
+
|
208
|
+
opts[:converters] = from_csv_prepare_converters(opts[:converters])
|
196
209
|
|
197
210
|
daru_options = opts.keys.each_with_object({}) do |k, hash|
|
198
211
|
hash[k] = opts.delete(k) if DARU_OPT_KEYS.include?(k)
|
@@ -200,11 +213,22 @@ module Daru
|
|
200
213
|
[daru_options, opts]
|
201
214
|
end
|
202
215
|
|
216
|
+
def from_csv_prepare_converters(converters)
|
217
|
+
converters.flat_map do |c|
|
218
|
+
if ::CSV::Converters[c]
|
219
|
+
::CSV::Converters[c]
|
220
|
+
elsif Daru::IO::CSV::CONVERTERS[c]
|
221
|
+
Daru::IO::CSV::CONVERTERS[c]
|
222
|
+
else
|
223
|
+
c
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
203
228
|
def from_csv_hash_with_headers(path, opts)
|
204
229
|
opts[:header_converters] ||= :symbol
|
205
|
-
|
206
230
|
::CSV
|
207
|
-
.
|
231
|
+
.parse(open(path), opts)
|
208
232
|
.tap { |c| yield c if block_given? }
|
209
233
|
.by_col.map { |col_name, values| [col_name, values] }.to_h
|
210
234
|
end
|
@@ -212,15 +236,60 @@ module Daru
|
|
212
236
|
def from_csv_hash(path, opts)
|
213
237
|
csv_as_arrays =
|
214
238
|
::CSV
|
215
|
-
.open(path,
|
239
|
+
.parse(open(path), opts)
|
216
240
|
.tap { |c| yield c if block_given? }
|
217
241
|
.to_a
|
218
|
-
|
219
242
|
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
|
220
243
|
csv_as_arrays = csv_as_arrays.transpose
|
221
|
-
|
222
244
|
headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
|
223
245
|
end
|
246
|
+
|
247
|
+
# Turns one scraped HTML table node into the pieces used to build a DataFrame.
#
# The <th> and <td> cells are collected row by row through html_scrape_tag.
# Only data rows whose cell count equals the widest <td> row are kept, and
# the header cells are split into column order and row index by
# html_parse_hash (only attempted when the widest <th> row is at least as
# wide as the widest <td> row).
#
# @param table a table node responding to +search+
#   (NOTE(review): presumably a Nokogiri node obtained via Mechanize - confirm)
# @return [Hash, nil] +{data:, index:, order:}+, or nil when the table has no
#   rows at all, no usable column order, or an index whose length differs
#   from the number of kept data rows
def html_parse_table(table)
  headers, headers_size = html_scrape_tag(table, 'th')
  data, size = html_scrape_tag(table, 'td')

  # A table without any <tr> yields nil sizes (max of an empty list);
  # comparing them below would raise NoMethodError, so skip such a table.
  return if headers_size.nil? || size.nil?

  data = data.keep_if { |x| x.count == size }
  order, indice = html_parse_hash(headers, size, headers_size) if headers_size >= size
  return unless (indice.nil? || indice.count == data.count) && !order.nil? && order.count>0
  {data: data.compact, index: indice, order: order}
end
|
255
|
+
|
256
|
+
def html_scrape_tag(table, tag)
|
257
|
+
arr = table.search('tr').map { |row| row.search(tag).map { |val| val.text.strip } }
|
258
|
+
size = arr.map(&:count).max
|
259
|
+
[arr, size]
|
260
|
+
end
|
261
|
+
|
262
|
+
# Splits headers (all th tags) into order and index. Wherein,
|
263
|
+
# Order : All <th> tags on first proper row of HTML table
|
264
|
+
# index : All <th> tags on first proper column of HTML table
|
265
|
+
def html_parse_hash(headers, size, headers_size)
|
266
|
+
headers_index = headers.find_index { |x| x.count == headers_size }
|
267
|
+
order = headers[headers_index]
|
268
|
+
order_index = order.count - size
|
269
|
+
order = order[order_index..-1]
|
270
|
+
indice = headers[headers_index+1..-1].flatten
|
271
|
+
indice = nil if indice.to_a.empty?
|
272
|
+
[order, indice]
|
273
|
+
end
|
274
|
+
|
275
|
+
def html_search(table, match=nil)
|
276
|
+
match.nil? ? true : (table.to_s.include? match)
|
277
|
+
end
|
278
|
+
|
279
|
+
# Allows user to override the scraped order / index / data.
#
# For each of :data, :index, :name and :order the user-supplied value is used
# when it is truthy, otherwise the scraped value is taken.
#
# The result is a new Hash; +user_val+ itself is not modified. The same
# options Hash is handed in for every table of a page, so writing scraped
# values back into it would make the first table's data, index and order
# stick for all following tables.
#
# @param scraped_val [Hash] values scraped from a single table
# @param user_val [Hash] user-supplied overrides (other keys are carried over)
# @return [Hash] copy of +user_val+ with the four keys filled in
def html_decide_values(scraped_val={}, user_val={})
  %i[data index name order].each_with_object(user_val.dup) do |key, merged|
    merged[key] ||= scraped_val[key]
  end
end
|
286
|
+
|
287
|
+
def html_table_to_dataframe(table)
|
288
|
+
Daru::DataFrame.rows table[:data],
|
289
|
+
index: table[:index],
|
290
|
+
order: table[:order],
|
291
|
+
name: table[:name]
|
292
|
+
end
|
224
293
|
end
|
225
294
|
end
|
226
295
|
end
|