daru 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -7
- data/CONTRIBUTING.md +1 -1
- data/History.md +48 -1
- data/README.md +3 -3
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +3 -2
- data/lib/daru.rb +14 -6
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +1 -1
- data/lib/daru/core/group_by.rb +32 -15
- data/lib/daru/core/query.rb +4 -4
- data/lib/daru/dataframe.rb +196 -48
- data/lib/daru/date_time/index.rb +7 -5
- data/lib/daru/formatters/table.rb +1 -0
- data/lib/daru/index/index.rb +121 -33
- data/lib/daru/index/multi_index.rb +83 -3
- data/lib/daru/io/csv/converters.rb +18 -0
- data/lib/daru/io/io.rb +80 -11
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/statistics/dataframe.rb +9 -11
- data/lib/daru/maths/statistics/vector.rb +139 -32
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +169 -80
- data/lib/daru/version.rb +1 -1
- data/spec/category_spec.rb +19 -19
- data/spec/core/group_by_spec.rb +47 -0
- data/spec/core/query_spec.rb +55 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +118 -6
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +134 -41
- data/spec/index/multi_index_spec.rb +115 -31
- data/spec/io/io_spec.rb +201 -0
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/statistics/vector_spec.rb +136 -14
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +21 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +222 -72
- metadata +68 -23
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/index/index.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
-
class Index
|
2
|
+
class Index # rubocop:disable Metrics/ClassLength
|
3
3
|
include Enumerable
|
4
4
|
# It so happens that over riding the .new method in a super class also
|
5
5
|
# tampers with the default .new method for class that inherit from the
|
@@ -44,24 +44,31 @@ module Daru
|
|
44
44
|
end
|
45
45
|
|
46
46
|
attr_reader :relation_hash, :size
|
47
|
+
attr_accessor :name
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
49
|
+
# @example
|
50
|
+
#
|
51
|
+
# idx = Daru::Index.new [:one, 'one', 1, 2, :two]
|
52
|
+
# => #<Daru::Index(5): {one, one, 1, 2, two}>
|
53
|
+
#
|
54
|
+
# # set the name
|
55
|
+
#
|
56
|
+
# idx.name = "index_name"
|
57
|
+
# => "index_name"
|
58
|
+
#
|
59
|
+
# idx
|
60
|
+
# => #<Daru::Index(5): index_name {one, one, 1, 2, two}>
|
61
|
+
#
|
62
|
+
# # set the name during initialization
|
63
|
+
#
|
64
|
+
# idx = Daru::Index.new [:one, 'one', 1, 2, :two], name: "index_name"
|
65
|
+
# => #<Daru::Index(5): index_name {one, one, 1, 2, two}>
|
66
|
+
def initialize index, opts={}
|
67
|
+
index = guess_index index
|
62
68
|
@relation_hash = index.each_with_index.to_h.freeze
|
63
69
|
@keys = @relation_hash.keys
|
64
70
|
@size = @relation_hash.size
|
71
|
+
@name = opts[:name]
|
65
72
|
end
|
66
73
|
|
67
74
|
def ==(other)
|
@@ -106,21 +113,24 @@ module Daru
|
|
106
113
|
indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range
|
107
114
|
|
108
115
|
if indexes.size == 1
|
109
|
-
|
116
|
+
numeric_pos indexes.first
|
110
117
|
else
|
111
|
-
indexes.map { |index|
|
118
|
+
indexes.map { |index| numeric_pos index }
|
112
119
|
end
|
113
120
|
end
|
114
121
|
|
115
122
|
def subset *indexes
|
116
123
|
if indexes.first.is_a? Range
|
117
|
-
|
124
|
+
start = indexes.first.begin
|
125
|
+
en = indexes.first.end
|
126
|
+
|
127
|
+
subset_slice start, en
|
118
128
|
elsif include? indexes.first
|
119
129
|
# Assume 'indexes' contain indexes not positions
|
120
130
|
Daru::Index.new indexes
|
121
131
|
else
|
122
132
|
# Assume 'indexes' contain positions not indexes
|
123
|
-
Daru::Index.new
|
133
|
+
Daru::Index.new(indexes.map { |k| key k })
|
124
134
|
end
|
125
135
|
end
|
126
136
|
|
@@ -143,10 +153,11 @@ module Daru
|
|
143
153
|
end
|
144
154
|
|
145
155
|
def inspect threshold=20
|
156
|
+
name_part = @name ? "#{@name} " : ''
|
146
157
|
if size <= threshold
|
147
|
-
"#<#{self.class}(#{size}): {#{to_a.join(', ')}}>"
|
158
|
+
"#<#{self.class}(#{size}): #{name_part}{#{to_a.join(', ')}}>"
|
148
159
|
else
|
149
|
-
"#<#{self.class}(#{size}): {#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
|
160
|
+
"#<#{self.class}(#{size}): #{name_part}{#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
|
150
161
|
end
|
151
162
|
end
|
152
163
|
|
@@ -154,12 +165,27 @@ module Daru
|
|
154
165
|
start = args[0]
|
155
166
|
en = args[1]
|
156
167
|
|
168
|
+
start_idx = @relation_hash[start]
|
169
|
+
en_idx = @relation_hash[en]
|
170
|
+
|
171
|
+
if start_idx.nil?
|
172
|
+
nil
|
173
|
+
elsif en_idx.nil?
|
174
|
+
Array(start_idx..size-1)
|
175
|
+
else
|
176
|
+
Array(start_idx..en_idx)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
def subset_slice *args
|
181
|
+
start = args[0]
|
182
|
+
en = args[1]
|
183
|
+
|
157
184
|
if start.is_a?(Integer) && en.is_a?(Integer)
|
158
185
|
Index.new @keys[start..en]
|
159
186
|
else
|
160
187
|
start_idx = @relation_hash[start]
|
161
188
|
en_idx = @relation_hash[en]
|
162
|
-
|
163
189
|
Index.new @keys[start_idx..en_idx]
|
164
190
|
end
|
165
191
|
end
|
@@ -187,6 +213,27 @@ module Daru
|
|
187
213
|
@relation_hash.key? index
|
188
214
|
end
|
189
215
|
|
216
|
+
# @note Do not use it to check for Float::NAN as
|
217
|
+
# Float::NAN == Float::NAN is false
|
218
|
+
# Returns a vector of booleans where the value at the ith position is either
|
219
|
+
# true or false depending upon whether index value at position i is equal to
|
220
|
+
# any of the values passed in the argument or not
|
221
|
+
# @param [Array] *indexes values to equate with
|
222
|
+
# @return [Daru::Vector] vector of boolean values
|
223
|
+
# @example
|
224
|
+
# dv = Daru::Index.new [1, 2, 3, :one, 'one']
|
225
|
+
# dv.is_values 1, 'one'
|
226
|
+
# # => #<Daru::Vector(5)>
|
227
|
+
# # 0 true
|
228
|
+
# # 1 false
|
229
|
+
# # 2 false
|
230
|
+
# # 3 false
|
231
|
+
# # 4 true
|
232
|
+
def is_values(*indexes) # rubocop:disable Style/PredicateName
|
233
|
+
bool_array = @relation_hash.keys.map { |r| indexes.include?(r) }
|
234
|
+
Daru::Vector.new(bool_array)
|
235
|
+
end
|
236
|
+
|
190
237
|
def empty?
|
191
238
|
@relation_hash.empty?
|
192
239
|
end
|
@@ -222,8 +269,47 @@ module Daru
|
|
222
269
|
self.class.new(new_order.map { |i| from[i] })
|
223
270
|
end
|
224
271
|
|
272
|
+
# Sorts an `Index` according to its values. Defaults to ascending order
|
273
|
+
# sorting.
|
274
|
+
#
|
275
|
+
# @param [Hash] opts the options for sort method.
|
276
|
+
# @option opts [Boolean] :ascending False, to get descending order.
|
277
|
+
#
|
278
|
+
# @return [Index] sorted `Index` according to its values.
|
279
|
+
#
|
280
|
+
# @example
|
281
|
+
# di = Daru::Index.new [100, 99, 101, 1, 2]
|
282
|
+
# # Say you want to sort in descending order
|
283
|
+
# di.sort(ascending: false) #=> Daru::Index.new [101, 100, 99, 2, 1]
|
284
|
+
# # Say you want to sort in ascending order
|
285
|
+
# di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
|
286
|
+
def sort opts={}
|
287
|
+
opts = {ascending: true}.merge(opts)
|
288
|
+
if opts[:ascending]
|
289
|
+
new_index, = @relation_hash.sort.transpose
|
290
|
+
else
|
291
|
+
new_index, = @relation_hash.sort.reverse.transpose
|
292
|
+
end
|
293
|
+
|
294
|
+
self.class.new(new_index)
|
295
|
+
end
|
296
|
+
|
225
297
|
private
|
226
298
|
|
299
|
+
def guess_index index
|
300
|
+
case index
|
301
|
+
when nil
|
302
|
+
[]
|
303
|
+
when Integer
|
304
|
+
index.times.to_a
|
305
|
+
when Enumerable
|
306
|
+
index.to_a
|
307
|
+
else
|
308
|
+
raise ArgumentError,
|
309
|
+
"Cannot create index from #{index.class} #{index.inspect}"
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
227
313
|
def preprocess_range rng
|
228
314
|
start = rng.begin
|
229
315
|
en = rng.end
|
@@ -243,22 +329,14 @@ module Daru
|
|
243
329
|
end
|
244
330
|
|
245
331
|
def by_multi_key *key
|
246
|
-
|
247
|
-
Daru::Index.new key.map { |k| k }
|
248
|
-
else
|
249
|
-
# Assume the user is specifing values for index not keys
|
250
|
-
# Return index object having keys corresponding to values provided
|
251
|
-
Daru::Index.new key.map { |k| key k }
|
252
|
-
end
|
332
|
+
key.map { |k| by_single_key k }
|
253
333
|
end
|
254
334
|
|
255
335
|
def by_single_key key
|
256
336
|
if @relation_hash.key?(key)
|
257
337
|
@relation_hash[key]
|
258
|
-
elsif key.is_a?(Numeric) && key < size
|
259
|
-
key
|
260
338
|
else
|
261
|
-
|
339
|
+
nil
|
262
340
|
end
|
263
341
|
end
|
264
342
|
|
@@ -266,7 +344,7 @@ module Daru
|
|
266
344
|
def validate_positions *positions
|
267
345
|
positions = [positions] if positions.is_a? Integer
|
268
346
|
positions.each do |pos|
|
269
|
-
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
347
|
+
raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
|
270
348
|
end
|
271
349
|
end
|
272
350
|
|
@@ -285,5 +363,15 @@ module Daru
|
|
285
363
|
positions
|
286
364
|
end
|
287
365
|
end
|
366
|
+
|
367
|
+
def numeric_pos key
|
368
|
+
if @relation_hash.key?(key)
|
369
|
+
@relation_hash[key]
|
370
|
+
elsif key.is_a?(Numeric) && (key < size && key >= -size)
|
371
|
+
key
|
372
|
+
else
|
373
|
+
raise IndexError, "Specified index #{key.inspect} does not exist"
|
374
|
+
end
|
375
|
+
end
|
288
376
|
end
|
289
377
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
-
class MultiIndex < Index
|
2
|
+
class MultiIndex < Index # rubocop:disable Metrics/ClassLength
|
3
3
|
def each(&block)
|
4
4
|
to_a.each(&block)
|
5
5
|
end
|
@@ -14,6 +14,68 @@ module Daru
|
|
14
14
|
@levels.map(&:keys)
|
15
15
|
end
|
16
16
|
|
17
|
+
# Names and levels should be of the same size. If the size of Array `name` is less
|
18
|
+
# or greater than size of array `levels` then it raises `SizeError`.
|
19
|
+
# If the user doesn't want to name a particular level, then the user must put
|
20
|
+
# an empty string at that index of Array `name`.
|
21
|
+
# For example, if there is a multi_index of 3 levels and the user doesn't want to name
|
22
|
+
# level 0, then do multi_index.name = ['', 'level1_name', 'level2_name']
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
#
|
26
|
+
# # set the name during initialization
|
27
|
+
#
|
28
|
+
# mi = Daru::MultiIndex.new(
|
29
|
+
# levels: [[:a,:b,:c], [:one, :two]],
|
30
|
+
# labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2'])
|
31
|
+
#
|
32
|
+
# # =>
|
33
|
+
# # <Daru::MultiIndex(6x2)>
|
34
|
+
# # s1 s2
|
35
|
+
# # a one
|
36
|
+
# # two
|
37
|
+
# # b one
|
38
|
+
# # two
|
39
|
+
# # c one
|
40
|
+
# # two
|
41
|
+
#
|
42
|
+
# # set new name
|
43
|
+
#
|
44
|
+
# mi.name = ['k1', 'k2']
|
45
|
+
# => ["k1", "k2"]
|
46
|
+
#
|
47
|
+
# mi
|
48
|
+
# =>
|
49
|
+
# # #<Daru::MultiIndex(6x2)>
|
50
|
+
# # k1 k2
|
51
|
+
# # a one
|
52
|
+
# # two
|
53
|
+
# # b one
|
54
|
+
# # two
|
55
|
+
# # c one
|
56
|
+
# # two
|
57
|
+
#
|
58
|
+
# # access the name
|
59
|
+
#
|
60
|
+
# mi.name
|
61
|
+
# => ["k1", "k2"]
|
62
|
+
#
|
63
|
+
# # If you don't want to name level 0
|
64
|
+
#
|
65
|
+
# mi.name = ['', 'k2']
|
66
|
+
# => ["", "k2"]
|
67
|
+
#
|
68
|
+
# mi
|
69
|
+
# =>
|
70
|
+
# #<Daru::MultiIndex(6x2)>
|
71
|
+
# # k2
|
72
|
+
# # a one
|
73
|
+
# # two
|
74
|
+
# # b one
|
75
|
+
# # two
|
76
|
+
# # c one
|
77
|
+
# # two
|
78
|
+
#
|
17
79
|
def initialize opts={}
|
18
80
|
labels = opts[:labels]
|
19
81
|
levels = opts[:levels]
|
@@ -24,6 +86,12 @@ module Daru
|
|
24
86
|
|
25
87
|
@labels = labels
|
26
88
|
@levels = levels.map { |e| e.map.with_index.to_h }
|
89
|
+
self.name = opts[:name] unless opts[:name].nil?
|
90
|
+
end
|
91
|
+
|
92
|
+
def name=(names)
|
93
|
+
validate_name names, @labels
|
94
|
+
@name = names
|
27
95
|
end
|
28
96
|
|
29
97
|
def incorrect_fields?(_labels, levels)
|
@@ -171,8 +239,20 @@ module Daru
|
|
171
239
|
end
|
172
240
|
end
|
173
241
|
|
242
|
+
# Array `name` must have the same length as levels and labels.
|
243
|
+
def validate_name names, levels
|
244
|
+
error_msg = "'names' and 'levels' should be of same size. Size of the "\
|
245
|
+
"'name' array is #{names.size} and size of the MultiIndex 'levels' and "\
|
246
|
+
"'labels' is #{labels.size}."
|
247
|
+
suggestion_msg = "If you don\'t want to set name for particular level " \
|
248
|
+
"(say level 'i') then put empty string on index 'i' of the 'name' Array."
|
249
|
+
|
250
|
+
raise SizeError, error_msg if names.size > levels.size
|
251
|
+
raise SizeError, [error_msg, suggestion_msg].join("\n") if names.size < levels.size
|
252
|
+
end
|
253
|
+
|
174
254
|
private :find_all_indexes, :multi_index_from_multiple_selections,
|
175
|
-
:retrieve_from_range, :retrieve_from_tuples
|
255
|
+
:retrieve_from_range, :retrieve_from_tuples, :validate_name
|
176
256
|
|
177
257
|
def key index
|
178
258
|
raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
|
@@ -232,7 +312,7 @@ module Daru
|
|
232
312
|
|
233
313
|
def inspect threshold=20
|
234
314
|
"#<Daru::MultiIndex(#{size}x#{width})>\n" +
|
235
|
-
Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
|
315
|
+
Formatters::Table.format([], headers: @name, row_headers: sparse_tuples, threshold: threshold)
|
236
316
|
end
|
237
317
|
|
238
318
|
def to_html
|
data/lib/daru/io/io.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Daru
|
2
|
+
require_relative 'csv/converters.rb'
|
2
3
|
module IOHelpers
|
3
4
|
class << self
|
4
5
|
def process_row(row,empty)
|
@@ -76,7 +77,6 @@ module Daru
|
|
76
77
|
# Functions for loading/writing CSV files
|
77
78
|
def from_csv path, opts={}
|
78
79
|
daru_options, opts = from_csv_prepare_opts opts
|
79
|
-
|
80
80
|
# Preprocess headers for detecting and correcting repetition in
|
81
81
|
# case the :headers option is not specified.
|
82
82
|
hsh =
|
@@ -86,7 +86,6 @@ module Daru
|
|
86
86
|
from_csv_hash(path, opts)
|
87
87
|
.tap { |hash| daru_options[:order] = hash.keys }
|
88
88
|
end
|
89
|
-
|
90
89
|
Daru::DataFrame.new(hsh,daru_options)
|
91
90
|
end
|
92
91
|
|
@@ -111,11 +110,10 @@ module Daru
|
|
111
110
|
|
112
111
|
# Execute a query and create a data frame from the result
|
113
112
|
#
|
114
|
-
# @param dbh [DBI::DatabaseHandle] A DBI connection
|
113
|
+
# @param db [DBI::DatabaseHandle, String] A DBI connection or the path to a SQLite3 database.
|
115
114
|
# @param query [String] The query to be executed
|
116
115
|
#
|
117
116
|
# @return A dataframe containing the data resulting from the query
|
118
|
-
|
119
117
|
def from_sql(db, query)
|
120
118
|
require 'daru/io/sql_data_source'
|
121
119
|
SqlDataSource.make_dataframe(db, query)
|
@@ -186,13 +184,28 @@ module Daru
|
|
186
184
|
end
|
187
185
|
end
|
188
186
|
|
187
|
+
def from_html path, opts
|
188
|
+
page = Mechanize.new.get(path)
|
189
|
+
page.search('table').map { |table| html_parse_table table }
|
190
|
+
.keep_if { |table| html_search table, opts[:match] }
|
191
|
+
.compact
|
192
|
+
.map { |table| html_decide_values table, opts }
|
193
|
+
.map { |table| html_table_to_dataframe table }
|
194
|
+
rescue LoadError
|
195
|
+
raise 'Install the mechanize gem version 2.7.5 with `gem install mechanize`,'\
|
196
|
+
' for using the from_html function.'
|
197
|
+
end
|
198
|
+
|
189
199
|
private
|
190
200
|
|
191
|
-
DARU_OPT_KEYS = [
|
201
|
+
DARU_OPT_KEYS = %i[clone order index name].freeze
|
192
202
|
|
193
203
|
def from_csv_prepare_opts opts
|
194
204
|
opts[:col_sep] ||= ','
|
195
|
-
opts[:
|
205
|
+
opts[:skip_blanks] ||= true
|
206
|
+
opts[:converters] ||= [:numeric]
|
207
|
+
|
208
|
+
opts[:converters] = from_csv_prepare_converters(opts[:converters])
|
196
209
|
|
197
210
|
daru_options = opts.keys.each_with_object({}) do |k, hash|
|
198
211
|
hash[k] = opts.delete(k) if DARU_OPT_KEYS.include?(k)
|
@@ -200,11 +213,22 @@ module Daru
|
|
200
213
|
[daru_options, opts]
|
201
214
|
end
|
202
215
|
|
216
|
+
def from_csv_prepare_converters(converters)
|
217
|
+
converters.flat_map do |c|
|
218
|
+
if ::CSV::Converters[c]
|
219
|
+
::CSV::Converters[c]
|
220
|
+
elsif Daru::IO::CSV::CONVERTERS[c]
|
221
|
+
Daru::IO::CSV::CONVERTERS[c]
|
222
|
+
else
|
223
|
+
c
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
203
228
|
def from_csv_hash_with_headers(path, opts)
|
204
229
|
opts[:header_converters] ||= :symbol
|
205
|
-
|
206
230
|
::CSV
|
207
|
-
.
|
231
|
+
.parse(open(path), opts)
|
208
232
|
.tap { |c| yield c if block_given? }
|
209
233
|
.by_col.map { |col_name, values| [col_name, values] }.to_h
|
210
234
|
end
|
@@ -212,15 +236,60 @@ module Daru
|
|
212
236
|
def from_csv_hash(path, opts)
|
213
237
|
csv_as_arrays =
|
214
238
|
::CSV
|
215
|
-
.open(path,
|
239
|
+
.parse(open(path), opts)
|
216
240
|
.tap { |c| yield c if block_given? }
|
217
241
|
.to_a
|
218
|
-
|
219
242
|
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
|
220
243
|
csv_as_arrays = csv_as_arrays.transpose
|
221
|
-
|
222
244
|
headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
|
223
245
|
end
|
246
|
+
|
247
|
+
def html_parse_table(table)
|
248
|
+
headers, headers_size = html_scrape_tag(table,'th')
|
249
|
+
data, size = html_scrape_tag(table, 'td')
|
250
|
+
data = data.keep_if { |x| x.count == size }
|
251
|
+
order, indice = html_parse_hash(headers, size, headers_size) if headers_size >= size
|
252
|
+
return unless (indice.nil? || indice.count == data.count) && !order.nil? && order.count>0
|
253
|
+
{data: data.compact, index: indice, order: order}
|
254
|
+
end
|
255
|
+
|
256
|
+
def html_scrape_tag(table, tag)
|
257
|
+
arr = table.search('tr').map { |row| row.search(tag).map { |val| val.text.strip } }
|
258
|
+
size = arr.map(&:count).max
|
259
|
+
[arr, size]
|
260
|
+
end
|
261
|
+
|
262
|
+
# Splits headers (all th tags) into order and index. Wherein,
|
263
|
+
# Order : All <th> tags on first proper row of HTML table
|
264
|
+
# index : All <th> tags on first proper column of HTML table
|
265
|
+
def html_parse_hash(headers, size, headers_size)
|
266
|
+
headers_index = headers.find_index { |x| x.count == headers_size }
|
267
|
+
order = headers[headers_index]
|
268
|
+
order_index = order.count - size
|
269
|
+
order = order[order_index..-1]
|
270
|
+
indice = headers[headers_index+1..-1].flatten
|
271
|
+
indice = nil if indice.to_a.empty?
|
272
|
+
[order, indice]
|
273
|
+
end
|
274
|
+
|
275
|
+
def html_search(table, match=nil)
|
276
|
+
match.nil? ? true : (table.to_s.include? match)
|
277
|
+
end
|
278
|
+
|
279
|
+
# Allows user to override the scraped order / index / data
|
280
|
+
def html_decide_values(scraped_val={}, user_val={})
|
281
|
+
%I[data index name order].each do |key|
|
282
|
+
user_val[key] ||= scraped_val[key]
|
283
|
+
end
|
284
|
+
user_val
|
285
|
+
end
|
286
|
+
|
287
|
+
def html_table_to_dataframe(table)
|
288
|
+
Daru::DataFrame.rows table[:data],
|
289
|
+
index: table[:index],
|
290
|
+
order: table[:order],
|
291
|
+
name: table[:name]
|
292
|
+
end
|
224
293
|
end
|
225
294
|
end
|
226
295
|
end
|