daru 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -7
- data/CONTRIBUTING.md +1 -1
- data/History.md +48 -1
- data/README.md +3 -3
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +3 -2
- data/lib/daru.rb +14 -6
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +1 -1
- data/lib/daru/core/group_by.rb +32 -15
- data/lib/daru/core/query.rb +4 -4
- data/lib/daru/dataframe.rb +196 -48
- data/lib/daru/date_time/index.rb +7 -5
- data/lib/daru/formatters/table.rb +1 -0
- data/lib/daru/index/index.rb +121 -33
- data/lib/daru/index/multi_index.rb +83 -3
- data/lib/daru/io/csv/converters.rb +18 -0
- data/lib/daru/io/io.rb +80 -11
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/statistics/dataframe.rb +9 -11
- data/lib/daru/maths/statistics/vector.rb +139 -32
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +169 -80
- data/lib/daru/version.rb +1 -1
- data/spec/category_spec.rb +19 -19
- data/spec/core/group_by_spec.rb +47 -0
- data/spec/core/query_spec.rb +55 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +118 -6
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +134 -41
- data/spec/index/multi_index_spec.rb +115 -31
- data/spec/io/io_spec.rb +201 -0
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/statistics/vector_spec.rb +136 -14
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +21 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +222 -72
- metadata +68 -23
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/dataframe.rb
CHANGED
@@ -20,7 +20,7 @@ module Daru
|
|
20
20
|
#
|
21
21
|
# == Arguments
|
22
22
|
#
|
23
|
-
# * path -
|
23
|
+
# * path - Local path / Remote URL of the file to load specified as a String.
|
24
24
|
#
|
25
25
|
# == Options
|
26
26
|
#
|
@@ -63,7 +63,7 @@ module Daru
|
|
63
63
|
|
64
64
|
# Executes a database query and returns the result as a DataFrame
|
65
65
|
#
|
66
|
-
# @param dbh [DBI::DatabaseHandle] A DBI connection
|
66
|
+
# @param dbh [DBI::DatabaseHandle, String] A DBI connection OR path to a SQLite3 database.
|
67
67
|
# @param query [String] The query to be executed
|
68
68
|
#
|
69
69
|
# @return A dataframe containing the data resulting from the query
|
@@ -72,6 +72,11 @@ module Daru
|
|
72
72
|
#
|
73
73
|
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
74
74
|
# Daru::DataFrame.from_sql(dbh, "SELECT * FROM test")
|
75
|
+
#
|
76
|
+
# #Alternatively
|
77
|
+
#
|
78
|
+
# require 'dbi'
|
79
|
+
# Daru::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
|
75
80
|
def from_sql dbh, query
|
76
81
|
Daru::IO.from_sql dbh, query
|
77
82
|
end
|
@@ -112,6 +117,49 @@ module Daru
|
|
112
117
|
Daru::IO.from_plaintext path, fields
|
113
118
|
end
|
114
119
|
|
120
|
+
# Read the table data from a remote html file. Please note that this module
|
121
|
+
# works only for static table elements on a HTML page, and won't work in
|
122
|
+
# cases where the data is being loaded into the HTML table by Javascript.
|
123
|
+
#
|
124
|
+
# By default - all <th> tag elements in the first proper row are considered
|
125
|
+
# as the order, and all the <th> tag elements in the first column are
|
126
|
+
# considered as the index.
|
127
|
+
#
|
128
|
+
# == Arguments
|
129
|
+
#
|
130
|
+
# * path [String] - URL of the target HTML file.
|
131
|
+
# * fields [Hash] -
|
132
|
+
#
|
133
|
+
# +:match+ - A *String* to match and choose a particular table(s) from multiple tables of a HTML page.
|
134
|
+
#
|
135
|
+
# +:order+ - An *Array* which would act as the user-defined order, to override the parsed *Daru::DataFrame*.
|
136
|
+
#
|
137
|
+
# +:index+ - An *Array* which would act as the user-defined index, to override the parsed *Daru::DataFrame*.
|
138
|
+
#
|
139
|
+
# +:name+ - A *String* that manually assigns a name to the scraped *Daru::DataFrame*, for user's preference.
|
140
|
+
#
|
141
|
+
# == Returns
|
142
|
+
# An Array of +Daru::DataFrame+s, with each dataframe corresponding to a
|
143
|
+
# HTML table on that webpage.
|
144
|
+
#
|
145
|
+
# == Usage
|
146
|
+
# dfs = Daru::DataFrame.from_html("http://www.moneycontrol.com/", match: "Sun Pharma")
|
147
|
+
# dfs.count
|
148
|
+
# # => 4
|
149
|
+
#
|
150
|
+
# dfs.first
|
151
|
+
# #
|
152
|
+
# # => <Daru::DataFrame(5x4)>
|
153
|
+
# # Company Price Change Value (Rs
|
154
|
+
# # 0 Sun Pharma 502.60 -65.05 2,117.87
|
155
|
+
# # 1 Reliance 1356.90 19.60 745.10
|
156
|
+
# # 2 Tech Mahin 379.45 -49.70 650.22
|
157
|
+
# # 3 ITC 315.85 6.75 621.12
|
158
|
+
# # 4 HDFC 1598.85 50.95 553.91
|
159
|
+
def from_html path, fields={}
|
160
|
+
Daru::IO.from_html path, fields
|
161
|
+
end
|
162
|
+
|
115
163
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
116
164
|
# Daru::Vector objects.
|
117
165
|
def rows source, opts={}
|
@@ -239,6 +287,48 @@ module Daru
|
|
239
287
|
# # b 7 2
|
240
288
|
# # c 8 3
|
241
289
|
# # d 9 4
|
290
|
+
#
|
291
|
+
# df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)
|
292
|
+
#
|
293
|
+
# # =>
|
294
|
+
# # #<Daru::DataFrame: bat_man (4x2)>
|
295
|
+
# # 0 1
|
296
|
+
# # 0 1 6
|
297
|
+
# # 1 2 7
|
298
|
+
# # 2 3 8
|
299
|
+
# # 3 4 9
|
300
|
+
#
|
301
|
+
# # Dataframe having Index name
|
302
|
+
#
|
303
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
304
|
+
# index: Daru::Index.new([:a, :b, :c, :d], name: 'idx_name'),
|
305
|
+
# name: :spider_man)
|
306
|
+
#
|
307
|
+
# # =>
|
308
|
+
# # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
|
309
|
+
# # idx_name b a
|
310
|
+
# # a 6 1
|
311
|
+
# # b 7 2
|
312
|
+
# # c 8 3
|
313
|
+
# # d 9 4
|
314
|
+
#
|
315
|
+
#
|
316
|
+
# idx = Daru::Index.new [100, 99, 101, 1, 2], name: "s1"
|
317
|
+
# # => #<Daru::Index(5): s1 {100, 99, 101, 1, 2}>
|
318
|
+
#
|
319
|
+
# df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
320
|
+
# c: [11,22,33,44,55]},
|
321
|
+
# order: [:a, :b, :c],
|
322
|
+
# index: idx)
|
323
|
+
# # =>
|
324
|
+
# #<Daru::DataFrame(5x3)>
|
325
|
+
# # s1 a b c
|
326
|
+
# # 100 1 11 11
|
327
|
+
# # 99 2 12 22
|
328
|
+
# # 101 3 13 33
|
329
|
+
# # 1 4 14 44
|
330
|
+
# # 2 5 15 55
|
331
|
+
|
242
332
|
def initialize source, opts={} # rubocop:disable Metrics/MethodLength
|
243
333
|
vectors, index = opts[:order], opts[:index] # FIXME: just keyword args after Ruby 2.1
|
244
334
|
@data = []
|
@@ -457,7 +547,7 @@ module Daru
|
|
457
547
|
def dup vectors_to_dup=nil
|
458
548
|
vectors_to_dup = @vectors.to_a unless vectors_to_dup
|
459
549
|
|
460
|
-
src = vectors_to_dup.map { |vec| @data[@vectors
|
550
|
+
src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
|
461
551
|
new_order = Daru::Index.new(vectors_to_dup)
|
462
552
|
|
463
553
|
Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
|
@@ -544,7 +634,7 @@ module Daru
|
|
544
634
|
# b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
|
545
635
|
# c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
546
636
|
# }, index: 11..18)
|
547
|
-
# df
|
637
|
+
# df.replace_values nil, Float::NAN
|
548
638
|
# # => #<Daru::DataFrame(8x3)>
|
549
639
|
# # a b c
|
550
640
|
# # 11 1 a a
|
@@ -679,7 +769,7 @@ module Daru
|
|
679
769
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
680
770
|
# Default to :vector.
|
681
771
|
def map! axis=:vector, &block
|
682
|
-
if
|
772
|
+
if %i[vector column].include?(axis)
|
683
773
|
map_vectors!(&block)
|
684
774
|
elsif axis == :row
|
685
775
|
map_rows!(&block)
|
@@ -913,7 +1003,7 @@ module Daru
|
|
913
1003
|
|
914
1004
|
# Creates a new vector holding the values of the given field for every row on which the block returns true
|
915
1005
|
def filter_vector vec, &block
|
916
|
-
Daru::Vector.new
|
1006
|
+
Daru::Vector.new(each_row.select(&block).map { |row| row[vec] })
|
917
1007
|
end
|
918
1008
|
|
919
1009
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
@@ -1031,7 +1121,7 @@ module Daru
|
|
1031
1121
|
alias :vector_missing_values :missing_values_rows
|
1032
1122
|
|
1033
1123
|
def has_missing_data?
|
1034
|
-
|
1124
|
+
@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
|
1035
1125
|
end
|
1036
1126
|
alias :flawed? :has_missing_data?
|
1037
1127
|
deprecate :has_missing_data?, :include_values?, 2016, 10
|
@@ -1119,7 +1209,7 @@ module Daru
|
|
1119
1209
|
# row[:a] < 3 and row[:b] == 'b'
|
1120
1210
|
# end #=> true
|
1121
1211
|
def any? axis=:vector, &block
|
1122
|
-
if
|
1212
|
+
if %i[vector column].include?(axis)
|
1123
1213
|
@data.any?(&block)
|
1124
1214
|
elsif axis == :row
|
1125
1215
|
each_row do |row|
|
@@ -1141,7 +1231,7 @@ module Daru
|
|
1141
1231
|
# row[:a] < 10
|
1142
1232
|
# end #=> true
|
1143
1233
|
def all? axis=:vector, &block
|
1144
|
-
if
|
1234
|
+
if %i[vector column].include?(axis)
|
1145
1235
|
@data.all?(&block)
|
1146
1236
|
elsif axis == :row
|
1147
1237
|
each_row.all?(&block)
|
@@ -1377,7 +1467,7 @@ module Daru
|
|
1377
1467
|
# df.rename_vectors :a => :alpha, :c => :gamma
|
1378
1468
|
# df.vectors.to_a #=> [:alpha, :b, :gamma]
|
1379
1469
|
def rename_vectors name_map
|
1380
|
-
existing_targets = name_map.
|
1470
|
+
existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a
|
1381
1471
|
delete_vectors(*existing_targets)
|
1382
1472
|
|
1383
1473
|
new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
|
@@ -1408,19 +1498,16 @@ module Daru
|
|
1408
1498
|
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
|
1409
1499
|
end
|
1410
1500
|
|
1411
|
-
# Generate a summary of this DataFrame
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
|
1419
|
-
@vectors.each do |v|
|
1420
|
-
g.text "Element:[#{v}]"
|
1421
|
-
g.parse_element(self[v])
|
1422
|
-
end
|
1501
|
+
# Generate a summary of this DataFrame based on individual vectors in the DataFrame
|
1502
|
+
# @return [String] String containing the summary of the DataFrame
|
1503
|
+
def summary
|
1504
|
+
summary = "= #{name}"
|
1505
|
+
summary << "\n Number of rows: #{nrows}"
|
1506
|
+
@vectors.each do |v|
|
1507
|
+
summary << "\n Element:[#{v}]\n"
|
1508
|
+
summary << self[v].summary(1)
|
1423
1509
|
end
|
1510
|
+
summary
|
1424
1511
|
end
|
1425
1512
|
|
1426
1513
|
# Sorts a dataframe (ascending/descending) in the given priority sequence of
|
@@ -1783,7 +1870,9 @@ module Daru
|
|
1783
1870
|
end
|
1784
1871
|
|
1785
1872
|
# Convert to html for IRuby.
|
1786
|
-
def to_html
|
1873
|
+
def to_html(threshold=30)
|
1874
|
+
table_thead = to_html_thead
|
1875
|
+
table_tbody = to_html_tbody(threshold)
|
1787
1876
|
path = if index.is_a?(MultiIndex)
|
1788
1877
|
File.expand_path('../iruby/templates/dataframe_mi.html.erb', __FILE__)
|
1789
1878
|
else
|
@@ -1792,8 +1881,28 @@ module Daru
|
|
1792
1881
|
ERB.new(File.read(path).strip).result(binding)
|
1793
1882
|
end
|
1794
1883
|
|
1884
|
+
def to_html_thead
|
1885
|
+
table_thead_path =
|
1886
|
+
if index.is_a?(MultiIndex)
|
1887
|
+
File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __FILE__)
|
1888
|
+
else
|
1889
|
+
File.expand_path('../iruby/templates/dataframe_thead.html.erb', __FILE__)
|
1890
|
+
end
|
1891
|
+
ERB.new(File.read(table_thead_path).strip).result(binding)
|
1892
|
+
end
|
1893
|
+
|
1894
|
+
def to_html_tbody(threshold=30)
|
1895
|
+
table_tbody_path =
|
1896
|
+
if index.is_a?(MultiIndex)
|
1897
|
+
File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __FILE__)
|
1898
|
+
else
|
1899
|
+
File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __FILE__)
|
1900
|
+
end
|
1901
|
+
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
1902
|
+
end
|
1903
|
+
|
1795
1904
|
def to_s
|
1796
|
-
|
1905
|
+
"#<#{self.class}#{': ' + @name.to_s if @name}(#{nrows}x#{ncols})>"
|
1797
1906
|
end
|
1798
1907
|
|
1799
1908
|
# Method for updating the metadata (i.e. missing value positions) of the
|
@@ -1900,14 +2009,13 @@ module Daru
|
|
1900
2009
|
|
1901
2010
|
# Pretty print in a nice table format for the command line (irb/pry/iruby)
|
1902
2011
|
def inspect spacing=10, threshold=15
|
1903
|
-
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
1904
2012
|
name_part = @name ? ": #{@name} " : ''
|
1905
2013
|
|
1906
2014
|
"#<#{self.class}#{name_part}(#{nrows}x#{ncols})>\n" +
|
1907
2015
|
Formatters::Table.format(
|
1908
2016
|
each_row.lazy,
|
1909
2017
|
row_headers: row_headers,
|
1910
|
-
headers:
|
2018
|
+
headers: headers,
|
1911
2019
|
threshold: threshold,
|
1912
2020
|
spacing: spacing
|
1913
2021
|
)
|
@@ -2002,8 +2110,32 @@ module Daru
|
|
2002
2110
|
end
|
2003
2111
|
end
|
2004
2112
|
|
2113
|
+
# Returns an array of row tuples at the given index(es)
|
2114
|
+
def access_row_tuples_by_indexs *indexes
|
2115
|
+
positions = @index.pos(*indexes)
|
2116
|
+
|
2117
|
+
return populate_row_for(positions) if positions.is_a? Numeric
|
2118
|
+
|
2119
|
+
res = []
|
2120
|
+
new_rows = @data.map { |vec| vec[*indexes] }
|
2121
|
+
indexes.each do |index|
|
2122
|
+
tuples = []
|
2123
|
+
new_rows.map { |row| tuples += [row[index]] }
|
2124
|
+
res << tuples
|
2125
|
+
end
|
2126
|
+
res
|
2127
|
+
end
|
2128
|
+
|
2005
2129
|
private
|
2006
2130
|
|
2131
|
+
def headers
|
2132
|
+
Daru::Index.new(Array(index.name) + @vectors.to_a)
|
2133
|
+
end
|
2134
|
+
|
2135
|
+
def row_headers
|
2136
|
+
index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
2137
|
+
end
|
2138
|
+
|
2007
2139
|
def convert_categorical_vectors names
|
2008
2140
|
names.map do |n|
|
2009
2141
|
next unless self[n].category?
|
@@ -2034,7 +2166,7 @@ module Daru
|
|
2034
2166
|
end
|
2035
2167
|
|
2036
2168
|
def dispatch_to_axis(axis, method, *args, &block)
|
2037
|
-
if
|
2169
|
+
if %i[vector column].include?(axis)
|
2038
2170
|
send("#{method}_vector", *args, &block)
|
2039
2171
|
elsif axis == :row
|
2040
2172
|
send("#{method}_row", *args, &block)
|
@@ -2044,7 +2176,7 @@ module Daru
|
|
2044
2176
|
end
|
2045
2177
|
|
2046
2178
|
def dispatch_to_axis_pl(axis, method, *args, &block)
|
2047
|
-
if
|
2179
|
+
if %i[vector column].include?(axis)
|
2048
2180
|
send("#{method}_vectors", *args, &block)
|
2049
2181
|
elsif axis == :row
|
2050
2182
|
send("#{method}_rows", *args, &block)
|
@@ -2053,7 +2185,7 @@ module Daru
|
|
2053
2185
|
end
|
2054
2186
|
end
|
2055
2187
|
|
2056
|
-
AXES = [
|
2188
|
+
AXES = %i[row vector].freeze
|
2057
2189
|
|
2058
2190
|
def extract_axis names, default=:vector
|
2059
2191
|
if AXES.include?(names.last)
|
@@ -2065,7 +2197,7 @@ module Daru
|
|
2065
2197
|
|
2066
2198
|
def access_vector *names
|
2067
2199
|
if names.first.is_a?(Range)
|
2068
|
-
dup(@vectors
|
2200
|
+
dup(@vectors.subset(names.first))
|
2069
2201
|
elsif @vectors.is_a?(MultiIndex)
|
2070
2202
|
access_vector_multi_index(*names)
|
2071
2203
|
else
|
@@ -2087,14 +2219,18 @@ module Daru
|
|
2087
2219
|
|
2088
2220
|
def access_vector_single_index *names
|
2089
2221
|
if names.count < 2
|
2090
|
-
|
2222
|
+
begin
|
2223
|
+
pos = @vectors.is_a?(Daru::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
|
2224
|
+
rescue IndexError
|
2225
|
+
raise IndexError, "Specified vector #{names.first} does not exist"
|
2226
|
+
end
|
2091
2227
|
|
2092
2228
|
return @data[pos] if pos.is_a?(Numeric)
|
2093
2229
|
|
2094
2230
|
names = pos
|
2095
2231
|
end
|
2096
2232
|
|
2097
|
-
new_vectors = names.map { |name| [name, @data[@vectors
|
2233
|
+
new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h
|
2098
2234
|
|
2099
2235
|
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2100
2236
|
Daru::DataFrame.new(new_vectors, order: order,
|
@@ -2126,7 +2262,7 @@ module Daru
|
|
2126
2262
|
if @index.empty?
|
2127
2263
|
insert_vector_in_empty name, vector
|
2128
2264
|
else
|
2129
|
-
vec =
|
2265
|
+
vec = prepare_for_insert name, vector
|
2130
2266
|
|
2131
2267
|
assign_or_add_vector name, vec
|
2132
2268
|
end
|
@@ -2173,25 +2309,35 @@ module Daru
|
|
2173
2309
|
@data.map! { |v| v.empty? ? v.reindex(@index) : v }
|
2174
2310
|
end
|
2175
2311
|
|
2176
|
-
def
|
2177
|
-
if
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2182
|
-
@index.each do |idx|
|
2183
|
-
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2184
|
-
end
|
2185
|
-
}
|
2312
|
+
def prepare_for_insert name, arg
|
2313
|
+
if arg.is_a? Daru::Vector
|
2314
|
+
prepare_vector_for_insert name, arg
|
2315
|
+
elsif arg.respond_to?(:to_a)
|
2316
|
+
prepare_enum_for_insert name, arg
|
2186
2317
|
else
|
2187
|
-
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2318
|
+
prepare_value_for_insert name, arg
|
2319
|
+
end
|
2320
|
+
end
|
2321
|
+
|
2322
|
+
def prepare_vector_for_insert name, vector
|
2323
|
+
# Return a copy directly when the indexes already match, so that index-by-index assignment is avoided when possible.
|
2324
|
+
return vector.dup if vector.index == @index
|
2325
|
+
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2326
|
+
@index.each do |idx|
|
2327
|
+
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2191
2328
|
end
|
2329
|
+
}
|
2330
|
+
end
|
2192
2331
|
|
2193
|
-
|
2332
|
+
def prepare_enum_for_insert name, enum
|
2333
|
+
if @size != enum.size
|
2334
|
+
raise "Specified vector of length #{enum.size} cannot be inserted in DataFrame of size #{@size}"
|
2194
2335
|
end
|
2336
|
+
Daru::Vector.new(enum, name: coerce_name(name), index: @index)
|
2337
|
+
end
|
2338
|
+
|
2339
|
+
def prepare_value_for_insert name, value
|
2340
|
+
Daru::Vector.new(Array(value) * @size, name: coerce_name(name), index: @index)
|
2195
2341
|
end
|
2196
2342
|
|
2197
2343
|
def insert_or_modify_row indexes, vector
|
@@ -2276,8 +2422,10 @@ module Daru
|
|
2276
2422
|
|
2277
2423
|
case source.first
|
2278
2424
|
when Array
|
2425
|
+
vectors ||= (0..source.size-1).to_a
|
2279
2426
|
initialize_from_array_of_arrays source, vectors, index, opts
|
2280
2427
|
when Vector
|
2428
|
+
vectors ||= (0..source.size-1).to_a
|
2281
2429
|
initialize_from_array_of_vectors source, vectors, index, opts
|
2282
2430
|
when Hash
|
2283
2431
|
initialize_from_array_of_hashes source, vectors, index, opts
|
data/lib/daru/date_time/index.rb
CHANGED
@@ -226,7 +226,7 @@ module Daru
|
|
226
226
|
to_a.each(&block)
|
227
227
|
end
|
228
228
|
|
229
|
-
attr_reader :frequency, :offset, :periods
|
229
|
+
attr_reader :frequency, :offset, :periods, :keys
|
230
230
|
|
231
231
|
# Create a DateTimeIndex with or without a frequency in data. The constructor
|
232
232
|
# should be used for creating DateTimeIndex by directly passing in DateTime
|
@@ -253,6 +253,7 @@ module Daru
|
|
253
253
|
# DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer)
|
254
254
|
# #=>#<DateTimeIndex:84198340 offset=D periods=8 data=[2012-04-05T00:00:00+00:00...2012-04-12T00:00:00+00:00]>
|
255
255
|
def initialize data, opts={freq: nil}
|
256
|
+
super data
|
256
257
|
Helper.possibly_convert_to_date_time data
|
257
258
|
|
258
259
|
@offset =
|
@@ -405,7 +406,7 @@ module Daru
|
|
405
406
|
@data
|
406
407
|
else
|
407
408
|
@data.sort_by(&:last)
|
408
|
-
end.transpose.first
|
409
|
+
end.transpose.first || []
|
409
410
|
end
|
410
411
|
|
411
412
|
# Size of index.
|
@@ -419,6 +420,7 @@ module Daru
|
|
419
420
|
|
420
421
|
def inspect
|
421
422
|
meta = [@periods, @frequency ? "frequency=#{@frequency}" : nil].compact.join(', ')
|
423
|
+
return "#<#{self.class}(#{meta})>" if @data.empty?
|
422
424
|
"#<#{self.class}(#{meta}) " \
|
423
425
|
"#{@data.first[0]}...#{@data.last[0]}>"
|
424
426
|
end
|
@@ -490,7 +492,7 @@ module Daru
|
|
490
492
|
# @return [Array<Integer>] Array containing minutes of each index.
|
491
493
|
# @!method sec
|
492
494
|
# @return [Array<Integer>] Array containing seconds of each index.
|
493
|
-
[
|
495
|
+
%i[year month day hour min sec].each do |meth|
|
494
496
|
define_method(meth) do
|
495
497
|
each_with_object([]) do |d, arr|
|
496
498
|
arr << d.send(meth)
|
@@ -528,7 +530,7 @@ module Daru
|
|
528
530
|
slice first, last
|
529
531
|
end
|
530
532
|
|
531
|
-
def slice_between_dates first, last # rubocop:disable Metrics/AbcSize
|
533
|
+
def slice_between_dates first, last # rubocop:disable Metrics/AbcSize,Metrics/PerceivedComplexity
|
532
534
|
# about that ^ disable: I'm waiting for cleaner understanding
|
533
535
|
# of offsets logic. Reference: https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250
|
534
536
|
start = @data.bsearch { |d| d[0] >= first }
|
@@ -542,7 +544,7 @@ module Daru
|
|
542
544
|
st = @data.index(start)
|
543
545
|
en = after_en ? @data.index(after_en) - 1 : Helper.last_date(@data)[1]
|
544
546
|
return start[1] if st == en
|
545
|
-
DateTimeIndex.new(@data[st..en].transpose[0])
|
547
|
+
DateTimeIndex.new(@data[st..en].transpose[0] || []) # empty slice guard
|
546
548
|
end
|
547
549
|
end
|
548
550
|
|