daru 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -7
- data/CONTRIBUTING.md +1 -1
- data/History.md +48 -1
- data/README.md +3 -3
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +3 -2
- data/lib/daru.rb +14 -6
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +1 -1
- data/lib/daru/core/group_by.rb +32 -15
- data/lib/daru/core/query.rb +4 -4
- data/lib/daru/dataframe.rb +196 -48
- data/lib/daru/date_time/index.rb +7 -5
- data/lib/daru/formatters/table.rb +1 -0
- data/lib/daru/index/index.rb +121 -33
- data/lib/daru/index/multi_index.rb +83 -3
- data/lib/daru/io/csv/converters.rb +18 -0
- data/lib/daru/io/io.rb +80 -11
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +3 -50
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/statistics/dataframe.rb +9 -11
- data/lib/daru/maths/statistics/vector.rb +139 -32
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +4 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +169 -80
- data/lib/daru/version.rb +1 -1
- data/spec/category_spec.rb +19 -19
- data/spec/core/group_by_spec.rb +47 -0
- data/spec/core/query_spec.rb +55 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +118 -6
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +134 -41
- data/spec/index/multi_index_spec.rb +115 -31
- data/spec/io/io_spec.rb +201 -0
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/statistics/vector_spec.rb +136 -14
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +72 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +21 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +222 -72
- metadata +68 -23
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/dataframe.rb
CHANGED
@@ -20,7 +20,7 @@ module Daru
|
|
20
20
|
#
|
21
21
|
# == Arguments
|
22
22
|
#
|
23
|
-
# * path -
|
23
|
+
# * path - Local path / Remote URL of the file to load specified as a String.
|
24
24
|
#
|
25
25
|
# == Options
|
26
26
|
#
|
@@ -63,7 +63,7 @@ module Daru
|
|
63
63
|
|
64
64
|
# Reads a database query and returns a DataFrame
|
65
65
|
#
|
66
|
-
# @param dbh [DBI::DatabaseHandle] A DBI connection
|
66
|
+
# @param dbh [DBI::DatabaseHandle, String] A DBI connection OR path to a SQLite3 database.
|
67
67
|
# @param query [String] The query to be executed
|
68
68
|
#
|
69
69
|
# @return A dataframe containing the data resulting from the query
|
@@ -72,6 +72,11 @@ module Daru
|
|
72
72
|
#
|
73
73
|
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
74
74
|
# Daru::DataFrame.from_sql(dbh, "SELECT * FROM test")
|
75
|
+
#
|
76
|
+
# #Alternatively
|
77
|
+
#
|
78
|
+
# require 'dbi'
|
79
|
+
# Daru::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
|
75
80
|
def from_sql dbh, query
|
76
81
|
Daru::IO.from_sql dbh, query
|
77
82
|
end
|
@@ -112,6 +117,49 @@ module Daru
|
|
112
117
|
Daru::IO.from_plaintext path, fields
|
113
118
|
end
|
114
119
|
|
120
|
+
# Read the table data from a remote html file. Please note that this module
|
121
|
+
# works only for static table elements on a HTML page, and won't work in
|
122
|
+
# cases where the data is being loaded into the HTML table by Javascript.
|
123
|
+
#
|
124
|
+
# By default - all <th> tag elements in the first proper row are considered
|
125
|
+
# as the order, and all the <th> tag elements in the first column are
|
126
|
+
# considered as the index.
|
127
|
+
#
|
128
|
+
# == Arguments
|
129
|
+
#
|
130
|
+
# * path [String] - URL of the target HTML file.
|
131
|
+
# * fields [Hash] -
|
132
|
+
#
|
133
|
+
# +:match+ - A *String* to match and choose a particular table(s) from multiple tables of a HTML page.
|
134
|
+
#
|
135
|
+
# +:order+ - An *Array* which would act as the user-defined order, to override the parsed *Daru::DataFrame*.
|
136
|
+
#
|
137
|
+
# +:index+ - An *Array* which would act as the user-defined index, to override the parsed *Daru::DataFrame*.
|
138
|
+
#
|
139
|
+
# +:name+ - A *String* that manually assigns a name to the scraped *Daru::DataFrame*, for user's preference.
|
140
|
+
#
|
141
|
+
# == Returns
|
142
|
+
# An Array of +Daru::DataFrame+s, with each dataframe corresponding to a
|
143
|
+
# HTML table on that webpage.
|
144
|
+
#
|
145
|
+
# == Usage
|
146
|
+
# dfs = Daru::DataFrame.from_html("http://www.moneycontrol.com/", match: "Sun Pharma")
|
147
|
+
# dfs.count
|
148
|
+
# # => 4
|
149
|
+
#
|
150
|
+
# dfs.first
|
151
|
+
# #
|
152
|
+
# # => <Daru::DataFrame(5x4)>
|
153
|
+
# # Company Price Change Value (Rs
|
154
|
+
# # 0 Sun Pharma 502.60 -65.05 2,117.87
|
155
|
+
# # 1 Reliance 1356.90 19.60 745.10
|
156
|
+
# # 2 Tech Mahin 379.45 -49.70 650.22
|
157
|
+
# # 3 ITC 315.85 6.75 621.12
|
158
|
+
# # 4 HDFC 1598.85 50.95 553.91
|
159
|
+
def from_html path, fields={}
|
160
|
+
Daru::IO.from_html path, fields
|
161
|
+
end
|
162
|
+
|
115
163
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
116
164
|
# Daru::Vector objects.
|
117
165
|
def rows source, opts={}
|
@@ -239,6 +287,48 @@ module Daru
|
|
239
287
|
# # b 7 2
|
240
288
|
# # c 8 3
|
241
289
|
# # d 9 4
|
290
|
+
#
|
291
|
+
# df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)
|
292
|
+
#
|
293
|
+
# # =>
|
294
|
+
# # #<Daru::DataFrame: bat_man (4x2)>
|
295
|
+
# # 0 1
|
296
|
+
# # 0 1 6
|
297
|
+
# # 1 2 7
|
298
|
+
# # 2 3 8
|
299
|
+
# # 3 4 9
|
300
|
+
#
|
301
|
+
# # Dataframe having Index name
|
302
|
+
#
|
303
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
304
|
+
# index: Daru::Index.new([:a, :b, :c, :d], name: 'idx_name'),
|
305
|
+
# name: :spider_man)
|
306
|
+
#
|
307
|
+
# # =>
|
308
|
+
# # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
|
309
|
+
# # idx_name b a
|
310
|
+
# # a 6 1
|
311
|
+
# # b 7 2
|
312
|
+
# # c 8 3
|
313
|
+
# # d 9 4
|
314
|
+
#
|
315
|
+
#
|
316
|
+
# idx = Daru::Index.new [100, 99, 101, 1, 2], name: "s1"
|
317
|
+
# => #<Daru::Index(5): s1 {100, 99, 101, 1, 2}>
|
318
|
+
#
|
319
|
+
# df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
320
|
+
# c: [11,22,33,44,55]},
|
321
|
+
# order: [:a, :b, :c],
|
322
|
+
# index: idx)
|
323
|
+
# # =>
|
324
|
+
# #<Daru::DataFrame(5x3)>
|
325
|
+
# # s1 a b c
|
326
|
+
# # 100 1 11 11
|
327
|
+
# # 99 2 12 22
|
328
|
+
# # 101 3 13 33
|
329
|
+
# # 1 4 14 44
|
330
|
+
# # 2 5 15 55
|
331
|
+
|
242
332
|
def initialize source, opts={} # rubocop:disable Metrics/MethodLength
|
243
333
|
vectors, index = opts[:order], opts[:index] # FIXME: just keyword arges after Ruby 2.1
|
244
334
|
@data = []
|
@@ -457,7 +547,7 @@ module Daru
|
|
457
547
|
def dup vectors_to_dup=nil
|
458
548
|
vectors_to_dup = @vectors.to_a unless vectors_to_dup
|
459
549
|
|
460
|
-
src = vectors_to_dup.map { |vec| @data[@vectors
|
550
|
+
src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
|
461
551
|
new_order = Daru::Index.new(vectors_to_dup)
|
462
552
|
|
463
553
|
Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
|
@@ -544,7 +634,7 @@ module Daru
|
|
544
634
|
# b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
|
545
635
|
# c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
546
636
|
# }, index: 11..18)
|
547
|
-
# df
|
637
|
+
# df.replace_values nil, Float::NAN
|
548
638
|
# # => #<Daru::DataFrame(8x3)>
|
549
639
|
# # a b c
|
550
640
|
# # 11 1 a a
|
@@ -679,7 +769,7 @@ module Daru
|
|
679
769
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
680
770
|
# Default to :vector.
|
681
771
|
def map! axis=:vector, &block
|
682
|
-
if
|
772
|
+
if %i[vector column].include?(axis)
|
683
773
|
map_vectors!(&block)
|
684
774
|
elsif axis == :row
|
685
775
|
map_rows!(&block)
|
@@ -913,7 +1003,7 @@ module Daru
|
|
913
1003
|
|
914
1004
|
# Creates a new vector from the values of the given field, taken from the rows for which the block returns true
|
915
1005
|
def filter_vector vec, &block
|
916
|
-
Daru::Vector.new
|
1006
|
+
Daru::Vector.new(each_row.select(&block).map { |row| row[vec] })
|
917
1007
|
end
|
918
1008
|
|
919
1009
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
@@ -1031,7 +1121,7 @@ module Daru
|
|
1031
1121
|
alias :vector_missing_values :missing_values_rows
|
1032
1122
|
|
1033
1123
|
def has_missing_data?
|
1034
|
-
|
1124
|
+
@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
|
1035
1125
|
end
|
1036
1126
|
alias :flawed? :has_missing_data?
|
1037
1127
|
deprecate :has_missing_data?, :include_values?, 2016, 10
|
@@ -1119,7 +1209,7 @@ module Daru
|
|
1119
1209
|
# row[:a] < 3 and row[:b] == 'b'
|
1120
1210
|
# end #=> true
|
1121
1211
|
def any? axis=:vector, &block
|
1122
|
-
if
|
1212
|
+
if %i[vector column].include?(axis)
|
1123
1213
|
@data.any?(&block)
|
1124
1214
|
elsif axis == :row
|
1125
1215
|
each_row do |row|
|
@@ -1141,7 +1231,7 @@ module Daru
|
|
1141
1231
|
# row[:a] < 10
|
1142
1232
|
# end #=> true
|
1143
1233
|
def all? axis=:vector, &block
|
1144
|
-
if
|
1234
|
+
if %i[vector column].include?(axis)
|
1145
1235
|
@data.all?(&block)
|
1146
1236
|
elsif axis == :row
|
1147
1237
|
each_row.all?(&block)
|
@@ -1377,7 +1467,7 @@ module Daru
|
|
1377
1467
|
# df.rename_vectors :a => :alpha, :c => :gamma
|
1378
1468
|
# df.vectors.to_a #=> [:alpha, :b, :gamma]
|
1379
1469
|
def rename_vectors name_map
|
1380
|
-
existing_targets = name_map.
|
1470
|
+
existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a
|
1381
1471
|
delete_vectors(*existing_targets)
|
1382
1472
|
|
1383
1473
|
new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
|
@@ -1408,19 +1498,16 @@ module Daru
|
|
1408
1498
|
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
|
1409
1499
|
end
|
1410
1500
|
|
1411
|
-
# Generate a summary of this DataFrame
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
|
1419
|
-
@vectors.each do |v|
|
1420
|
-
g.text "Element:[#{v}]"
|
1421
|
-
g.parse_element(self[v])
|
1422
|
-
end
|
1501
|
+
# Generate a summary of this DataFrame based on individual vectors in the DataFrame
|
1502
|
+
# @return [String] String containing the summary of the DataFrame
|
1503
|
+
def summary
|
1504
|
+
summary = "= #{name}"
|
1505
|
+
summary << "\n Number of rows: #{nrows}"
|
1506
|
+
@vectors.each do |v|
|
1507
|
+
summary << "\n Element:[#{v}]\n"
|
1508
|
+
summary << self[v].summary(1)
|
1423
1509
|
end
|
1510
|
+
summary
|
1424
1511
|
end
|
1425
1512
|
|
1426
1513
|
# Sorts a dataframe (ascending/descending) in the given priority sequence of
|
@@ -1783,7 +1870,9 @@ module Daru
|
|
1783
1870
|
end
|
1784
1871
|
|
1785
1872
|
# Convert to html for IRuby.
|
1786
|
-
def to_html
|
1873
|
+
def to_html(threshold=30)
|
1874
|
+
table_thead = to_html_thead
|
1875
|
+
table_tbody = to_html_tbody(threshold)
|
1787
1876
|
path = if index.is_a?(MultiIndex)
|
1788
1877
|
File.expand_path('../iruby/templates/dataframe_mi.html.erb', __FILE__)
|
1789
1878
|
else
|
@@ -1792,8 +1881,28 @@ module Daru
|
|
1792
1881
|
ERB.new(File.read(path).strip).result(binding)
|
1793
1882
|
end
|
1794
1883
|
|
1884
|
+
def to_html_thead
|
1885
|
+
table_thead_path =
|
1886
|
+
if index.is_a?(MultiIndex)
|
1887
|
+
File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __FILE__)
|
1888
|
+
else
|
1889
|
+
File.expand_path('../iruby/templates/dataframe_thead.html.erb', __FILE__)
|
1890
|
+
end
|
1891
|
+
ERB.new(File.read(table_thead_path).strip).result(binding)
|
1892
|
+
end
|
1893
|
+
|
1894
|
+
def to_html_tbody(threshold=30)
|
1895
|
+
table_tbody_path =
|
1896
|
+
if index.is_a?(MultiIndex)
|
1897
|
+
File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __FILE__)
|
1898
|
+
else
|
1899
|
+
File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __FILE__)
|
1900
|
+
end
|
1901
|
+
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
1902
|
+
end
|
1903
|
+
|
1795
1904
|
def to_s
|
1796
|
-
|
1905
|
+
"#<#{self.class}#{': ' + @name.to_s if @name}(#{nrows}x#{ncols})>"
|
1797
1906
|
end
|
1798
1907
|
|
1799
1908
|
# Method for updating the metadata (i.e. missing value positions) of the
|
@@ -1900,14 +2009,13 @@ module Daru
|
|
1900
2009
|
|
1901
2010
|
# Pretty print in a nice table format for the command line (irb/pry/iruby)
|
1902
2011
|
def inspect spacing=10, threshold=15
|
1903
|
-
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
1904
2012
|
name_part = @name ? ": #{@name} " : ''
|
1905
2013
|
|
1906
2014
|
"#<#{self.class}#{name_part}(#{nrows}x#{ncols})>\n" +
|
1907
2015
|
Formatters::Table.format(
|
1908
2016
|
each_row.lazy,
|
1909
2017
|
row_headers: row_headers,
|
1910
|
-
headers:
|
2018
|
+
headers: headers,
|
1911
2019
|
threshold: threshold,
|
1912
2020
|
spacing: spacing
|
1913
2021
|
)
|
@@ -2002,8 +2110,32 @@ module Daru
|
|
2002
2110
|
end
|
2003
2111
|
end
|
2004
2112
|
|
2113
|
+
# Returns an array of row tuples at the given index(es)
|
2114
|
+
def access_row_tuples_by_indexs *indexes
|
2115
|
+
positions = @index.pos(*indexes)
|
2116
|
+
|
2117
|
+
return populate_row_for(positions) if positions.is_a? Numeric
|
2118
|
+
|
2119
|
+
res = []
|
2120
|
+
new_rows = @data.map { |vec| vec[*indexes] }
|
2121
|
+
indexes.each do |index|
|
2122
|
+
tuples = []
|
2123
|
+
new_rows.map { |row| tuples += [row[index]] }
|
2124
|
+
res << tuples
|
2125
|
+
end
|
2126
|
+
res
|
2127
|
+
end
|
2128
|
+
|
2005
2129
|
private
|
2006
2130
|
|
2131
|
+
def headers
|
2132
|
+
Daru::Index.new(Array(index.name) + @vectors.to_a)
|
2133
|
+
end
|
2134
|
+
|
2135
|
+
def row_headers
|
2136
|
+
index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
2137
|
+
end
|
2138
|
+
|
2007
2139
|
def convert_categorical_vectors names
|
2008
2140
|
names.map do |n|
|
2009
2141
|
next unless self[n].category?
|
@@ -2034,7 +2166,7 @@ module Daru
|
|
2034
2166
|
end
|
2035
2167
|
|
2036
2168
|
def dispatch_to_axis(axis, method, *args, &block)
|
2037
|
-
if
|
2169
|
+
if %i[vector column].include?(axis)
|
2038
2170
|
send("#{method}_vector", *args, &block)
|
2039
2171
|
elsif axis == :row
|
2040
2172
|
send("#{method}_row", *args, &block)
|
@@ -2044,7 +2176,7 @@ module Daru
|
|
2044
2176
|
end
|
2045
2177
|
|
2046
2178
|
def dispatch_to_axis_pl(axis, method, *args, &block)
|
2047
|
-
if
|
2179
|
+
if %i[vector column].include?(axis)
|
2048
2180
|
send("#{method}_vectors", *args, &block)
|
2049
2181
|
elsif axis == :row
|
2050
2182
|
send("#{method}_rows", *args, &block)
|
@@ -2053,7 +2185,7 @@ module Daru
|
|
2053
2185
|
end
|
2054
2186
|
end
|
2055
2187
|
|
2056
|
-
AXES = [
|
2188
|
+
AXES = %i[row vector].freeze
|
2057
2189
|
|
2058
2190
|
def extract_axis names, default=:vector
|
2059
2191
|
if AXES.include?(names.last)
|
@@ -2065,7 +2197,7 @@ module Daru
|
|
2065
2197
|
|
2066
2198
|
def access_vector *names
|
2067
2199
|
if names.first.is_a?(Range)
|
2068
|
-
dup(@vectors
|
2200
|
+
dup(@vectors.subset(names.first))
|
2069
2201
|
elsif @vectors.is_a?(MultiIndex)
|
2070
2202
|
access_vector_multi_index(*names)
|
2071
2203
|
else
|
@@ -2087,14 +2219,18 @@ module Daru
|
|
2087
2219
|
|
2088
2220
|
def access_vector_single_index *names
|
2089
2221
|
if names.count < 2
|
2090
|
-
|
2222
|
+
begin
|
2223
|
+
pos = @vectors.is_a?(Daru::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
|
2224
|
+
rescue IndexError
|
2225
|
+
raise IndexError, "Specified vector #{names.first} does not exist"
|
2226
|
+
end
|
2091
2227
|
|
2092
2228
|
return @data[pos] if pos.is_a?(Numeric)
|
2093
2229
|
|
2094
2230
|
names = pos
|
2095
2231
|
end
|
2096
2232
|
|
2097
|
-
new_vectors = names.map { |name| [name, @data[@vectors
|
2233
|
+
new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h
|
2098
2234
|
|
2099
2235
|
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2100
2236
|
Daru::DataFrame.new(new_vectors, order: order,
|
@@ -2126,7 +2262,7 @@ module Daru
|
|
2126
2262
|
if @index.empty?
|
2127
2263
|
insert_vector_in_empty name, vector
|
2128
2264
|
else
|
2129
|
-
vec =
|
2265
|
+
vec = prepare_for_insert name, vector
|
2130
2266
|
|
2131
2267
|
assign_or_add_vector name, vec
|
2132
2268
|
end
|
@@ -2173,25 +2309,35 @@ module Daru
|
|
2173
2309
|
@data.map! { |v| v.empty? ? v.reindex(@index) : v }
|
2174
2310
|
end
|
2175
2311
|
|
2176
|
-
def
|
2177
|
-
if
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2182
|
-
@index.each do |idx|
|
2183
|
-
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2184
|
-
end
|
2185
|
-
}
|
2312
|
+
def prepare_for_insert name, arg
|
2313
|
+
if arg.is_a? Daru::Vector
|
2314
|
+
prepare_vector_for_insert name, arg
|
2315
|
+
elsif arg.respond_to?(:to_a)
|
2316
|
+
prepare_enum_for_insert name, arg
|
2186
2317
|
else
|
2187
|
-
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2318
|
+
prepare_value_for_insert name, arg
|
2319
|
+
end
|
2320
|
+
end
|
2321
|
+
|
2322
|
+
def prepare_vector_for_insert name, vector
|
2323
|
+
# Fast path: when the vector's index already equals the DataFrame's index, return a copy so that index-by-index assignment is avoided.
|
2324
|
+
return vector.dup if vector.index == @index
|
2325
|
+
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2326
|
+
@index.each do |idx|
|
2327
|
+
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2191
2328
|
end
|
2329
|
+
}
|
2330
|
+
end
|
2192
2331
|
|
2193
|
-
|
2332
|
+
def prepare_enum_for_insert name, enum
|
2333
|
+
if @size != enum.size
|
2334
|
+
raise "Specified vector of length #{enum.size} cannot be inserted in DataFrame of size #{@size}"
|
2194
2335
|
end
|
2336
|
+
Daru::Vector.new(enum, name: coerce_name(name), index: @index)
|
2337
|
+
end
|
2338
|
+
|
2339
|
+
def prepare_value_for_insert name, value
|
2340
|
+
Daru::Vector.new(Array(value) * @size, name: coerce_name(name), index: @index)
|
2195
2341
|
end
|
2196
2342
|
|
2197
2343
|
def insert_or_modify_row indexes, vector
|
@@ -2276,8 +2422,10 @@ module Daru
|
|
2276
2422
|
|
2277
2423
|
case source.first
|
2278
2424
|
when Array
|
2425
|
+
vectors ||= (0..source.size-1).to_a
|
2279
2426
|
initialize_from_array_of_arrays source, vectors, index, opts
|
2280
2427
|
when Vector
|
2428
|
+
vectors ||= (0..source.size-1).to_a
|
2281
2429
|
initialize_from_array_of_vectors source, vectors, index, opts
|
2282
2430
|
when Hash
|
2283
2431
|
initialize_from_array_of_hashes source, vectors, index, opts
|
data/lib/daru/date_time/index.rb
CHANGED
@@ -226,7 +226,7 @@ module Daru
|
|
226
226
|
to_a.each(&block)
|
227
227
|
end
|
228
228
|
|
229
|
-
attr_reader :frequency, :offset, :periods
|
229
|
+
attr_reader :frequency, :offset, :periods, :keys
|
230
230
|
|
231
231
|
# Create a DateTimeIndex with or without a frequency in data. The constructor
|
232
232
|
# should be used for creating DateTimeIndex by directly passing in DateTime
|
@@ -253,6 +253,7 @@ module Daru
|
|
253
253
|
# DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer)
|
254
254
|
# #=>#<DateTimeIndex:84198340 offset=D periods=8 data=[2012-04-05T00:00:00+00:00...2012-04-12T00:00:00+00:00]>
|
255
255
|
def initialize data, opts={freq: nil}
|
256
|
+
super data
|
256
257
|
Helper.possibly_convert_to_date_time data
|
257
258
|
|
258
259
|
@offset =
|
@@ -405,7 +406,7 @@ module Daru
|
|
405
406
|
@data
|
406
407
|
else
|
407
408
|
@data.sort_by(&:last)
|
408
|
-
end.transpose.first
|
409
|
+
end.transpose.first || []
|
409
410
|
end
|
410
411
|
|
411
412
|
# Size of index.
|
@@ -419,6 +420,7 @@ module Daru
|
|
419
420
|
|
420
421
|
def inspect
|
421
422
|
meta = [@periods, @frequency ? "frequency=#{@frequency}" : nil].compact.join(', ')
|
423
|
+
return "#<#{self.class}(#{meta})>" if @data.empty?
|
422
424
|
"#<#{self.class}(#{meta}) " \
|
423
425
|
"#{@data.first[0]}...#{@data.last[0]}>"
|
424
426
|
end
|
@@ -490,7 +492,7 @@ module Daru
|
|
490
492
|
# @return [Array<Integer>] Array containing minutes of each index.
|
491
493
|
# @!method sec
|
492
494
|
# @return [Array<Integer>] Array containing seconds of each index.
|
493
|
-
[
|
495
|
+
%i[year month day hour min sec].each do |meth|
|
494
496
|
define_method(meth) do
|
495
497
|
each_with_object([]) do |d, arr|
|
496
498
|
arr << d.send(meth)
|
@@ -528,7 +530,7 @@ module Daru
|
|
528
530
|
slice first, last
|
529
531
|
end
|
530
532
|
|
531
|
-
def slice_between_dates first, last # rubocop:disable Metrics/AbcSize
|
533
|
+
def slice_between_dates first, last # rubocop:disable Metrics/AbcSize,Metrics/PerceivedComplexity
|
532
534
|
# about that ^ disable: I'm waiting for cleaner understanding
|
533
535
|
# of offsets logic. Reference: https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250
|
534
536
|
start = @data.bsearch { |d| d[0] >= first }
|
@@ -542,7 +544,7 @@ module Daru
|
|
542
544
|
st = @data.index(start)
|
543
545
|
en = after_en ? @data.index(after_en) - 1 : Helper.last_date(@data)[1]
|
544
546
|
return start[1] if st == en
|
545
|
-
DateTimeIndex.new(@data[st..en].transpose[0])
|
547
|
+
DateTimeIndex.new(@data[st..en].transpose[0] || []) # empty slice guard
|
546
548
|
end
|
547
549
|
end
|
548
550
|
|