daru 0.1.3.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
data/lib/daru/io/io.rb
CHANGED
@@ -4,14 +4,32 @@ module Daru
|
|
4
4
|
def process_row(row,empty)
|
5
5
|
row.to_a.map do |c|
|
6
6
|
if empty.include?(c)
|
7
|
+
# FIXME: As far as I can guess, it will never work.
|
8
|
+
# It is called only inside `from_plaintext`, and there
|
9
|
+
# data is split by `\s+` -- there is no chance that
|
10
|
+
# "empty" (currently just '') will be between data?..
|
7
11
|
nil
|
8
|
-
elsif c.is_a?(String) && c.is_number?
|
9
|
-
c =~ /^\d+$/ ? c.to_i : c.tr(',','.').to_f
|
10
12
|
else
|
11
|
-
c
|
13
|
+
try_string_to_number(c)
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
INT_PATTERN = /^[-+]?\d+$/
|
21
|
+
FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/
|
22
|
+
|
23
|
+
def try_string_to_number(s)
|
24
|
+
case s
|
25
|
+
when INT_PATTERN
|
26
|
+
s.to_i
|
27
|
+
when FLOAT_PATTERN
|
28
|
+
s.tr(',', '.').to_f
|
29
|
+
else
|
30
|
+
s
|
31
|
+
end
|
32
|
+
end
|
15
33
|
end
|
16
34
|
end
|
17
35
|
|
@@ -27,7 +45,7 @@ module Daru
|
|
27
45
|
worksheet_id = opts[:worksheet_id]
|
28
46
|
book = Spreadsheet.open path
|
29
47
|
worksheet = book.worksheet worksheet_id
|
30
|
-
headers = worksheet.row(0).
|
48
|
+
headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
|
31
49
|
|
32
50
|
df = Daru::DataFrame.new({})
|
33
51
|
headers.each_with_index do |h,i|
|
@@ -57,46 +75,18 @@ module Daru
|
|
57
75
|
|
58
76
|
# Functions for loading/writing CSV files
|
59
77
|
def from_csv path, opts={}
|
60
|
-
opts
|
61
|
-
opts[:converters] ||= :numeric
|
62
|
-
|
63
|
-
daru_options = opts.keys.each_with_object({}) do |k, hash|
|
64
|
-
if [:clone, :order, :index, :name].include?(k)
|
65
|
-
hash[k] = opts[k]
|
66
|
-
opts.delete k
|
67
|
-
end
|
68
|
-
end
|
78
|
+
daru_options, opts = from_csv_prepare_opts opts
|
69
79
|
|
70
80
|
# Preprocess headers for detecting and correcting repetition in
|
71
81
|
# case the :headers option is not specified.
|
72
|
-
|
73
|
-
opts[:
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
hsh = {}
|
79
|
-
csv.by_col.each do |col_name, values|
|
80
|
-
hsh[col_name] = values
|
81
|
-
end
|
82
|
-
else
|
83
|
-
csv = ::CSV.open(path, 'rb', opts)
|
84
|
-
yield csv if block_given?
|
85
|
-
|
86
|
-
csv_as_arrays = csv.to_a
|
87
|
-
headers = csv_as_arrays[0].recode_repeated.map
|
88
|
-
csv_as_arrays.delete_at 0
|
89
|
-
csv_as_arrays = csv_as_arrays.transpose
|
90
|
-
|
91
|
-
hsh = {}
|
92
|
-
headers.each_with_index do |h, i|
|
93
|
-
hsh[h] = csv_as_arrays[i]
|
82
|
+
hsh =
|
83
|
+
if opts[:headers]
|
84
|
+
from_csv_hash_with_headers(path, opts)
|
85
|
+
else
|
86
|
+
from_csv_hash(path, opts)
|
87
|
+
.tap { |hash| daru_options[:order] = hash.keys }
|
94
88
|
end
|
95
89
|
|
96
|
-
# Order columns as given in CSV
|
97
|
-
daru_options[:order] = headers.to_a
|
98
|
-
end
|
99
|
-
|
100
90
|
Daru::DataFrame.new(hsh,daru_options)
|
101
91
|
end
|
102
92
|
|
@@ -154,12 +144,7 @@ module Daru
|
|
154
144
|
fields = fields.map(&:to_sym)
|
155
145
|
end
|
156
146
|
|
157
|
-
vectors =
|
158
|
-
[
|
159
|
-
name,
|
160
|
-
Daru::Vector.new([]).tap { |v| v.rename name }
|
161
|
-
]
|
162
|
-
}.flatten]
|
147
|
+
vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h
|
163
148
|
|
164
149
|
Daru::DataFrame.new(vectors, order: fields).tap do |df|
|
165
150
|
relation.pluck(*fields).each do |record|
|
@@ -200,6 +185,42 @@ module Daru
|
|
200
185
|
false
|
201
186
|
end
|
202
187
|
end
|
188
|
+
|
189
|
+
private
|
190
|
+
|
191
|
+
DARU_OPT_KEYS = [:clone, :order, :index, :name].freeze
|
192
|
+
|
193
|
+
def from_csv_prepare_opts opts
|
194
|
+
opts[:col_sep] ||= ','
|
195
|
+
opts[:converters] ||= :numeric
|
196
|
+
|
197
|
+
daru_options = opts.keys.each_with_object({}) do |k, hash|
|
198
|
+
hash[k] = opts.delete(k) if DARU_OPT_KEYS.include?(k)
|
199
|
+
end
|
200
|
+
[daru_options, opts]
|
201
|
+
end
|
202
|
+
|
203
|
+
def from_csv_hash_with_headers(path, opts)
|
204
|
+
opts[:header_converters] ||= :symbol
|
205
|
+
|
206
|
+
::CSV
|
207
|
+
.read(path, 'rb',opts)
|
208
|
+
.tap { |c| yield c if block_given? }
|
209
|
+
.by_col.map { |col_name, values| [col_name, values] }.to_h
|
210
|
+
end
|
211
|
+
|
212
|
+
def from_csv_hash(path, opts)
|
213
|
+
csv_as_arrays =
|
214
|
+
::CSV
|
215
|
+
.open(path, 'rb', opts)
|
216
|
+
.tap { |c| yield c if block_given? }
|
217
|
+
.to_a
|
218
|
+
|
219
|
+
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
|
220
|
+
csv_as_arrays = csv_as_arrays.transpose
|
221
|
+
|
222
|
+
headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
|
223
|
+
end
|
203
224
|
end
|
204
225
|
end
|
205
226
|
end
|
@@ -1,55 +1,52 @@
|
|
1
1
|
module Daru
|
2
2
|
module IO
|
3
3
|
class SqlDataSource
|
4
|
-
# Private adapter class for DBI::DatabaseHandle
|
5
4
|
# @private
|
6
|
-
class
|
7
|
-
def initialize(
|
8
|
-
@
|
5
|
+
class Adapter
|
6
|
+
def initialize(conn, query)
|
7
|
+
@conn = conn
|
9
8
|
@query = query
|
10
9
|
end
|
11
10
|
|
12
|
-
def
|
13
|
-
|
14
|
-
|
15
|
-
|
11
|
+
def result_hash
|
12
|
+
column_names
|
13
|
+
.map(&:to_sym)
|
14
|
+
.zip(rows.transpose)
|
15
|
+
.to_h
|
16
16
|
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Private adapter class for DBI::DatabaseHandle
|
20
|
+
# @private
|
21
|
+
class DbiAdapter < Adapter
|
22
|
+
private
|
17
23
|
|
18
|
-
def
|
19
|
-
result.
|
20
|
-
yield(row.to_a)
|
21
|
-
end
|
24
|
+
def column_names
|
25
|
+
result.column_names
|
22
26
|
end
|
23
27
|
|
24
|
-
|
28
|
+
def rows
|
29
|
+
result.to_a.map(&:to_a)
|
30
|
+
end
|
25
31
|
|
26
32
|
def result
|
27
|
-
@result ||= @
|
33
|
+
@result ||= @conn.execute(@query)
|
28
34
|
end
|
29
35
|
end
|
30
36
|
|
31
37
|
# Private adapter class for connections of ActiveRecord
|
32
38
|
# @private
|
33
|
-
class ActiveRecordConnectionAdapter
|
34
|
-
|
35
|
-
@conn = conn
|
36
|
-
@query = query
|
37
|
-
end
|
39
|
+
class ActiveRecordConnectionAdapter < Adapter
|
40
|
+
private
|
38
41
|
|
39
|
-
def
|
40
|
-
result.columns
|
41
|
-
yield(column_name.to_sym)
|
42
|
-
end
|
42
|
+
def column_names
|
43
|
+
result.columns
|
43
44
|
end
|
44
45
|
|
45
|
-
def
|
46
|
-
result.
|
47
|
-
yield(row.values)
|
48
|
-
end
|
46
|
+
def rows
|
47
|
+
result.cast_values
|
49
48
|
end
|
50
49
|
|
51
|
-
private
|
52
|
-
|
53
50
|
def result
|
54
51
|
@result ||= @conn.exec_query(@query)
|
55
52
|
end
|
@@ -67,50 +64,24 @@ module Daru
|
|
67
64
|
end
|
68
65
|
|
69
66
|
def make_dataframe
|
70
|
-
|
71
|
-
fields = []
|
72
|
-
@adapter.each_column_name do |column_name|
|
73
|
-
vectors[column_name] = Daru::Vector.new([])
|
74
|
-
vectors[column_name].rename column_name
|
75
|
-
fields.push column_name
|
76
|
-
end
|
77
|
-
|
78
|
-
df = Daru::DataFrame.new(vectors, order: fields)
|
79
|
-
@adapter.each_row do |row|
|
80
|
-
df.add_row(row)
|
81
|
-
end
|
82
|
-
|
83
|
-
df.update
|
84
|
-
|
85
|
-
df
|
67
|
+
Daru::DataFrame.new(@adapter.result_hash).tap(&:update)
|
86
68
|
end
|
87
69
|
|
88
70
|
private
|
89
71
|
|
90
72
|
def init_adapter(db, query)
|
91
|
-
|
92
|
-
|
93
|
-
rescue
|
94
|
-
raise ArgumentError, 'query must be a string'
|
95
|
-
end
|
73
|
+
query = String.try_convert(query) or
|
74
|
+
raise ArgumentError, "Query must be a string, #{query.class} received"
|
96
75
|
|
97
|
-
case
|
98
|
-
when
|
76
|
+
case db
|
77
|
+
when DBI::DatabaseHandle
|
99
78
|
DbiAdapter.new(db, query)
|
100
|
-
when
|
79
|
+
when ActiveRecord::ConnectionAdapters::AbstractAdapter
|
101
80
|
ActiveRecordConnectionAdapter.new(db, query)
|
102
81
|
else
|
103
|
-
raise ArgumentError,
|
82
|
+
raise ArgumentError, "Unknown database adapter type #{db.class}"
|
104
83
|
end
|
105
84
|
end
|
106
|
-
|
107
|
-
def check_dbi(obj)
|
108
|
-
obj.is_a?(DBI::DatabaseHandle)
|
109
|
-
end
|
110
|
-
|
111
|
-
def check_active_record_connection(obj)
|
112
|
-
obj.is_a?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
113
|
-
end
|
114
85
|
end
|
115
86
|
end
|
116
87
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Daru
|
2
|
+
# @private
|
3
|
+
module IRuby
|
4
|
+
module Helpers
|
5
|
+
module_function
|
6
|
+
|
7
|
+
def tuples_with_rowspans(index)
|
8
|
+
index.sparse_tuples.transpose
|
9
|
+
.map { |r| nils_counted(r) }
|
10
|
+
.transpose.map(&:compact)
|
11
|
+
end
|
12
|
+
|
13
|
+
def tuples_with_colspans(index)
|
14
|
+
index.sparse_tuples.transpose
|
15
|
+
.map { |r| nils_counted(r) }
|
16
|
+
.map(&:compact)
|
17
|
+
end
|
18
|
+
|
19
|
+
# It is complicated, but the only algo I could think of.
|
20
|
+
# It does [:a, nil, nil, :b, nil, :c] # =>
|
21
|
+
# [[:a,3], nil, nil, [:b,2], nil, [:c,1]]
|
22
|
+
# Needed by tuples_with_colspans/rowspans, which we need for pretty HTML
|
23
|
+
def nils_counted array
|
24
|
+
grouped = [[array.first]]
|
25
|
+
array[1..-1].each do |val|
|
26
|
+
if val
|
27
|
+
grouped << [val]
|
28
|
+
else
|
29
|
+
grouped.last << val
|
30
|
+
end
|
31
|
+
end
|
32
|
+
grouped.flat_map { |items|
|
33
|
+
[[items.first, items.count], *[nil] * (items.count - 1)]
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
<table>
|
2
|
+
<tr>
|
3
|
+
<th colspan='<%= @vectors.size+1 %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
|
4
|
+
</tr>
|
5
|
+
|
6
|
+
<% if @vectors.is_a? MultiIndex %>
|
7
|
+
<% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
|
8
|
+
<tr>
|
9
|
+
<% if idx.zero? %>
|
10
|
+
<th rowspan="<%= @vectors.width %>"></th>
|
11
|
+
<% end %>
|
12
|
+
<% tuple.each do |idx, span| %>
|
13
|
+
<th colspan="<%= span %>"><%= idx %></th>
|
14
|
+
<% end %>
|
15
|
+
</tr>
|
16
|
+
<% end %>
|
17
|
+
<% else %>
|
18
|
+
<tr>
|
19
|
+
<th></th>
|
20
|
+
<% @vectors.each do |vector| %>
|
21
|
+
<th><%=vector%></th>
|
22
|
+
<% end %>
|
23
|
+
</tr>
|
24
|
+
<% end %>
|
25
|
+
|
26
|
+
<% @index.first(threshold).each_with_index do |index, pos| %>
|
27
|
+
<tr>
|
28
|
+
<td><%= index %></td>
|
29
|
+
<% row.at(pos).each do |element| %>
|
30
|
+
<td><%= element.to_s %></td>
|
31
|
+
<% end %>
|
32
|
+
</tr>
|
33
|
+
<% end %>
|
34
|
+
|
35
|
+
<% if nrows > threshold %>
|
36
|
+
<tr>
|
37
|
+
<% (@vectors.size + 1).times do %>
|
38
|
+
<td>...</td>
|
39
|
+
<% end %>
|
40
|
+
</tr>
|
41
|
+
|
42
|
+
<% last_index = @index.to_a.last
|
43
|
+
last_row = row_at @index.size-1 %>
|
44
|
+
|
45
|
+
<tr>
|
46
|
+
<td><%= last_index %></td>
|
47
|
+
<% last_row.each do |element| %>
|
48
|
+
<td><%= element.to_s %></td>
|
49
|
+
<% end %>
|
50
|
+
</tr>
|
51
|
+
<% end %>
|
52
|
+
</table>
|
@@ -0,0 +1,58 @@
|
|
1
|
+
<table>
|
2
|
+
<tr>
|
3
|
+
<th colspan='<%= @vectors.size+index.width %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
|
4
|
+
</tr>
|
5
|
+
<% if @vectors.is_a? MultiIndex %>
|
6
|
+
<% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
|
7
|
+
<tr>
|
8
|
+
<% if idx.zero? %>
|
9
|
+
<th colspan="<%= index.width %>" rowspan="<%= @vectors.width %>"></th>
|
10
|
+
<% end %>
|
11
|
+
<% tuple.each do |idx, span| %>
|
12
|
+
<th colspan="<%= span %>"><%= idx %></th>
|
13
|
+
<% end %>
|
14
|
+
</tr>
|
15
|
+
<% end %>
|
16
|
+
<% else %>
|
17
|
+
<tr>
|
18
|
+
<th colspan="<%= index.width %>"></th>
|
19
|
+
<% @vectors.each do |vector| %>
|
20
|
+
<th><%=vector%></th>
|
21
|
+
<% end %>
|
22
|
+
</tr>
|
23
|
+
<% end %>
|
24
|
+
|
25
|
+
<% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(@index.to_a).each do |tuple, index| %>
|
26
|
+
<tr>
|
27
|
+
<% tuple.each do |idx, span| %>
|
28
|
+
<th rowspan="<%= span %>"><%= idx %></th>
|
29
|
+
<% end %>
|
30
|
+
<% row[index].each do |element| %>
|
31
|
+
<td><%= element.to_s %></td>
|
32
|
+
<% end %>
|
33
|
+
</tr>
|
34
|
+
<% end %>
|
35
|
+
|
36
|
+
<% if nrows > threshold %>
|
37
|
+
<tr>
|
38
|
+
<% index.width.times do %>
|
39
|
+
<th>...</th>
|
40
|
+
<% end %>
|
41
|
+
<% @vectors.size.times do %>
|
42
|
+
<td>...</td>
|
43
|
+
<% end %>
|
44
|
+
</tr>
|
45
|
+
|
46
|
+
<% last_index = @index.to_a.last
|
47
|
+
last_row = row[last_index] %>
|
48
|
+
|
49
|
+
<tr>
|
50
|
+
<% last_index.each do |idx| %>
|
51
|
+
<th><%= idx %></th>
|
52
|
+
<% end %>
|
53
|
+
<% last_row.each do |element| %>
|
54
|
+
<td><%= element.to_s %></td>
|
55
|
+
<% end %>
|
56
|
+
</tr>
|
57
|
+
<% end %>
|
58
|
+
</table>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
<table>
|
2
|
+
<tr>
|
3
|
+
<th colspan="<%= width %>">Daru::MultiIndex(<%= size %>x<%= width %>)</th>
|
4
|
+
</tr>
|
5
|
+
<% Daru::IRuby::Helpers.tuples_with_rowspans(self).each do |row| %>
|
6
|
+
<tr>
|
7
|
+
<% row.each do |val, span| %>
|
8
|
+
<th rowspan="<%= span %>"><%= val %></th>
|
9
|
+
<% end %>
|
10
|
+
</tr>
|
11
|
+
<% end %>
|
12
|
+
</table>
|