daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -4,14 +4,32 @@ module Daru
4
4
  def process_row(row,empty)
5
5
  row.to_a.map do |c|
6
6
  if empty.include?(c)
7
+ # FIXME: As far as I can guess, it will never work.
8
+ # It is called only inside `from_plaintext`, and there
9
+ # data is split by `\s+` -- there is no chance that
10
+ # "empty" (currently just '') will be between data?..
7
11
  nil
8
- elsif c.is_a?(String) && c.is_number?
9
- c =~ /^\d+$/ ? c.to_i : c.tr(',','.').to_f
10
12
  else
11
- c
13
+ try_string_to_number(c)
12
14
  end
13
15
  end
14
16
  end
17
+
18
+ private
19
+
20
+ INT_PATTERN = /^[-+]?\d+$/
21
+ FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/
22
+
23
+ def try_string_to_number(s)
24
+ case s
25
+ when INT_PATTERN
26
+ s.to_i
27
+ when FLOAT_PATTERN
28
+ s.tr(',', '.').to_f
29
+ else
30
+ s
31
+ end
32
+ end
15
33
  end
16
34
  end
17
35
 
@@ -27,7 +45,7 @@ module Daru
27
45
  worksheet_id = opts[:worksheet_id]
28
46
  book = Spreadsheet.open path
29
47
  worksheet = book.worksheet worksheet_id
30
- headers = worksheet.row(0).recode_repeated.map(&:to_sym)
48
+ headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
31
49
 
32
50
  df = Daru::DataFrame.new({})
33
51
  headers.each_with_index do |h,i|
@@ -57,46 +75,18 @@ module Daru
57
75
 
58
76
  # Functions for loading/writing CSV files
59
77
  def from_csv path, opts={}
60
- opts[:col_sep] ||= ','
61
- opts[:converters] ||= :numeric
62
-
63
- daru_options = opts.keys.each_with_object({}) do |k, hash|
64
- if [:clone, :order, :index, :name].include?(k)
65
- hash[k] = opts[k]
66
- opts.delete k
67
- end
68
- end
78
+ daru_options, opts = from_csv_prepare_opts opts
69
79
 
70
80
  # Preprocess headers for detecting and correcting repetition in
71
81
  # case the :headers option is not specified.
72
- if opts[:headers]
73
- opts[:header_converters] ||= :symbol
74
-
75
- csv = ::CSV.read(path, 'rb',opts)
76
- yield csv if block_given?
77
-
78
- hsh = {}
79
- csv.by_col.each do |col_name, values|
80
- hsh[col_name] = values
81
- end
82
- else
83
- csv = ::CSV.open(path, 'rb', opts)
84
- yield csv if block_given?
85
-
86
- csv_as_arrays = csv.to_a
87
- headers = csv_as_arrays[0].recode_repeated.map
88
- csv_as_arrays.delete_at 0
89
- csv_as_arrays = csv_as_arrays.transpose
90
-
91
- hsh = {}
92
- headers.each_with_index do |h, i|
93
- hsh[h] = csv_as_arrays[i]
82
+ hsh =
83
+ if opts[:headers]
84
+ from_csv_hash_with_headers(path, opts)
85
+ else
86
+ from_csv_hash(path, opts)
87
+ .tap { |hash| daru_options[:order] = hash.keys }
94
88
  end
95
89
 
96
- # Order columns as given in CSV
97
- daru_options[:order] = headers.to_a
98
- end
99
-
100
90
  Daru::DataFrame.new(hsh,daru_options)
101
91
  end
102
92
 
@@ -154,12 +144,7 @@ module Daru
154
144
  fields = fields.map(&:to_sym)
155
145
  end
156
146
 
157
- vectors = Hash[*fields.map { |name|
158
- [
159
- name,
160
- Daru::Vector.new([]).tap { |v| v.rename name }
161
- ]
162
- }.flatten]
147
+ vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h
163
148
 
164
149
  Daru::DataFrame.new(vectors, order: fields).tap do |df|
165
150
  relation.pluck(*fields).each do |record|
@@ -200,6 +185,42 @@ module Daru
200
185
  false
201
186
  end
202
187
  end
188
+
189
+ private
190
+
191
+ DARU_OPT_KEYS = [:clone, :order, :index, :name].freeze
192
+
193
+ def from_csv_prepare_opts opts
194
+ opts[:col_sep] ||= ','
195
+ opts[:converters] ||= :numeric
196
+
197
+ daru_options = opts.keys.each_with_object({}) do |k, hash|
198
+ hash[k] = opts.delete(k) if DARU_OPT_KEYS.include?(k)
199
+ end
200
+ [daru_options, opts]
201
+ end
202
+
203
+ def from_csv_hash_with_headers(path, opts)
204
+ opts[:header_converters] ||= :symbol
205
+
206
+ ::CSV
207
+ .read(path, 'rb',opts)
208
+ .tap { |c| yield c if block_given? }
209
+ .by_col.map { |col_name, values| [col_name, values] }.to_h
210
+ end
211
+
212
+ def from_csv_hash(path, opts)
213
+ csv_as_arrays =
214
+ ::CSV
215
+ .open(path, 'rb', opts)
216
+ .tap { |c| yield c if block_given? }
217
+ .to_a
218
+
219
+ headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
220
+ csv_as_arrays = csv_as_arrays.transpose
221
+
222
+ headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
223
+ end
203
224
  end
204
225
  end
205
226
  end
@@ -1,55 +1,52 @@
1
1
  module Daru
2
2
  module IO
3
3
  class SqlDataSource
4
- # Private adapter class for DBI::DatabaseHandle
5
4
  # @private
6
- class DbiAdapter
7
- def initialize(dbh, query)
8
- @dbh = dbh
5
+ class Adapter
6
+ def initialize(conn, query)
7
+ @conn = conn
9
8
  @query = query
10
9
  end
11
10
 
12
- def each_column_name
13
- result.column_names.each do |column_name|
14
- yield(column_name.to_sym)
15
- end
11
+ def result_hash
12
+ column_names
13
+ .map(&:to_sym)
14
+ .zip(rows.transpose)
15
+ .to_h
16
16
  end
17
+ end
18
+
19
+ # Private adapter class for DBI::DatabaseHandle
20
+ # @private
21
+ class DbiAdapter < Adapter
22
+ private
17
23
 
18
- def each_row
19
- result.fetch do |row|
20
- yield(row.to_a)
21
- end
24
+ def column_names
25
+ result.column_names
22
26
  end
23
27
 
24
- private
28
+ def rows
29
+ result.to_a.map(&:to_a)
30
+ end
25
31
 
26
32
  def result
27
- @result ||= @dbh.execute(@query)
33
+ @result ||= @conn.execute(@query)
28
34
  end
29
35
  end
30
36
 
31
37
  # Private adapter class for connections of ActiveRecord
32
38
  # @private
33
- class ActiveRecordConnectionAdapter
34
- def initialize(conn, query)
35
- @conn = conn
36
- @query = query
37
- end
39
+ class ActiveRecordConnectionAdapter < Adapter
40
+ private
38
41
 
39
- def each_column_name
40
- result.columns.each do |column_name|
41
- yield(column_name.to_sym)
42
- end
42
+ def column_names
43
+ result.columns
43
44
  end
44
45
 
45
- def each_row
46
- result.each do |row|
47
- yield(row.values)
48
- end
46
+ def rows
47
+ result.cast_values
49
48
  end
50
49
 
51
- private
52
-
53
50
  def result
54
51
  @result ||= @conn.exec_query(@query)
55
52
  end
@@ -67,50 +64,24 @@ module Daru
67
64
  end
68
65
 
69
66
  def make_dataframe
70
- vectors = {}
71
- fields = []
72
- @adapter.each_column_name do |column_name|
73
- vectors[column_name] = Daru::Vector.new([])
74
- vectors[column_name].rename column_name
75
- fields.push column_name
76
- end
77
-
78
- df = Daru::DataFrame.new(vectors, order: fields)
79
- @adapter.each_row do |row|
80
- df.add_row(row)
81
- end
82
-
83
- df.update
84
-
85
- df
67
+ Daru::DataFrame.new(@adapter.result_hash).tap(&:update)
86
68
  end
87
69
 
88
70
  private
89
71
 
90
72
  def init_adapter(db, query)
91
- begin
92
- query = query.to_str
93
- rescue
94
- raise ArgumentError, 'query must be a string'
95
- end
73
+ query = String.try_convert(query) or
74
+ raise ArgumentError, "Query must be a string, #{query.class} received"
96
75
 
97
- case
98
- when check_dbi(db)
76
+ case db
77
+ when DBI::DatabaseHandle
99
78
  DbiAdapter.new(db, query)
100
- when check_active_record_connection(db)
79
+ when ActiveRecord::ConnectionAdapters::AbstractAdapter
101
80
  ActiveRecordConnectionAdapter.new(db, query)
102
81
  else
103
- raise ArgumentError, 'unknown database type'
82
+ raise ArgumentError, "Unknown database adapter type #{db.class}"
104
83
  end
105
84
  end
106
-
107
- def check_dbi(obj)
108
- obj.is_a?(DBI::DatabaseHandle)
109
- end
110
-
111
- def check_active_record_connection(obj)
112
- obj.is_a?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
113
- end
114
85
  end
115
86
  end
116
87
  end
@@ -0,0 +1,38 @@
1
+ module Daru
2
+ # @private
3
+ module IRuby
4
+ module Helpers
5
+ module_function
6
+
7
+ def tuples_with_rowspans(index)
8
+ index.sparse_tuples.transpose
9
+ .map { |r| nils_counted(r) }
10
+ .transpose.map(&:compact)
11
+ end
12
+
13
+ def tuples_with_colspans(index)
14
+ index.sparse_tuples.transpose
15
+ .map { |r| nils_counted(r) }
16
+ .map(&:compact)
17
+ end
18
+
19
+ # It is complicated, but the only algo I could think of.
20
+ # It does [:a, nil, nil, :b, nil, :c] # =>
21
+ # [[:a,3], nil, nil, [:b,2], nil, [:c,1]]
22
+ # Needed by tuples_with_colspans/rowspans, which we need for pretty HTML
23
+ def nils_counted array
24
+ grouped = [[array.first]]
25
+ array[1..-1].each do |val|
26
+ if val
27
+ grouped << [val]
28
+ else
29
+ grouped.last << val
30
+ end
31
+ end
32
+ grouped.flat_map { |items|
33
+ [[items.first, items.count], *[nil] * (items.count - 1)]
34
+ }
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,52 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan='<%= @vectors.size+1 %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
4
+ </tr>
5
+
6
+ <% if @vectors.is_a? MultiIndex %>
7
+ <% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
8
+ <tr>
9
+ <% if idx.zero? %>
10
+ <th rowspan="<%= @vectors.width %>"></th>
11
+ <% end %>
12
+ <% tuple.each do |idx, span| %>
13
+ <th colspan="<%= span %>"><%= idx %></th>
14
+ <% end %>
15
+ </tr>
16
+ <% end %>
17
+ <% else %>
18
+ <tr>
19
+ <th></th>
20
+ <% @vectors.each do |vector| %>
21
+ <th><%=vector%></th>
22
+ <% end %>
23
+ </tr>
24
+ <% end %>
25
+
26
+ <% @index.first(threshold).each_with_index do |index, pos| %>
27
+ <tr>
28
+ <td><%= index %></td>
29
+ <% row.at(pos).each do |element| %>
30
+ <td><%= element.to_s %></td>
31
+ <% end %>
32
+ </tr>
33
+ <% end %>
34
+
35
+ <% if nrows > threshold %>
36
+ <tr>
37
+ <% (@vectors.size + 1).times do %>
38
+ <td>...</td>
39
+ <% end %>
40
+ </tr>
41
+
42
+ <% last_index = @index.to_a.last
43
+ last_row = row_at @index.size-1 %>
44
+
45
+ <tr>
46
+ <td><%= last_index %></td>
47
+ <% last_row.each do |element| %>
48
+ <td><%= element.to_s %></td>
49
+ <% end %>
50
+ </tr>
51
+ <% end %>
52
+ </table>
@@ -0,0 +1,58 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan='<%= @vectors.size+index.width %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
4
+ </tr>
5
+ <% if @vectors.is_a? MultiIndex %>
6
+ <% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
7
+ <tr>
8
+ <% if idx.zero? %>
9
+ <th colspan="<%= index.width %>" rowspan="<%= @vectors.width %>"></th>
10
+ <% end %>
11
+ <% tuple.each do |idx, span| %>
12
+ <th colspan="<%= span %>"><%= idx %></th>
13
+ <% end %>
14
+ </tr>
15
+ <% end %>
16
+ <% else %>
17
+ <tr>
18
+ <th colspan="<%= index.width %>"></th>
19
+ <% @vectors.each do |vector| %>
20
+ <th><%=vector%></th>
21
+ <% end %>
22
+ </tr>
23
+ <% end %>
24
+
25
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(@index.to_a).each do |tuple, index| %>
26
+ <tr>
27
+ <% tuple.each do |idx, span| %>
28
+ <th rowspan="<%= span %>"><%= idx %></th>
29
+ <% end %>
30
+ <% row[index].each do |element| %>
31
+ <td><%= element.to_s %></td>
32
+ <% end %>
33
+ </tr>
34
+ <% end %>
35
+
36
+ <% if nrows > threshold %>
37
+ <tr>
38
+ <% index.width.times do %>
39
+ <th>...</th>
40
+ <% end %>
41
+ <% @vectors.size.times do %>
42
+ <td>...</td>
43
+ <% end %>
44
+ </tr>
45
+
46
+ <% last_index = @index.to_a.last
47
+ last_row = row[last_index] %>
48
+
49
+ <tr>
50
+ <% last_index.each do |idx| %>
51
+ <th><%= idx %></th>
52
+ <% end %>
53
+ <% last_row.each do |element| %>
54
+ <td><%= element.to_s %></td>
55
+ <% end %>
56
+ </tr>
57
+ <% end %>
58
+ </table>
@@ -0,0 +1,12 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="<%= width %>">Daru::MultiIndex(<%= size %>x<%= width %>)</th>
4
+ </tr>
5
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(self).each do |row| %>
6
+ <tr>
7
+ <% row.each do |val, span| %>
8
+ <th rowspan="<%= span %>"><%= val %></th>
9
+ <% end %>
10
+ </tr>
11
+ <% end %>
12
+ </table>