daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -4,14 +4,32 @@ module Daru
4
4
  def process_row(row,empty)
5
5
  row.to_a.map do |c|
6
6
  if empty.include?(c)
7
+ # FIXME: As far as I can guess, it will never work.
8
+ # It is called only inside `from_plaintext`, and there
9
+ # data is split by `\s+` -- there is no chance that
10
+ # "empty" (currently just '') will be between data?..
7
11
  nil
8
- elsif c.is_a?(String) && c.is_number?
9
- c =~ /^\d+$/ ? c.to_i : c.tr(',','.').to_f
10
12
  else
11
- c
13
+ try_string_to_number(c)
12
14
  end
13
15
  end
14
16
  end
17
+
18
+ private
19
+
20
+ INT_PATTERN = /^[-+]?\d+$/
21
+ FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/
22
+
23
+ def try_string_to_number(s)
24
+ case s
25
+ when INT_PATTERN
26
+ s.to_i
27
+ when FLOAT_PATTERN
28
+ s.tr(',', '.').to_f
29
+ else
30
+ s
31
+ end
32
+ end
15
33
  end
16
34
  end
17
35
 
@@ -27,7 +45,7 @@ module Daru
27
45
  worksheet_id = opts[:worksheet_id]
28
46
  book = Spreadsheet.open path
29
47
  worksheet = book.worksheet worksheet_id
30
- headers = worksheet.row(0).recode_repeated.map(&:to_sym)
48
+ headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
31
49
 
32
50
  df = Daru::DataFrame.new({})
33
51
  headers.each_with_index do |h,i|
@@ -57,46 +75,18 @@ module Daru
57
75
 
58
76
  # Functions for loading/writing CSV files
59
77
  def from_csv path, opts={}
60
- opts[:col_sep] ||= ','
61
- opts[:converters] ||= :numeric
62
-
63
- daru_options = opts.keys.each_with_object({}) do |k, hash|
64
- if [:clone, :order, :index, :name].include?(k)
65
- hash[k] = opts[k]
66
- opts.delete k
67
- end
68
- end
78
+ daru_options, opts = from_csv_prepare_opts opts
69
79
 
70
80
  # Preprocess headers for detecting and correcting repetition in
71
81
  # case the :headers option is not specified.
72
- if opts[:headers]
73
- opts[:header_converters] ||= :symbol
74
-
75
- csv = ::CSV.read(path, 'rb',opts)
76
- yield csv if block_given?
77
-
78
- hsh = {}
79
- csv.by_col.each do |col_name, values|
80
- hsh[col_name] = values
81
- end
82
- else
83
- csv = ::CSV.open(path, 'rb', opts)
84
- yield csv if block_given?
85
-
86
- csv_as_arrays = csv.to_a
87
- headers = csv_as_arrays[0].recode_repeated.map
88
- csv_as_arrays.delete_at 0
89
- csv_as_arrays = csv_as_arrays.transpose
90
-
91
- hsh = {}
92
- headers.each_with_index do |h, i|
93
- hsh[h] = csv_as_arrays[i]
82
+ hsh =
83
+ if opts[:headers]
84
+ from_csv_hash_with_headers(path, opts)
85
+ else
86
+ from_csv_hash(path, opts)
87
+ .tap { |hash| daru_options[:order] = hash.keys }
94
88
  end
95
89
 
96
- # Order columns as given in CSV
97
- daru_options[:order] = headers.to_a
98
- end
99
-
100
90
  Daru::DataFrame.new(hsh,daru_options)
101
91
  end
102
92
 
@@ -154,12 +144,7 @@ module Daru
154
144
  fields = fields.map(&:to_sym)
155
145
  end
156
146
 
157
- vectors = Hash[*fields.map { |name|
158
- [
159
- name,
160
- Daru::Vector.new([]).tap { |v| v.rename name }
161
- ]
162
- }.flatten]
147
+ vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h
163
148
 
164
149
  Daru::DataFrame.new(vectors, order: fields).tap do |df|
165
150
  relation.pluck(*fields).each do |record|
@@ -200,6 +185,42 @@ module Daru
200
185
  false
201
186
  end
202
187
  end
188
+
189
+ private
190
+
191
+ DARU_OPT_KEYS = [:clone, :order, :index, :name].freeze
192
+
193
+ def from_csv_prepare_opts opts
194
+ opts[:col_sep] ||= ','
195
+ opts[:converters] ||= :numeric
196
+
197
+ daru_options = opts.keys.each_with_object({}) do |k, hash|
198
+ hash[k] = opts.delete(k) if DARU_OPT_KEYS.include?(k)
199
+ end
200
+ [daru_options, opts]
201
+ end
202
+
203
+ def from_csv_hash_with_headers(path, opts)
204
+ opts[:header_converters] ||= :symbol
205
+
206
+ ::CSV
207
+ .read(path, 'rb',opts)
208
+ .tap { |c| yield c if block_given? }
209
+ .by_col.map { |col_name, values| [col_name, values] }.to_h
210
+ end
211
+
212
+ def from_csv_hash(path, opts)
213
+ csv_as_arrays =
214
+ ::CSV
215
+ .open(path, 'rb', opts)
216
+ .tap { |c| yield c if block_given? }
217
+ .to_a
218
+
219
+ headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
220
+ csv_as_arrays = csv_as_arrays.transpose
221
+
222
+ headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h
223
+ end
203
224
  end
204
225
  end
205
226
  end
@@ -1,55 +1,52 @@
1
1
  module Daru
2
2
  module IO
3
3
  class SqlDataSource
4
- # Private adapter class for DBI::DatabaseHandle
5
4
  # @private
6
- class DbiAdapter
7
- def initialize(dbh, query)
8
- @dbh = dbh
5
+ class Adapter
6
+ def initialize(conn, query)
7
+ @conn = conn
9
8
  @query = query
10
9
  end
11
10
 
12
- def each_column_name
13
- result.column_names.each do |column_name|
14
- yield(column_name.to_sym)
15
- end
11
+ def result_hash
12
+ column_names
13
+ .map(&:to_sym)
14
+ .zip(rows.transpose)
15
+ .to_h
16
16
  end
17
+ end
18
+
19
+ # Private adapter class for DBI::DatabaseHandle
20
+ # @private
21
+ class DbiAdapter < Adapter
22
+ private
17
23
 
18
- def each_row
19
- result.fetch do |row|
20
- yield(row.to_a)
21
- end
24
+ def column_names
25
+ result.column_names
22
26
  end
23
27
 
24
- private
28
+ def rows
29
+ result.to_a.map(&:to_a)
30
+ end
25
31
 
26
32
  def result
27
- @result ||= @dbh.execute(@query)
33
+ @result ||= @conn.execute(@query)
28
34
  end
29
35
  end
30
36
 
31
37
  # Private adapter class for connections of ActiveRecord
32
38
  # @private
33
- class ActiveRecordConnectionAdapter
34
- def initialize(conn, query)
35
- @conn = conn
36
- @query = query
37
- end
39
+ class ActiveRecordConnectionAdapter < Adapter
40
+ private
38
41
 
39
- def each_column_name
40
- result.columns.each do |column_name|
41
- yield(column_name.to_sym)
42
- end
42
+ def column_names
43
+ result.columns
43
44
  end
44
45
 
45
- def each_row
46
- result.each do |row|
47
- yield(row.values)
48
- end
46
+ def rows
47
+ result.cast_values
49
48
  end
50
49
 
51
- private
52
-
53
50
  def result
54
51
  @result ||= @conn.exec_query(@query)
55
52
  end
@@ -67,50 +64,24 @@ module Daru
67
64
  end
68
65
 
69
66
  def make_dataframe
70
- vectors = {}
71
- fields = []
72
- @adapter.each_column_name do |column_name|
73
- vectors[column_name] = Daru::Vector.new([])
74
- vectors[column_name].rename column_name
75
- fields.push column_name
76
- end
77
-
78
- df = Daru::DataFrame.new(vectors, order: fields)
79
- @adapter.each_row do |row|
80
- df.add_row(row)
81
- end
82
-
83
- df.update
84
-
85
- df
67
+ Daru::DataFrame.new(@adapter.result_hash).tap(&:update)
86
68
  end
87
69
 
88
70
  private
89
71
 
90
72
  def init_adapter(db, query)
91
- begin
92
- query = query.to_str
93
- rescue
94
- raise ArgumentError, 'query must be a string'
95
- end
73
+ query = String.try_convert(query) or
74
+ raise ArgumentError, "Query must be a string, #{query.class} received"
96
75
 
97
- case
98
- when check_dbi(db)
76
+ case db
77
+ when DBI::DatabaseHandle
99
78
  DbiAdapter.new(db, query)
100
- when check_active_record_connection(db)
79
+ when ActiveRecord::ConnectionAdapters::AbstractAdapter
101
80
  ActiveRecordConnectionAdapter.new(db, query)
102
81
  else
103
- raise ArgumentError, 'unknown database type'
82
+ raise ArgumentError, "Unknown database adapter type #{db.class}"
104
83
  end
105
84
  end
106
-
107
- def check_dbi(obj)
108
- obj.is_a?(DBI::DatabaseHandle)
109
- end
110
-
111
- def check_active_record_connection(obj)
112
- obj.is_a?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
113
- end
114
85
  end
115
86
  end
116
87
  end
@@ -0,0 +1,38 @@
1
+ module Daru
2
+ # @private
3
+ module IRuby
4
+ module Helpers
5
+ module_function
6
+
7
+ def tuples_with_rowspans(index)
8
+ index.sparse_tuples.transpose
9
+ .map { |r| nils_counted(r) }
10
+ .transpose.map(&:compact)
11
+ end
12
+
13
+ def tuples_with_colspans(index)
14
+ index.sparse_tuples.transpose
15
+ .map { |r| nils_counted(r) }
16
+ .map(&:compact)
17
+ end
18
+
19
+ # It is complicated, but the only algo I could think of.
20
+ # It does [:a, nil, nil, :b, nil, :c] # =>
21
+ # [[:a,3], nil, nil, [:b,2], nil, [:c,1]]
22
+ # Needed by tuples_with_colspans/rowspans, which we need for pretty HTML
23
+ def nils_counted array
24
+ grouped = [[array.first]]
25
+ array[1..-1].each do |val|
26
+ if val
27
+ grouped << [val]
28
+ else
29
+ grouped.last << val
30
+ end
31
+ end
32
+ grouped.flat_map { |items|
33
+ [[items.first, items.count], *[nil] * (items.count - 1)]
34
+ }
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,52 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan='<%= @vectors.size+1 %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
4
+ </tr>
5
+
6
+ <% if @vectors.is_a? MultiIndex %>
7
+ <% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
8
+ <tr>
9
+ <% if idx.zero? %>
10
+ <th rowspan="<%= @vectors.width %>"></th>
11
+ <% end %>
12
+ <% tuple.each do |idx, span| %>
13
+ <th colspan="<%= span %>"><%= idx %></th>
14
+ <% end %>
15
+ </tr>
16
+ <% end %>
17
+ <% else %>
18
+ <tr>
19
+ <th></th>
20
+ <% @vectors.each do |vector| %>
21
+ <th><%=vector%></th>
22
+ <% end %>
23
+ </tr>
24
+ <% end %>
25
+
26
+ <% @index.first(threshold).each_with_index do |index, pos| %>
27
+ <tr>
28
+ <td><%= index %></td>
29
+ <% row.at(pos).each do |element| %>
30
+ <td><%= element.to_s %></td>
31
+ <% end %>
32
+ </tr>
33
+ <% end %>
34
+
35
+ <% if nrows > threshold %>
36
+ <tr>
37
+ <% (@vectors.size + 1).times do %>
38
+ <td>...</td>
39
+ <% end %>
40
+ </tr>
41
+
42
+ <% last_index = @index.to_a.last
43
+ last_row = row_at @index.size-1 %>
44
+
45
+ <tr>
46
+ <td><%= last_index %></td>
47
+ <% last_row.each do |element| %>
48
+ <td><%= element.to_s %></td>
49
+ <% end %>
50
+ </tr>
51
+ <% end %>
52
+ </table>
@@ -0,0 +1,58 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan='<%= @vectors.size+index.width %>'>Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>)</th>
4
+ </tr>
5
+ <% if @vectors.is_a? MultiIndex %>
6
+ <% Daru::IRuby::Helpers.tuples_with_colspans(@vectors).each_with_index do |tuple, idx| %>
7
+ <tr>
8
+ <% if idx.zero? %>
9
+ <th colspan="<%= index.width %>" rowspan="<%= @vectors.width %>"></th>
10
+ <% end %>
11
+ <% tuple.each do |idx, span| %>
12
+ <th colspan="<%= span %>"><%= idx %></th>
13
+ <% end %>
14
+ </tr>
15
+ <% end %>
16
+ <% else %>
17
+ <tr>
18
+ <th colspan="<%= index.width %>"></th>
19
+ <% @vectors.each do |vector| %>
20
+ <th><%=vector%></th>
21
+ <% end %>
22
+ </tr>
23
+ <% end %>
24
+
25
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(@index).first(threshold).zip(@index.to_a).each do |tuple, index| %>
26
+ <tr>
27
+ <% tuple.each do |idx, span| %>
28
+ <th rowspan="<%= span %>"><%= idx %></th>
29
+ <% end %>
30
+ <% row[index].each do |element| %>
31
+ <td><%= element.to_s %></td>
32
+ <% end %>
33
+ </tr>
34
+ <% end %>
35
+
36
+ <% if nrows > threshold %>
37
+ <tr>
38
+ <% index.width.times do %>
39
+ <th>...</th>
40
+ <% end %>
41
+ <% @vectors.size.times do %>
42
+ <td>...</td>
43
+ <% end %>
44
+ </tr>
45
+
46
+ <% last_index = @index.to_a.last
47
+ last_row = row[last_index] %>
48
+
49
+ <tr>
50
+ <% last_index.each do |idx| %>
51
+ <th><%= idx %></th>
52
+ <% end %>
53
+ <% last_row.each do |element| %>
54
+ <td><%= element.to_s %></td>
55
+ <% end %>
56
+ </tr>
57
+ <% end %>
58
+ </table>
@@ -0,0 +1,12 @@
1
+ <table>
2
+ <tr>
3
+ <th colspan="<%= width %>">Daru::MultiIndex(<%= size %>x<%= width %>)</th>
4
+ </tr>
5
+ <% Daru::IRuby::Helpers.tuples_with_rowspans(self).each do |row| %>
6
+ <tr>
7
+ <% row.each do |val, span| %>
8
+ <th rowspan="<%= span %>"><%= val %></th>
9
+ <% end %>
10
+ </tr>
11
+ <% end %>
12
+ </table>