daru 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -1,4 +1,6 @@
1
1
  module Daru
2
+ # rubocop:disable Style/OpMethod
3
+
2
4
  # Generic class for generating date offsets.
3
5
  class DateOffset
4
6
  # A Daru::DateOffset object is created by passing certain options
@@ -15,7 +17,7 @@ module Daru
15
17
  # * :weeks - Create a weeks offset
16
18
  # * :months - Create a months offset
17
19
  # * :years - Create a years offset
18
- #
20
+ #
19
21
  # Additionally, passing the `:n` option will apply the offset that many times.
20
22
  #
21
23
  # @example Usage of DateOffset
@@ -23,7 +25,7 @@ module Daru
23
25
  # offset = Daru::DateOffset.new(weeks: 3)
24
26
  # offset + DateTime.new(2012,5,3)
25
27
  # #=> #<DateTime: 2012-05-24T00:00:00+00:00 ((2456072j,0s,0n),+0s,2299161j)>
26
- #
28
+ #
27
29
  # # Create an offset of 5 hours
28
30
  # offset = Daru::DateOffset.new(hours: 5)
29
31
  # offset + DateTime.new(2015,3,3,23,5,1)
@@ -37,22 +39,22 @@ module Daru
37
39
  n = opts[:n] || 1
38
40
 
39
41
  @offset =
40
- case
41
- when opts[:secs]
42
- Offsets::Second.new(n*opts[:secs])
43
- when opts[:mins]
44
- Offsets::Minute.new(n*opts[:mins])
45
- when opts[:hours]
46
- Offsets::Hour.new(n*opts[:hours])
47
- when opts[:days]
48
- Offsets::Day.new(n*opts[:days])
49
- when opts[:weeks]
50
- Offsets::Day.new(7*n*opts[:weeks])
51
- when opts[:months]
52
- Offsets::Month.new(n*opts[:months])
53
- when opts[:years]
54
- Offsets::Year.new(n*opts[:years])
55
- end
42
+ case
43
+ when opts[:secs]
44
+ Offsets::Second.new(n*opts[:secs])
45
+ when opts[:mins]
46
+ Offsets::Minute.new(n*opts[:mins])
47
+ when opts[:hours]
48
+ Offsets::Hour.new(n*opts[:hours])
49
+ when opts[:days]
50
+ Offsets::Day.new(n*opts[:days])
51
+ when opts[:weeks]
52
+ Offsets::Day.new(7*n*opts[:weeks])
53
+ when opts[:months]
54
+ Offsets::Month.new(n*opts[:months])
55
+ when opts[:years]
56
+ Offsets::Year.new(n*opts[:years])
57
+ end
56
58
  end
57
59
 
58
60
  # Offset a DateTime forward.
@@ -93,7 +95,7 @@ module Daru
93
95
  end
94
96
 
95
97
  # Create a seconds offset
96
- #
98
+ #
97
99
  # @param n [Integer] The number of times an offset should be applied.
98
100
  # @example Create a Seconds offset
99
101
  # offset = Daru::Offsets::Second.new(5)
@@ -110,7 +112,7 @@ module Daru
110
112
  end
111
113
 
112
114
  # Create a minutes offset
113
- #
115
+ #
114
116
  # @param n [Integer] The number of times an offset should be applied.
115
117
  # @example Create a Minutes offset
116
118
  # offset = Daru::Offsets::Minute.new(8)
@@ -127,7 +129,7 @@ module Daru
127
129
  end
128
130
 
129
131
  # Create an hours offset
130
- #
132
+ #
131
133
  # @param n [Integer] The number of times an offset should be applied.
132
134
  # @example Create a Hour offset
133
135
  # offset = Daru::Offsets::Hour.new(8)
@@ -137,14 +139,14 @@ module Daru
137
139
  def multiplier
138
140
  0.041666666666666664
139
141
  end
140
-
142
+
141
143
  def freq_string
142
144
  (@n == 1 ? '' : @n.to_s) + 'H'
143
145
  end
144
146
  end
145
147
 
146
148
  # Create a days offset
147
- #
149
+ #
148
150
  # @param n [Integer] The number of times an offset should be applied.
149
151
  # @example Create a Day offset
150
152
  # offset = Daru::Offsets::Day.new(2)
@@ -161,7 +163,7 @@ module Daru
161
163
  end
162
164
 
163
165
  # Create a months offset
164
- #
166
+ #
165
167
  # @param n [Integer] The number of times an offset should be applied.
166
168
  # @example Create a Month offset
167
169
  # offset = Daru::Offsets::Month.new(5)
@@ -182,7 +184,7 @@ module Daru
182
184
  end
183
185
 
184
186
  # Create a years offset
185
- #
187
+ #
186
188
  # @param n [Integer] The number of times an offset should be applied.
187
189
  # @example Create a Year offset
188
190
  # offset = Daru::Offsets::Year.new(2)
@@ -239,7 +241,7 @@ module Daru
239
241
  end
240
242
 
241
243
  # Create a month begin offset
242
- #
244
+ #
243
245
  # @param n [Integer] The number of times an offset should be applied.
244
246
  # @example Create a MonthBegin offset
245
247
  # offset = Daru::Offsets::MonthBegin.new(2)
@@ -251,14 +253,14 @@ module Daru
251
253
  end
252
254
 
253
255
  def freq_string
254
- (@n == 1 ? '' : @n.to_s) + "MB"
256
+ (@n == 1 ? '' : @n.to_s) + 'MB'
255
257
  end
256
258
 
257
259
  def + date_time
258
260
  @n.times do
259
261
  days_in_month = Daru::MONTH_DAYS[date_time.month]
260
- days_in_month += 1 if date_time.leap? and date_time.month == 2
261
- date_time = date_time + (days_in_month - date_time.day + 1)
262
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
263
+ date_time += (days_in_month - date_time.day + 1)
262
264
  end
263
265
 
264
266
  date_time
@@ -267,7 +269,7 @@ module Daru
267
269
  def - date_time
268
270
  @n.times do
269
271
  date_time = date_time << 1 if on_offset?(date_time)
270
- date_time = DateTime.new(date_time.year, date_time.month, 1,
272
+ date_time = DateTime.new(date_time.year, date_time.month, 1,
271
273
  date_time.hour, date_time.min, date_time.sec)
272
274
  end
273
275
 
@@ -280,7 +282,7 @@ module Daru
280
282
  end
281
283
 
282
284
  # Create a month end offset
283
- #
285
+ #
284
286
  # @param n [Integer] The number of times an offset should be applied.
285
287
  # @example Create a MonthEnd offset
286
288
  # offset = Daru::Offsets::MonthEnd.new
@@ -296,24 +298,24 @@ module Daru
296
298
  end
297
299
 
298
300
  def + date_time
299
- @n.times do
301
+ @n.times do
300
302
  date_time = date_time >> 1 if on_offset?(date_time)
301
303
  days_in_month = Daru::MONTH_DAYS[date_time.month]
302
- days_in_month += 1 if date_time.leap? and date_time.month == 2
304
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
303
305
 
304
- date_time = date_time + (days_in_month - date_time.day)
306
+ date_time += (days_in_month - date_time.day)
305
307
  end
306
308
 
307
309
  date_time
308
310
  end
309
311
 
310
312
  def - date_time
311
- @n.times do
313
+ @n.times do
312
314
  date_time = date_time << 1
313
315
  days_in_month = Daru::MONTH_DAYS[date_time.month]
314
- days_in_month += 1 if date_time.leap? and date_time.month == 2
316
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
315
317
 
316
- date_time = date_time + (days_in_month - date_time.day)
318
+ date_time += (days_in_month - date_time.day)
317
319
  end
318
320
 
319
321
  date_time
@@ -325,7 +327,7 @@ module Daru
325
327
  end
326
328
 
327
329
  # Create a year begin offset
328
- #
330
+ #
329
331
  # @param n [Integer] The number of times an offset should be applied.
330
332
  # @example Create a YearBegin offset
331
333
  # offset = Daru::Offsets::YearBegin.new(3)
@@ -347,7 +349,7 @@ module Daru
347
349
 
348
350
  def - date_time
349
351
  if on_offset?(date_time)
350
- DateTime.new(date_time.year - @n, 1, 1,
352
+ DateTime.new(date_time.year - @n, 1, 1,
351
353
  date_time.hour,date_time.min, date_time.sec)
352
354
  else
353
355
  DateTime.new(date_time.year - (@n-1), 1, 1)
@@ -360,7 +362,7 @@ module Daru
360
362
  end
361
363
 
362
364
  # Create a year end offset
363
- #
365
+ #
364
366
  # @param n [Integer] The number of times an offset should be applied.
365
367
  # @example Create a YearEnd offset
366
368
  # offset = Daru::Offsets::YearEnd.new
@@ -394,4 +396,6 @@ module Daru
394
396
  end
395
397
  end
396
398
  end
397
- end
399
+
400
+ # rubocop:enable Style/OpMethod
401
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Daru
4
4
  class DataFrame
5
- def to_REXP
5
+ def to_REXP # rubocop:disable Style/MethodName
6
6
  names = @vectors.to_a
7
7
  data = names.map do |f|
8
8
  Rserve::REXP::Wrapper.wrap(self[f].to_a)
@@ -14,8 +14,8 @@ module Daru
14
14
  end
15
15
 
16
16
  class Vector
17
- def to_REXP
18
- Rserve::REXP::Wrapper.wrap(self.to_a)
17
+ def to_REXP # rubocop:disable Style/MethodName
18
+ Rserve::REXP::Wrapper.wrap(to_a)
19
19
  end
20
20
  end
21
- end
21
+ end
@@ -3,14 +3,14 @@ module Daru
3
3
  include Enumerable
4
4
  # It so happens that overriding the .new method in a superclass also
5
5
  # tampers with the default .new method for classes that inherit from the
6
- # super class (Index in this case). Thus we first alias the original
7
- # new method (from Object) to __new__ when the Index class is evaluated,
6
+ # super class (Index in this case). Thus we first alias the original
7
+ # new method (from Object) to __new__ when the Index class is evaluated,
8
8
  # and then we use an inherited hook such that the old new method (from
9
9
  # Object) is once again the default .new for the subclass.
10
10
  # Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html
11
11
  class << self
12
12
  alias :__new__ :new
13
-
13
+
14
14
  def inherited subclass
15
15
  class << subclass
16
16
  alias :new :__new__
@@ -23,19 +23,14 @@ module Daru
23
23
  def self.new *args, &block
24
24
  source = args[0]
25
25
 
26
- idx =
27
- if source and source[0].is_a?(Array)
26
+ if source.respond_to?(:first) && source.first.is_a?(Array)
28
27
  Daru::MultiIndex.from_tuples source
29
- elsif source and source.is_a?(Array) and !source.empty? and
30
- source.all? { |e| e.is_a?(DateTime) }
28
+ elsif source && source.is_a?(Array) && !source.empty? &&
29
+ source.all? { |e| e.is_a?(DateTime) }
31
30
  Daru::DateTimeIndex.new(source, freq: :infer)
32
31
  else
33
- i = self.allocate
34
- i.send :initialize, *args, &block
35
- i
32
+ allocate.tap { |i| i.send :initialize, *args, &block }
36
33
  end
37
-
38
- idx
39
34
  end
40
35
 
41
36
  def each(&block)
@@ -50,41 +45,54 @@ module Daru
50
45
  attr_reader :relation_hash, :size
51
46
 
52
47
  def initialize index
53
- index = 0 if index.nil?
54
- index = Array.new(index) { |i| i} if index.is_a? Integer
55
- index = index.to_a if index.is_a? Daru::Index
56
-
57
- @relation_hash = {}
58
- index.each_with_index do |n, idx|
59
- @relation_hash[n] = idx
60
- end
48
+ index =
49
+ case index
50
+ when nil
51
+ []
52
+ when Integer
53
+ index.times.to_a
54
+ when Enumerable
55
+ index.to_a
56
+ else
57
+ raise ArgumentError,
58
+ "Cannot create index from #{index.class} #{index.inspect}"
59
+ end
61
60
 
62
- @relation_hash.freeze
61
+ @relation_hash = index.each_with_index.to_h.freeze
63
62
  @keys = @relation_hash.keys
64
63
  @size = @relation_hash.size
65
64
  end
66
65
 
67
66
  def ==(other)
68
- return false if self.class != other.class or other.size != @size
67
+ return false if self.class != other.class || other.size != @size
69
68
 
70
- @relation_hash.keys == other.to_a and
71
- @relation_hash.values == other.relation_hash.values
69
+ @relation_hash.keys == other.to_a &&
70
+ @relation_hash.values == other.relation_hash.values
72
71
  end
73
72
 
74
73
  def [](*key)
75
74
  loc = key[0]
76
75
 
77
- case
76
+ case
78
77
  when loc.is_a?(Range)
79
78
  first = loc.first
80
79
  last = loc.last
81
80
 
82
81
  slice first, last
83
82
  when key.size > 1
84
- Daru::Index.new key.map { |k| self[k] }
83
+ if include? key[0]
84
+ Daru::Index.new key.map { |k| k }
85
+ else
86
+ # Assume the user is specifying values for index not keys
87
+ # Return index object having keys corresponding to values provided
88
+ Daru::Index.new key.map { |k| key k }
89
+ end
85
90
  else
86
91
  v = @relation_hash[loc]
87
- return loc if v.nil?
92
+ unless v
93
+ return loc if loc.is_a?(Numeric) && loc < size
94
+ raise IndexError, "Specified index #{loc.inspect} does not exist"
95
+ end
88
96
  v
89
97
  end
90
98
  end
@@ -92,9 +100,8 @@ module Daru
92
100
  def slice *args
93
101
  start = args[0]
94
102
  en = args[1]
95
- indexes = []
96
103
 
97
- if start.is_a?(Integer) and en.is_a?(Integer)
104
+ if start.is_a?(Integer) && en.is_a?(Integer)
98
105
  Index.new @keys[start..en]
99
106
  else
100
107
  start_idx = @relation_hash[start]
@@ -111,7 +118,6 @@ module Daru
111
118
 
112
119
  # Produce a new index from the set intersection of two indexes
113
120
  def & other
114
-
115
121
  end
116
122
 
117
123
  def to_a
@@ -123,7 +129,7 @@ module Daru
123
129
  end
124
130
 
125
131
  def include? index
126
- @relation_hash.has_key? index
132
+ @relation_hash.key? index
127
133
  end
128
134
 
129
135
  def empty?
@@ -134,8 +140,8 @@ module Daru
134
140
  Daru::Index.new @relation_hash.keys
135
141
  end
136
142
 
137
- def _dump depth
138
- Marshal.dump({relation_hash: @relation_hash})
143
+ def _dump(*)
144
+ Marshal.dump(relation_hash: @relation_hash)
139
145
  end
140
146
 
141
147
  def self._load data
@@ -143,13 +149,21 @@ module Daru
143
149
 
144
150
  Daru::Index.new(h[:relation_hash].keys)
145
151
  end
152
+
153
+ # Provide an Index for sub vector produced
154
+ #
155
+ # @param input_indexes [Array] the input by user to index the vector
156
+ # @return [Object] the Index object for sub vector produced
157
+ def conform(*)
158
+ self
159
+ end
146
160
  end # class Index
147
161
 
148
162
  class MultiIndex < Index
149
163
  include Enumerable
150
164
 
151
165
  def each(&block)
152
- to_a.each(&block)
166
+ to_a.each(&block)
153
167
  end
154
168
 
155
169
  def map(&block)
@@ -159,28 +173,27 @@ module Daru
159
173
  attr_reader :labels
160
174
 
161
175
  def levels
162
- @levels.map { |e| e.keys }
176
+ @levels.map(&:keys)
163
177
  end
164
178
 
165
179
  def initialize opts={}
166
180
  labels = opts[:labels]
167
181
  levels = opts[:levels]
168
182
 
169
- raise ArgumentError,
170
- "Must specify both labels and levels" unless labels and levels
171
183
  raise ArgumentError,
172
- "Labels and levels should be same size" if labels.size != levels.size
184
+ 'Must specify both labels and levels' unless labels && levels
185
+ raise ArgumentError,
186
+ 'Labels and levels should be same size' if labels.size != levels.size
173
187
  raise ArgumentError,
174
- "Incorrect labels and levels" if incorrect_fields?(labels, levels)
188
+ 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
175
189
 
176
190
  @labels = labels
177
- @levels = levels.map { |e| Hash[e.map.with_index.to_a]}
191
+ @levels = levels.map { |e| Hash[e.map.with_index.to_a] }
178
192
  end
179
193
 
180
- def incorrect_fields? labels, levels
181
- max_level = levels[0].size
194
+ def incorrect_fields?(_labels, levels)
195
+ levels[0].size # FIXME: without this call everything fails
182
196
 
183
- correct = labels.all? { |e| e.size == max_level }
184
197
  correct = levels.all? { |e| e.uniq.size == e.size }
185
198
 
186
199
  !correct
@@ -189,7 +202,7 @@ module Daru
189
202
  private :incorrect_fields?
190
203
 
191
204
  def self.from_arrays arrays
192
- levels = arrays.map { |e| e.uniq.sort_by { |a| a.to_s } }
205
+ levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
193
206
  labels = []
194
207
 
195
208
  arrays.each_with_index do |arry, level_index|
@@ -214,12 +227,17 @@ module Daru
214
227
  case
215
228
  when key[0].is_a?(Range) then retrieve_from_range(key[0])
216
229
  when (key[0].is_a?(Integer) and key.size == 1) then try_retrieve_from_integer(key[0])
217
- else retrieve_from_tuples(key)
230
+ else
231
+ begin
232
+ retrieve_from_tuples key
233
+ rescue NoMethodError
234
+ raise IndexError, "Specified index #{key.inspect} do not exist"
235
+ end
218
236
  end
219
237
  end
220
238
 
221
239
  def try_retrieve_from_integer int
222
- return retrieve_from_tuples([int]) if @levels[0].has_key?(int)
240
+ return retrieve_from_tuples([int]) if @levels[0].key?(int)
223
241
  int
224
242
  end
225
243
 
@@ -232,12 +250,13 @@ module Daru
232
250
 
233
251
  key.each_with_index do |k, depth|
234
252
  level_index = @levels[depth][k]
253
+ raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
235
254
  label = @labels[depth]
236
255
  chosen = find_all_indexes label, level_index, chosen
237
256
  end
238
257
 
239
- return chosen[0] if chosen.size == 1
240
- return multi_index_from_multiple_selections(chosen)
258
+ return chosen[0] if chosen.size == 1 && key.size == @levels.size
259
+ multi_index_from_multiple_selections(chosen)
241
260
  end
242
261
 
243
262
  def multi_index_from_multiple_selections chosen
@@ -246,14 +265,11 @@ module Daru
246
265
 
247
266
  def find_all_indexes label, level_index, chosen
248
267
  if chosen.empty?
249
- label.each_with_index do |lbl, i|
250
- if lbl == level_index then chosen << i end
251
- end
268
+ label.each_with_index
269
+ .select { |lbl, _| lbl == level_index }.map(&:last)
252
270
  else
253
271
  chosen.keep_if { |c| label[c] == level_index }
254
272
  end
255
-
256
- chosen
257
273
  end
258
274
 
259
275
  private :find_all_indexes, :multi_index_from_multiple_selections,
@@ -263,11 +279,10 @@ module Daru
263
279
  raise ArgumentError,
264
280
  "Key #{index} is too large" if index >= @labels[0].size
265
281
 
266
- level_indexes =
267
- @labels.inject([]) do |memo, label|
268
- memo << label[index]
269
- memo
270
- end
282
+ level_indexes =
283
+ @labels.each_with_object([]) do |label, memo|
284
+ memo << label[index]
285
+ end
271
286
 
272
287
  tuple = []
273
288
  level_indexes.each_with_index do |level_index, i|
@@ -294,7 +309,7 @@ module Daru
294
309
  end
295
310
 
296
311
  def empty?
297
- @labels.flatten.empty? and @levels.all? { |l| l.empty? }
312
+ @labels.flatten.empty? and @levels.all?(&:empty?)
298
313
  end
299
314
 
300
315
  def include? tuple
@@ -314,9 +329,9 @@ module Daru
314
329
  end
315
330
 
316
331
  def == other
317
- self.class == other.class and
318
- labels == other.labels and
319
- levels == other.levels
332
+ self.class == other.class &&
333
+ labels == other.labels &&
334
+ levels == other.levels
320
335
  end
321
336
 
322
337
  def to_a
@@ -328,7 +343,16 @@ module Daru
328
343
  end
329
344
 
330
345
  def inspect
331
- "Daru::MultiIndex:#{self.object_id} (levels: #{levels}\nlabels: #{labels})"
346
+ "Daru::MultiIndex:#{object_id} (levels: #{levels}\nlabels: #{labels})"
347
+ end
348
+
349
+ # Provide a MultiIndex for sub vector produced
350
+ #
351
+ # @param input_indexes [Array] the input by user to index the vector
352
+ # @return [Object] the MultiIndex object for sub vector produced
353
+ def conform input_indexes
354
+ return self if input_indexes[0].is_a? Range
355
+ drop_left_level input_indexes.size
332
356
  end
333
357
  end
334
- end
358
+ end