daru 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -1,4 +1,6 @@
1
1
  module Daru
2
+ # rubocop:disable Style/OpMethod
3
+
2
4
  # Generic class for generating date offsets.
3
5
  class DateOffset
4
6
  # A Daru::DateOffset object is created by passing certain options
@@ -15,7 +17,7 @@ module Daru
15
17
  # * :weeks - Create a weeks offset
16
18
  # * :months - Create a months offset
17
19
  # * :years - Create a years offset
18
- #
20
+ #
19
21
  # Additionally, passing the `:n` option will apply the offset that many times.
20
22
  #
21
23
  # @example Usage of DateOffset
@@ -23,7 +25,7 @@ module Daru
23
25
  # offset = Daru::DateOffset.new(weeks: 3)
24
26
  # offset + DateTime.new(2012,5,3)
25
27
  # #=> #<DateTime: 2012-05-24T00:00:00+00:00 ((2456072j,0s,0n),+0s,2299161j)>
26
- #
28
+ #
27
29
  # # Create an offset of 5 hours
28
30
  # offset = Daru::DateOffset.new(hours: 5)
29
31
  # offset + DateTime.new(2015,3,3,23,5,1)
@@ -37,22 +39,22 @@ module Daru
37
39
  n = opts[:n] || 1
38
40
 
39
41
  @offset =
40
- case
41
- when opts[:secs]
42
- Offsets::Second.new(n*opts[:secs])
43
- when opts[:mins]
44
- Offsets::Minute.new(n*opts[:mins])
45
- when opts[:hours]
46
- Offsets::Hour.new(n*opts[:hours])
47
- when opts[:days]
48
- Offsets::Day.new(n*opts[:days])
49
- when opts[:weeks]
50
- Offsets::Day.new(7*n*opts[:weeks])
51
- when opts[:months]
52
- Offsets::Month.new(n*opts[:months])
53
- when opts[:years]
54
- Offsets::Year.new(n*opts[:years])
55
- end
42
+ case
43
+ when opts[:secs]
44
+ Offsets::Second.new(n*opts[:secs])
45
+ when opts[:mins]
46
+ Offsets::Minute.new(n*opts[:mins])
47
+ when opts[:hours]
48
+ Offsets::Hour.new(n*opts[:hours])
49
+ when opts[:days]
50
+ Offsets::Day.new(n*opts[:days])
51
+ when opts[:weeks]
52
+ Offsets::Day.new(7*n*opts[:weeks])
53
+ when opts[:months]
54
+ Offsets::Month.new(n*opts[:months])
55
+ when opts[:years]
56
+ Offsets::Year.new(n*opts[:years])
57
+ end
56
58
  end
57
59
 
58
60
  # Offset a DateTime forward.
@@ -93,7 +95,7 @@ module Daru
93
95
  end
94
96
 
95
97
  # Create a seconds offset
96
- #
98
+ #
97
99
  # @param n [Integer] The number of times an offset should be applied.
98
100
  # @example Create a Seconds offset
99
101
  # offset = Daru::Offsets::Second.new(5)
@@ -110,7 +112,7 @@ module Daru
110
112
  end
111
113
 
112
114
  # Create a minutes offset
113
- #
115
+ #
114
116
  # @param n [Integer] The number of times an offset should be applied.
115
117
  # @example Create a Minutes offset
116
118
  # offset = Daru::Offsets::Minute.new(8)
@@ -127,7 +129,7 @@ module Daru
127
129
  end
128
130
 
129
131
  # Create an hours offset
130
- #
132
+ #
131
133
  # @param n [Integer] The number of times an offset should be applied.
132
134
  # @example Create a Hour offset
133
135
  # offset = Daru::Offsets::Hour.new(8)
@@ -137,14 +139,14 @@ module Daru
137
139
  def multiplier
138
140
  0.041666666666666664
139
141
  end
140
-
142
+
141
143
  def freq_string
142
144
  (@n == 1 ? '' : @n.to_s) + 'H'
143
145
  end
144
146
  end
145
147
 
146
148
  # Create a days offset
147
- #
149
+ #
148
150
  # @param n [Integer] The number of times an offset should be applied.
149
151
  # @example Create a Day offset
150
152
  # offset = Daru::Offsets::Day.new(2)
@@ -161,7 +163,7 @@ module Daru
161
163
  end
162
164
 
163
165
  # Create a months offset
164
- #
166
+ #
165
167
  # @param n [Integer] The number of times an offset should be applied.
166
168
  # @example Create a Month offset
167
169
  # offset = Daru::Offsets::Month.new(5)
@@ -182,7 +184,7 @@ module Daru
182
184
  end
183
185
 
184
186
  # Create a years offset
185
- #
187
+ #
186
188
  # @param n [Integer] The number of times an offset should be applied.
187
189
  # @example Create a Year offset
188
190
  # offset = Daru::Offsets::Year.new(2)
@@ -239,7 +241,7 @@ module Daru
239
241
  end
240
242
 
241
243
  # Create a month begin offset
242
- #
244
+ #
243
245
  # @param n [Integer] The number of times an offset should be applied.
244
246
  # @example Create a MonthBegin offset
245
247
  # offset = Daru::Offsets::MonthBegin.new(2)
@@ -251,14 +253,14 @@ module Daru
251
253
  end
252
254
 
253
255
  def freq_string
254
- (@n == 1 ? '' : @n.to_s) + "MB"
256
+ (@n == 1 ? '' : @n.to_s) + 'MB'
255
257
  end
256
258
 
257
259
  def + date_time
258
260
  @n.times do
259
261
  days_in_month = Daru::MONTH_DAYS[date_time.month]
260
- days_in_month += 1 if date_time.leap? and date_time.month == 2
261
- date_time = date_time + (days_in_month - date_time.day + 1)
262
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
263
+ date_time += (days_in_month - date_time.day + 1)
262
264
  end
263
265
 
264
266
  date_time
@@ -267,7 +269,7 @@ module Daru
267
269
  def - date_time
268
270
  @n.times do
269
271
  date_time = date_time << 1 if on_offset?(date_time)
270
- date_time = DateTime.new(date_time.year, date_time.month, 1,
272
+ date_time = DateTime.new(date_time.year, date_time.month, 1,
271
273
  date_time.hour, date_time.min, date_time.sec)
272
274
  end
273
275
 
@@ -280,7 +282,7 @@ module Daru
280
282
  end
281
283
 
282
284
  # Create a month end offset
283
- #
285
+ #
284
286
  # @param n [Integer] The number of times an offset should be applied.
285
287
  # @example Create a MonthEnd offset
286
288
  # offset = Daru::Offsets::MonthEnd.new
@@ -296,24 +298,24 @@ module Daru
296
298
  end
297
299
 
298
300
  def + date_time
299
- @n.times do
301
+ @n.times do
300
302
  date_time = date_time >> 1 if on_offset?(date_time)
301
303
  days_in_month = Daru::MONTH_DAYS[date_time.month]
302
- days_in_month += 1 if date_time.leap? and date_time.month == 2
304
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
303
305
 
304
- date_time = date_time + (days_in_month - date_time.day)
306
+ date_time += (days_in_month - date_time.day)
305
307
  end
306
308
 
307
309
  date_time
308
310
  end
309
311
 
310
312
  def - date_time
311
- @n.times do
313
+ @n.times do
312
314
  date_time = date_time << 1
313
315
  days_in_month = Daru::MONTH_DAYS[date_time.month]
314
- days_in_month += 1 if date_time.leap? and date_time.month == 2
316
+ days_in_month += 1 if date_time.leap? && date_time.month == 2
315
317
 
316
- date_time = date_time + (days_in_month - date_time.day)
318
+ date_time += (days_in_month - date_time.day)
317
319
  end
318
320
 
319
321
  date_time
@@ -325,7 +327,7 @@ module Daru
325
327
  end
326
328
 
327
329
  # Create a year begin offset
328
- #
330
+ #
329
331
  # @param n [Integer] The number of times an offset should be applied.
330
332
  # @example Create a YearBegin offset
331
333
  # offset = Daru::Offsets::YearBegin.new(3)
@@ -347,7 +349,7 @@ module Daru
347
349
 
348
350
  def - date_time
349
351
  if on_offset?(date_time)
350
- DateTime.new(date_time.year - @n, 1, 1,
352
+ DateTime.new(date_time.year - @n, 1, 1,
351
353
  date_time.hour,date_time.min, date_time.sec)
352
354
  else
353
355
  DateTime.new(date_time.year - (@n-1), 1, 1)
@@ -360,7 +362,7 @@ module Daru
360
362
  end
361
363
 
362
364
  # Create a year end offset
363
- #
365
+ #
364
366
  # @param n [Integer] The number of times an offset should be applied.
365
367
  # @example Create a YearEnd offset
366
368
  # offset = Daru::Offsets::YearEnd.new
@@ -394,4 +396,6 @@ module Daru
394
396
  end
395
397
  end
396
398
  end
397
- end
399
+
400
+ # rubocop:enable Style/OpMethod
401
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Daru
4
4
  class DataFrame
5
- def to_REXP
5
+ def to_REXP # rubocop:disable Style/MethodName
6
6
  names = @vectors.to_a
7
7
  data = names.map do |f|
8
8
  Rserve::REXP::Wrapper.wrap(self[f].to_a)
@@ -14,8 +14,8 @@ module Daru
14
14
  end
15
15
 
16
16
  class Vector
17
- def to_REXP
18
- Rserve::REXP::Wrapper.wrap(self.to_a)
17
+ def to_REXP # rubocop:disable Style/MethodName
18
+ Rserve::REXP::Wrapper.wrap(to_a)
19
19
  end
20
20
  end
21
- end
21
+ end
@@ -3,14 +3,14 @@ module Daru
3
3
  include Enumerable
4
4
  # It so happens that overriding the .new method in a super class also
5
5
  # tampers with the default .new method for class that inherit from the
6
- # super class (Index in this case). Thus we first alias the original
7
- # new method (from Object) to __new__ when the Index class is evaluated,
6
+ # super class (Index in this case). Thus we first alias the original
7
+ # new method (from Object) to __new__ when the Index class is evaluated,
8
8
  # and then we use an inherited hook such that the old new method (from
9
9
  # Object) is once again the default .new for the subclass.
10
10
  # Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html
11
11
  class << self
12
12
  alias :__new__ :new
13
-
13
+
14
14
  def inherited subclass
15
15
  class << subclass
16
16
  alias :new :__new__
@@ -23,19 +23,14 @@ module Daru
23
23
  def self.new *args, &block
24
24
  source = args[0]
25
25
 
26
- idx =
27
- if source and source[0].is_a?(Array)
26
+ if source.respond_to?(:first) && source.first.is_a?(Array)
28
27
  Daru::MultiIndex.from_tuples source
29
- elsif source and source.is_a?(Array) and !source.empty? and
30
- source.all? { |e| e.is_a?(DateTime) }
28
+ elsif source && source.is_a?(Array) && !source.empty? &&
29
+ source.all? { |e| e.is_a?(DateTime) }
31
30
  Daru::DateTimeIndex.new(source, freq: :infer)
32
31
  else
33
- i = self.allocate
34
- i.send :initialize, *args, &block
35
- i
32
+ allocate.tap { |i| i.send :initialize, *args, &block }
36
33
  end
37
-
38
- idx
39
34
  end
40
35
 
41
36
  def each(&block)
@@ -50,41 +45,54 @@ module Daru
50
45
  attr_reader :relation_hash, :size
51
46
 
52
47
  def initialize index
53
- index = 0 if index.nil?
54
- index = Array.new(index) { |i| i} if index.is_a? Integer
55
- index = index.to_a if index.is_a? Daru::Index
56
-
57
- @relation_hash = {}
58
- index.each_with_index do |n, idx|
59
- @relation_hash[n] = idx
60
- end
48
+ index =
49
+ case index
50
+ when nil
51
+ []
52
+ when Integer
53
+ index.times.to_a
54
+ when Enumerable
55
+ index.to_a
56
+ else
57
+ raise ArgumentError,
58
+ "Cannot create index from #{index.class} #{index.inspect}"
59
+ end
61
60
 
62
- @relation_hash.freeze
61
+ @relation_hash = index.each_with_index.to_h.freeze
63
62
  @keys = @relation_hash.keys
64
63
  @size = @relation_hash.size
65
64
  end
66
65
 
67
66
  def ==(other)
68
- return false if self.class != other.class or other.size != @size
67
+ return false if self.class != other.class || other.size != @size
69
68
 
70
- @relation_hash.keys == other.to_a and
71
- @relation_hash.values == other.relation_hash.values
69
+ @relation_hash.keys == other.to_a &&
70
+ @relation_hash.values == other.relation_hash.values
72
71
  end
73
72
 
74
73
  def [](*key)
75
74
  loc = key[0]
76
75
 
77
- case
76
+ case
78
77
  when loc.is_a?(Range)
79
78
  first = loc.first
80
79
  last = loc.last
81
80
 
82
81
  slice first, last
83
82
  when key.size > 1
84
- Daru::Index.new key.map { |k| self[k] }
83
+ if include? key[0]
84
+ Daru::Index.new key.map { |k| k }
85
+ else
86
+ # Assume the user is specifying values for index not keys
87
+ # Return index object having keys corresponding to values provided
88
+ Daru::Index.new key.map { |k| key k }
89
+ end
85
90
  else
86
91
  v = @relation_hash[loc]
87
- return loc if v.nil?
92
+ unless v
93
+ return loc if loc.is_a?(Numeric) && loc < size
94
+ raise IndexError, "Specified index #{loc.inspect} does not exist"
95
+ end
88
96
  v
89
97
  end
90
98
  end
@@ -92,9 +100,8 @@ module Daru
92
100
  def slice *args
93
101
  start = args[0]
94
102
  en = args[1]
95
- indexes = []
96
103
 
97
- if start.is_a?(Integer) and en.is_a?(Integer)
104
+ if start.is_a?(Integer) && en.is_a?(Integer)
98
105
  Index.new @keys[start..en]
99
106
  else
100
107
  start_idx = @relation_hash[start]
@@ -111,7 +118,6 @@ module Daru
111
118
 
112
119
  # Produce a new index from the set intersection of two indexes
113
120
  def & other
114
-
115
121
  end
116
122
 
117
123
  def to_a
@@ -123,7 +129,7 @@ module Daru
123
129
  end
124
130
 
125
131
  def include? index
126
- @relation_hash.has_key? index
132
+ @relation_hash.key? index
127
133
  end
128
134
 
129
135
  def empty?
@@ -134,8 +140,8 @@ module Daru
134
140
  Daru::Index.new @relation_hash.keys
135
141
  end
136
142
 
137
- def _dump depth
138
- Marshal.dump({relation_hash: @relation_hash})
143
+ def _dump(*)
144
+ Marshal.dump(relation_hash: @relation_hash)
139
145
  end
140
146
 
141
147
  def self._load data
@@ -143,13 +149,21 @@ module Daru
143
149
 
144
150
  Daru::Index.new(h[:relation_hash].keys)
145
151
  end
152
+
153
+ # Provide an Index for sub vector produced
154
+ #
155
+ # @param input_indexes [Array] the input by user to index the vector
156
+ # @return [Object] the Index object for sub vector produced
157
+ def conform(*)
158
+ self
159
+ end
146
160
  end # class Index
147
161
 
148
162
  class MultiIndex < Index
149
163
  include Enumerable
150
164
 
151
165
  def each(&block)
152
- to_a.each(&block)
166
+ to_a.each(&block)
153
167
  end
154
168
 
155
169
  def map(&block)
@@ -159,28 +173,27 @@ module Daru
159
173
  attr_reader :labels
160
174
 
161
175
  def levels
162
- @levels.map { |e| e.keys }
176
+ @levels.map(&:keys)
163
177
  end
164
178
 
165
179
  def initialize opts={}
166
180
  labels = opts[:labels]
167
181
  levels = opts[:levels]
168
182
 
169
- raise ArgumentError,
170
- "Must specify both labels and levels" unless labels and levels
171
183
  raise ArgumentError,
172
- "Labels and levels should be same size" if labels.size != levels.size
184
+ 'Must specify both labels and levels' unless labels && levels
185
+ raise ArgumentError,
186
+ 'Labels and levels should be same size' if labels.size != levels.size
173
187
  raise ArgumentError,
174
- "Incorrect labels and levels" if incorrect_fields?(labels, levels)
188
+ 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
175
189
 
176
190
  @labels = labels
177
- @levels = levels.map { |e| Hash[e.map.with_index.to_a]}
191
+ @levels = levels.map { |e| Hash[e.map.with_index.to_a] }
178
192
  end
179
193
 
180
- def incorrect_fields? labels, levels
181
- max_level = levels[0].size
194
+ def incorrect_fields?(_labels, levels)
195
+ levels[0].size # FIXME: without this call everything fails
182
196
 
183
- correct = labels.all? { |e| e.size == max_level }
184
197
  correct = levels.all? { |e| e.uniq.size == e.size }
185
198
 
186
199
  !correct
@@ -189,7 +202,7 @@ module Daru
189
202
  private :incorrect_fields?
190
203
 
191
204
  def self.from_arrays arrays
192
- levels = arrays.map { |e| e.uniq.sort_by { |a| a.to_s } }
205
+ levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
193
206
  labels = []
194
207
 
195
208
  arrays.each_with_index do |arry, level_index|
@@ -214,12 +227,17 @@ module Daru
214
227
  case
215
228
  when key[0].is_a?(Range) then retrieve_from_range(key[0])
216
229
  when (key[0].is_a?(Integer) and key.size == 1) then try_retrieve_from_integer(key[0])
217
- else retrieve_from_tuples(key)
230
+ else
231
+ begin
232
+ retrieve_from_tuples key
233
+ rescue NoMethodError
234
+ raise IndexError, "Specified index #{key.inspect} do not exist"
235
+ end
218
236
  end
219
237
  end
220
238
 
221
239
  def try_retrieve_from_integer int
222
- return retrieve_from_tuples([int]) if @levels[0].has_key?(int)
240
+ return retrieve_from_tuples([int]) if @levels[0].key?(int)
223
241
  int
224
242
  end
225
243
 
@@ -232,12 +250,13 @@ module Daru
232
250
 
233
251
  key.each_with_index do |k, depth|
234
252
  level_index = @levels[depth][k]
253
+ raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
235
254
  label = @labels[depth]
236
255
  chosen = find_all_indexes label, level_index, chosen
237
256
  end
238
257
 
239
- return chosen[0] if chosen.size == 1
240
- return multi_index_from_multiple_selections(chosen)
258
+ return chosen[0] if chosen.size == 1 && key.size == @levels.size
259
+ multi_index_from_multiple_selections(chosen)
241
260
  end
242
261
 
243
262
  def multi_index_from_multiple_selections chosen
@@ -246,14 +265,11 @@ module Daru
246
265
 
247
266
  def find_all_indexes label, level_index, chosen
248
267
  if chosen.empty?
249
- label.each_with_index do |lbl, i|
250
- if lbl == level_index then chosen << i end
251
- end
268
+ label.each_with_index
269
+ .select { |lbl, _| lbl == level_index }.map(&:last)
252
270
  else
253
271
  chosen.keep_if { |c| label[c] == level_index }
254
272
  end
255
-
256
- chosen
257
273
  end
258
274
 
259
275
  private :find_all_indexes, :multi_index_from_multiple_selections,
@@ -263,11 +279,10 @@ module Daru
263
279
  raise ArgumentError,
264
280
  "Key #{index} is too large" if index >= @labels[0].size
265
281
 
266
- level_indexes =
267
- @labels.inject([]) do |memo, label|
268
- memo << label[index]
269
- memo
270
- end
282
+ level_indexes =
283
+ @labels.each_with_object([]) do |label, memo|
284
+ memo << label[index]
285
+ end
271
286
 
272
287
  tuple = []
273
288
  level_indexes.each_with_index do |level_index, i|
@@ -294,7 +309,7 @@ module Daru
294
309
  end
295
310
 
296
311
  def empty?
297
- @labels.flatten.empty? and @levels.all? { |l| l.empty? }
312
+ @labels.flatten.empty? and @levels.all?(&:empty?)
298
313
  end
299
314
 
300
315
  def include? tuple
@@ -314,9 +329,9 @@ module Daru
314
329
  end
315
330
 
316
331
  def == other
317
- self.class == other.class and
318
- labels == other.labels and
319
- levels == other.levels
332
+ self.class == other.class &&
333
+ labels == other.labels &&
334
+ levels == other.levels
320
335
  end
321
336
 
322
337
  def to_a
@@ -328,7 +343,16 @@ module Daru
328
343
  end
329
344
 
330
345
  def inspect
331
- "Daru::MultiIndex:#{self.object_id} (levels: #{levels}\nlabels: #{labels})"
346
+ "Daru::MultiIndex:#{object_id} (levels: #{levels}\nlabels: #{labels})"
347
+ end
348
+
349
+ # Provide a MultiIndex for sub vector produced
350
+ #
351
+ # @param input_indexes [Array] the input by user to index the vector
352
+ # @return [Object] the MultiIndex object for sub vector produced
353
+ def conform input_indexes
354
+ return self if input_indexes[0].is_a? Range
355
+ drop_left_level input_indexes.size
332
356
  end
333
357
  end
334
- end
358
+ end