daru 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -1,11 +1,46 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Index do
4
+ context ".new" do
5
+ it "creates an Index object if Index-like data is supplied" do
6
+ i = Daru::Index.new [:one, 'one', 1, 2, :two]
7
+ expect(i.class).to eq(Daru::Index)
8
+ expect(i.to_a) .to eq([:one, 'one', 1, 2, :two])
9
+ end
10
+
11
+ it "creates a MultiIndex if tuples are supplied" do
12
+ i = Daru::Index.new([
13
+ [:b,:one,:bar],
14
+ [:b,:two,:bar],
15
+ [:b,:two,:baz],
16
+ [:b,:one,:foo]
17
+ ])
18
+
19
+ expect(i.class).to eq(Daru::MultiIndex)
20
+ expect(i.levels).to eq([[:b], [:one, :two], [:bar, :baz, :foo]])
21
+ expect(i.labels).to eq([[0,0,0,0],[0,1,1,0],[0,0,1,2]])
22
+ end
23
+
24
+ it "creates DateTimeIndex if date-like objects specified" do
25
+ i = Daru::Index.new([
26
+ DateTime.new(2012,2,4), DateTime.new(2012,2,5), DateTime.new(2012,2,6)])
27
+ expect(i.class).to eq(Daru::DateTimeIndex)
28
+ expect(i.to_a).to eq([DateTime.new(2012,2,4), DateTime.new(2012,2,5), DateTime.new(2012,2,6)])
29
+ expect(i.frequency).to eq('D')
30
+ end
31
+ end
32
+
4
33
  context "#initialize" do
5
34
  it "creates an Index from Array" do
6
35
  idx = Daru::Index.new ['speaker', 'mic', 'guitar', 'amp']
7
36
 
8
- expect(idx.to_a).to eq([:speaker, :mic, :guitar, :amp])
37
+ expect(idx.to_a).to eq(['speaker', 'mic', 'guitar', 'amp'])
38
+ end
39
+
40
+ it "accepts all sorts of objects for Indexing" do
41
+ idx = Daru::Index.new [:a, 'a', :hello, '23', 23]
42
+
43
+ expect(idx.to_a).to eq([:a, 'a', :hello, '23', 23])
9
44
  end
10
45
  end
11
46
 
@@ -17,18 +52,23 @@ describe Daru::Index do
17
52
  end
18
53
  end
19
54
 
20
- context "#+" do
55
+ context "#&" do
56
+ it "returns an intersection of 2 index objects" do
57
+ end
58
+ end
59
+
60
+ context "#|" do
21
61
  before :each do
22
62
  @left = Daru::Index.new [:miles, :geddy, :eric]
23
63
  @right = Daru::Index.new [:bob, :jimi, :richie]
24
64
  end
25
65
 
26
- it "adds 2 indexes and returns an Index" do
27
- expect(@left + @right).to eq([:miles, :geddy, :eric, :bob, :jimi, :richie].to_index)
66
+ it "unions 2 indexes and returns an Index" do
67
+ expect(@left | @right).to eq([:miles, :geddy, :eric, :bob, :jimi, :richie].to_index)
28
68
  end
29
69
 
30
- it "adds an Index and an Array to return an Index" do
31
- expect(@left + [:bob, :jimi, :richie]).to eq([:miles, :geddy, :eric,
70
+ it "unions an Index and an Array to return an Index" do
71
+ expect(@left | [:bob, :jimi, :richie]).to eq([:miles, :geddy, :eric,
32
72
  :bob, :jimi, :richie].to_index)
33
73
  end
34
74
  end
@@ -36,16 +76,282 @@ describe Daru::Index do
36
76
  context "#[]" do
37
77
  before do
38
78
  @id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
79
+ @mixed_id = Daru::Index.new ['a','b','c',:d,:a,0,3,5]
39
80
  end
40
81
 
41
82
  it "works with ranges" do
42
- expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
43
- [1,2,3,4]))
83
+ expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five]))
84
+
85
+ expect(@mixed_id['a'..'c']).to eq(Daru::Index.new(['a','b','c']))
86
+
87
+ # If both start and end are numbers then refer to numerical indexes
88
+ expect(@mixed_id[0..2]).to eq(Daru::Index.new(['a','b','c']))
89
+
90
+ # If at least one is a number then refer to actual indexing
91
+ expect(@mixed_id.slice('b',0)).to eq(Daru::Index.new(['b','c',:d,:a,0]))
44
92
  end
45
93
 
46
94
  it "returns multiple keys if specified multiple indices" do
47
- expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
48
- [0,1,3,4]))
95
+ expect(@id[0,1,3,4]).to eq(Daru::Index.new([0,1,3,4]))
96
+ expect(@mixed_id[0,5,3,2]).to eq(Daru::Index.new([5, 7, 6, 2]))
97
+ end
98
+
99
+ it "returns correct index position for non-numeric index" do
100
+ expect(@id[:four]).to eq(3)
101
+ expect(@id[3]).to eq(3)
102
+ end
103
+
104
+ it "returns correct index position for mixed index" do
105
+ expect(@mixed_id[0]).to eq(5)
106
+ expect(@mixed_id['c']).to eq(2)
107
+ end
108
+ end
109
+ end
110
+
111
+ describe Daru::MultiIndex do
112
+ before(:each) do
113
+ @index_tuples = [
114
+ [:a,:one,:bar],
115
+ [:a,:one,:baz],
116
+ [:a,:two,:bar],
117
+ [:a,:two,:baz],
118
+ [:b,:one,:bar],
119
+ [:b,:two,:bar],
120
+ [:b,:two,:baz],
121
+ [:b,:one,:foo],
122
+ [:c,:one,:bar],
123
+ [:c,:one,:baz],
124
+ [:c,:two,:foo],
125
+ [:c,:two,:bar]
126
+ ]
127
+ @multi_mi = Daru::MultiIndex.from_tuples(@index_tuples)
128
+ end
129
+
130
+ context ".initialize" do
131
+ it "accepts labels and levels as arguments" do
132
+ mi = Daru::MultiIndex.new(
133
+ levels: [[:a,:b,:c], [:one, :two]],
134
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]])
135
+
136
+ expect(mi[:a, :two]).to eq(1)
137
+ end
138
+
139
+ it "raises error for wrong number of labels or levels" do
140
+ expect {
141
+ Daru::MultiIndex.new(
142
+ levels: [[:a,:a,:b,:b,:c,:c], [:one, :two]],
143
+ labels: [[0,0,1,1,2,2]])
144
+ }.to raise_error
145
+ end
146
+ end
147
+
148
+ context ".from_tuples" do
149
+ it "creates 2 layer MultiIndex from tuples" do
150
+ tuples = [
151
+ [:a, :one],
152
+ [:a, :two],
153
+ [:b, :one],
154
+ [:b, :two],
155
+ [:c, :one],
156
+ [:c, :two]
157
+ ]
158
+ mi = Daru::MultiIndex.from_tuples(tuples)
159
+ expect(mi.levels).to eq([[:a, :b, :c], [:one,:two]])
160
+ expect(mi.labels).to eq([[0,0,1,1,2,2], [0,1,0,1,0,1]])
161
+ end
162
+
163
+ it "creates a triple layer MultiIndex from tuples" do
164
+ expect(@multi_mi.levels).to eq([[:a,:b,:c], [:one, :two],[:bar,:baz,:foo]])
165
+ expect(@multi_mi.labels).to eq([
166
+ [0,0,0,0,1,1,1,1,2,2,2,2],
167
+ [0,0,1,1,0,1,1,0,0,0,1,1],
168
+ [0,1,0,1,0,0,1,2,0,1,2,0]
169
+ ])
170
+ end
171
+ end
172
+
173
+ context "#size" do
174
+ it "returns size of MultiIndex" do
175
+ expect(@multi_mi.size).to eq(12)
176
+ end
177
+ end
178
+
179
+ context "#[]" do
180
+ it "returns the row number when specifying the complete tuple" do
181
+ expect(@multi_mi[:a, :one, :baz]).to eq(1)
182
+ end
183
+
184
+ it "returns MultiIndex when specifying incomplete tuple" do
185
+ expect(@multi_mi[:b]).to eq(Daru::MultiIndex.from_tuples([
186
+ [:b,:one,:bar],
187
+ [:b,:two,:bar],
188
+ [:b,:two,:baz],
189
+ [:b,:one,:foo]
190
+ ]))
191
+ expect(@multi_mi[:b, :one]).to eq(Daru::MultiIndex.from_tuples([
192
+ [:b,:one,:bar],
193
+ [:b,:one,:foo]
194
+ ]))
195
+ # TODO: Return Daru::Index if a single layer of indexes is present.
196
+ end
197
+
198
+ it "returns MultiIndex when specifying wholly numeric ranges" do
199
+ expect(@multi_mi[3..6]).to eq(Daru::MultiIndex.from_tuples([
200
+ [:a,:two,:baz],
201
+ [:b,:one,:bar],
202
+ [:b,:two,:bar],
203
+ [:b,:two,:baz]
204
+ ]))
205
+ end
206
+
207
+ it "works with numerical first levels" do
208
+ mi = Daru::MultiIndex.from_tuples([
209
+ [2000, 'M'],
210
+ [2000, 'F'],
211
+ [2001, 'M'],
212
+ [2001, 'F']
213
+ ])
214
+
215
+ expect(mi[2000]).to eq(Daru::MultiIndex.from_tuples([
216
+ [2000, 'M'],
217
+ [2000, 'F']
218
+ ]))
219
+
220
+ expect(mi[2000,'M']).to eq(0)
221
+ end
222
+ end
223
+
224
+ context "#include?" do
225
+ it "checks if a completely specified tuple exists" do
226
+ expect(@multi_mi.include?([:a,:one,:bar])).to eq(true)
227
+ end
228
+
229
+ it "checks if a top layer incomplete tuple exists" do
230
+ expect(@multi_mi.include?([:a])).to eq(true)
231
+ end
232
+
233
+ it "checks if a middle layer incomplete tuple exists" do
234
+ expect(@multi_mi.include?([:a, :one])).to eq(true)
235
+ end
236
+
237
+ it "checks for non-existence of a tuple" do
238
+ expect(@multi_mi.include?([:boo])).to eq(false)
239
+ end
240
+ end
241
+
242
+ context "#key" do
243
+ it "returns the tuple of the specified number" do
244
+ expect(@multi_mi.key(3)).to eq([:a,:two,:baz])
245
+ end
246
+
247
+ it "returns nil for non-existent pointer number" do
248
+ expect {
249
+ @multi_mi.key(100)
250
+ }.to raise_error ArgumentError
251
+ end
252
+ end
253
+
254
+ context "#to_a" do
255
+ it "returns tuples as an Array" do
256
+ expect(@multi_mi.to_a).to eq(@index_tuples)
257
+ end
258
+ end
259
+
260
+ context "#dup" do
261
+ it "completely duplicates the object" do
262
+ duplicate = @multi_mi.dup
263
+ expect(duplicate) .to eq(@multi_mi)
264
+ expect(duplicate.object_id).to_not eq(@multi_mi.object_id)
265
+ end
266
+ end
267
+
268
+ context "#==" do
269
+ it "returns false for unequal MultiIndex comparisons" do
270
+ mi1 = Daru::MultiIndex.from_tuples([
271
+ [:a, :one, :bar],
272
+ [:a, :two, :baz],
273
+ [:b, :one, :foo],
274
+ [:b, :two, :bar]
275
+ ])
276
+ mi2 = Daru::MultiIndex.from_tuples([
277
+ [:a, :two, :bar],
278
+ [:b, :one, :foo],
279
+ [:a, :one, :baz],
280
+ [:b, :two, :baz]
281
+ ])
282
+
283
+ expect(mi1 == mi2).to eq(false)
284
+ end
285
+ end
286
+
287
+ context "#values" do
288
+ it "returns an array of indices in order" do
289
+ mi = Daru::MultiIndex.from_tuples([
290
+ [:a, :one, :bar],
291
+ [:a, :two, :baz],
292
+ [:b, :one, :foo],
293
+ [:b, :two, :bar]
294
+ ])
295
+
296
+ expect(mi.values).to eq([0,1,2,3])
297
+ end
298
+ end
299
+
300
+ context "#|" do
301
+ before do
302
+ @mi1 = Daru::MultiIndex.from_tuples([
303
+ [:a, :one, :bar],
304
+ [:a, :two, :baz],
305
+ [:b, :one, :foo],
306
+ [:b, :two, :bar]
307
+ ])
308
+ @mi2 = Daru::MultiIndex.from_tuples([
309
+ [:a, :two, :bar],
310
+ [:b, :one, :foo],
311
+ [:a, :one, :baz],
312
+ [:b, :two, :baz]
313
+ ])
314
+ end
315
+
316
+ it "returns a union of two MultiIndex objects" do
317
+ expect(@mi1 | @mi2).to eq(Daru::MultiIndex.new(
318
+ levels: [[:a, :b], [:one, :two], [:bar, :baz, :foo]],
319
+ labels: [
320
+ [0, 0, 1, 1, 0, 0, 1],
321
+ [0, 1, 0, 1, 1, 0, 1],
322
+ [0, 1, 2, 0, 0, 1, 1]
323
+ ])
324
+ )
325
+ end
326
+ end
327
+
328
+ context "#&" do
329
+ it "returns the intersection of two MI objects" do
330
+ end
331
+ end
332
+
333
+ context "#empty?" do
334
+ it "returns true if nothing present in MultiIndex" do
335
+ expect(Daru::MultiIndex.new(labels: [[]], levels: [[]]).empty?).to eq(true)
336
+ end
337
+ end
338
+
339
+ context "#drop_left_level" do
340
+ it "drops the leftmost level" do
341
+ expect(
342
+ Daru::MultiIndex.from_tuples([
343
+ [:c,:one,:bar],
344
+ [:c,:one,:baz],
345
+ [:c,:two,:foo],
346
+ [:c,:two,:bar]
347
+ ]).drop_left_level).to eq(
348
+ Daru::MultiIndex.from_tuples([
349
+ [:one,:bar],
350
+ [:one,:baz],
351
+ [:two,:foo],
352
+ [:two,:bar]
353
+ ])
354
+ )
49
355
  end
50
356
  end
51
- end
357
+ end
@@ -7,24 +7,25 @@ describe Daru::IO do
7
7
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
8
8
  col_sep: ' ', headers: true)
9
9
 
10
+ df.vectors = [:image_resolution, :mls, :true_transform].to_index
10
11
  expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
11
- expect(df.vector[:image_resolution].first).to eq(6.55779)
12
- expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
12
+ expect(df[:image_resolution].first).to eq(6.55779)
13
+ expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
13
14
  end
14
15
 
15
- it "works properly for repeated headers", focus: true do
16
- df = Daru::DataFrame.from_csv('spec/fixtures/repeated_fields.csv')
17
- expect(df.vectors.to_a).to eq([:a1, :age_1, :age_2, :city, :id, :name_1, :name_2])
16
+ it "works properly for repeated headers" do
17
+ df = Daru::DataFrame.from_csv('spec/fixtures/repeated_fields.csv',header_converters: :symbol)
18
+ expect(df.vectors.to_a).to eq(['a1', 'age_1', 'age_2', 'city', 'id', 'name_1', 'name_2'])
18
19
 
19
20
  age = Daru::Vector.new([3, 4, 5, 6, nil, 8])
20
- expect(df[:age_2]).to eq(age)
21
+ expect(df['age_2']).to eq(age)
21
22
  end
22
23
 
23
24
  it "accepts scientific notation as float" do
24
25
  ds = Daru::DataFrame.from_csv('spec/fixtures/scientific_notation.csv')
25
- expect(ds.vectors.to_a).to eq([:x, :y])
26
+ expect(ds.vectors.to_a).to eq(['x', 'y'])
26
27
  y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
27
- y.zip(ds[:y]).each do |y_expected, y_ds|
28
+ y.zip(ds['y']).each do |y_expected, y_ds|
28
29
  expect(y_ds).to be_within(0.001).of(y_expected)
29
30
  end
30
31
  end
@@ -33,10 +34,10 @@ describe Daru::IO do
33
34
  context "#write_csv" do
34
35
  it "writes DataFrame to a CSV file" do
35
36
  df = Daru::DataFrame.new({
36
- a: [1,2,3,4,5],
37
- b: [11,22,33,44,55],
38
- c: ['a', 'g', 4, 5,'addadf'],
39
- d: [nil, 23, 4,'a','ff']})
37
+ 'a' => [1,2,3,4,5],
38
+ 'b' => [11,22,33,44,55],
39
+ 'c' => ['a', 'g', 4, 5,'addadf'],
40
+ 'd' => [nil, 23, 4,'a','ff']})
40
41
  t = Tempfile.new('data.csv')
41
42
  df.write_csv t.path
42
43
 
@@ -111,11 +112,11 @@ describe Daru::IO do
111
112
  df = Daru::DataFrame.new JSON.parse(json)
112
113
 
113
114
  expect(df.vectors).to eq([
114
- :name, :nativeName, :tld, :cca2, :ccn3, :cca3, :currency, :callingCode,
115
- :capital, :altSpellings, :relevance, :region, :subregion, :language,
116
- :languageCodes, :translations, :latlng, :demonym, :borders, :area].to_index)
115
+ 'name', 'nativeName', 'tld', 'cca2', 'ccn3', 'cca3', 'currency', 'callingCode',
116
+ 'capital', 'altSpellings', 'relevance', 'region', 'subregion', 'language',
117
+ 'languageCodes', 'translations', 'latlng', 'demonym', 'borders', 'area'].to_index)
117
118
 
118
- expect(df.row[0][:name]).to eq("Afghanistan")
119
+ expect(df.row[0]['name']).to eq("Afghanistan")
119
120
  end
120
121
  end
121
122
 
@@ -169,7 +170,7 @@ describe Daru::IO do
169
170
  describe Daru::Index do
170
171
  context "Marshalling" do
171
172
  it "" do
172
- i = Daru::Index.new([:a, :b, :c, :d, :e], [8,6,4,3,5])
173
+ i = Daru::Index.new([:a, :b, :c, :d, :e])
173
174
  expect(Marshal.load(Marshal.dump(i))).to eq(i)
174
175
  end
175
176
  end
@@ -18,8 +18,8 @@ describe Daru::DataFrame do
18
18
 
19
19
  it "adds two dataframes to produce a third" do
20
20
  expect(@left + @right).to eq(Daru::DataFrame.new({
21
- a: [2,nil,nil,8,nil,nil,nil],
22
- b: [20,nil,nil,80,nil,nil,nil],
21
+ a: [2,nil,nil,8,nil,nil],
22
+ b: [20,nil,nil,80,nil,nil],
23
23
  c: [nil,nil,nil,nil,nil,nil]
24
24
  }, index: [0,1,2,3,4,5,6]))
25
25
  end
@@ -66,7 +66,7 @@ describe Daru::DataFrame do
66
66
  end
67
67
  end
68
68
 
69
- context "#round" do
69
+ context "#round", focus: true do
70
70
  it "rounds to precision" do
71
71
  df = Daru::DataFrame.new({
72
72
  a: [1.3434,2.4332,5.6655,12.3344,32.233],