daru 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
@@ -1,11 +1,46 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Index do
4
+ context ".new" do
5
+ it "creates an Index object if Index-like data is supplied" do
6
+ i = Daru::Index.new [:one, 'one', 1, 2, :two]
7
+ expect(i.class).to eq(Daru::Index)
8
+ expect(i.to_a) .to eq([:one, 'one', 1, 2, :two])
9
+ end
10
+
11
+ it "creates a MultiIndex if tuples are supplied" do
12
+ i = Daru::Index.new([
13
+ [:b,:one,:bar],
14
+ [:b,:two,:bar],
15
+ [:b,:two,:baz],
16
+ [:b,:one,:foo]
17
+ ])
18
+
19
+ expect(i.class).to eq(Daru::MultiIndex)
20
+ expect(i.levels).to eq([[:b], [:one, :two], [:bar, :baz, :foo]])
21
+ expect(i.labels).to eq([[0,0,0,0],[0,1,1,0],[0,0,1,2]])
22
+ end
23
+
24
+ it "creates DateTimeIndex if date-like objects specified" do
25
+ i = Daru::Index.new([
26
+ DateTime.new(2012,2,4), DateTime.new(2012,2,5), DateTime.new(2012,2,6)])
27
+ expect(i.class).to eq(Daru::DateTimeIndex)
28
+ expect(i.to_a).to eq([DateTime.new(2012,2,4), DateTime.new(2012,2,5), DateTime.new(2012,2,6)])
29
+ expect(i.frequency).to eq('D')
30
+ end
31
+ end
32
+
4
33
  context "#initialize" do
5
34
  it "creates an Index from Array" do
6
35
  idx = Daru::Index.new ['speaker', 'mic', 'guitar', 'amp']
7
36
 
8
- expect(idx.to_a).to eq([:speaker, :mic, :guitar, :amp])
37
+ expect(idx.to_a).to eq(['speaker', 'mic', 'guitar', 'amp'])
38
+ end
39
+
40
+ it "accepts all sorts of objects for Indexing" do
41
+ idx = Daru::Index.new [:a, 'a', :hello, '23', 23]
42
+
43
+ expect(idx.to_a).to eq([:a, 'a', :hello, '23', 23])
9
44
  end
10
45
  end
11
46
 
@@ -17,18 +52,23 @@ describe Daru::Index do
17
52
  end
18
53
  end
19
54
 
20
- context "#+" do
55
+ context "#&" do
56
+ it "returns an intersection of 2 index objects" do
57
+ end
58
+ end
59
+
60
+ context "#|" do
21
61
  before :each do
22
62
  @left = Daru::Index.new [:miles, :geddy, :eric]
23
63
  @right = Daru::Index.new [:bob, :jimi, :richie]
24
64
  end
25
65
 
26
- it "adds 2 indexes and returns an Index" do
27
- expect(@left + @right).to eq([:miles, :geddy, :eric, :bob, :jimi, :richie].to_index)
66
+ it "unions 2 indexes and returns an Index" do
67
+ expect(@left | @right).to eq([:miles, :geddy, :eric, :bob, :jimi, :richie].to_index)
28
68
  end
29
69
 
30
- it "adds an Index and an Array to return an Index" do
31
- expect(@left + [:bob, :jimi, :richie]).to eq([:miles, :geddy, :eric,
70
+ it "unions an Index and an Array to return an Index" do
71
+ expect(@left | [:bob, :jimi, :richie]).to eq([:miles, :geddy, :eric,
32
72
  :bob, :jimi, :richie].to_index)
33
73
  end
34
74
  end
@@ -36,16 +76,282 @@ describe Daru::Index do
36
76
  context "#[]" do
37
77
  before do
38
78
  @id = Daru::Index.new [:one, :two, :three, :four, :five, :six, :seven]
79
+ @mixed_id = Daru::Index.new ['a','b','c',:d,:a,0,3,5]
39
80
  end
40
81
 
41
82
  it "works with ranges" do
42
- expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five],
43
- [1,2,3,4]))
83
+ expect(@id[:two..:five]).to eq(Daru::Index.new([:two, :three, :four, :five]))
84
+
85
+ expect(@mixed_id['a'..'c']).to eq(Daru::Index.new(['a','b','c']))
86
+
87
+ # If both start and end are numbers then refer to numerical indexes
88
+ expect(@mixed_id[0..2]).to eq(Daru::Index.new(['a','b','c']))
89
+
90
+ # If atleast one is a number then refer to actual indexing
91
+ expect(@mixed_id.slice('b',0)).to eq(Daru::Index.new(['b','c',:d,:a,0]))
44
92
  end
45
93
 
46
94
  it "returns multiple keys if specified multiple indices" do
47
- expect(@id[[0,1,3,4]]).to eq(Daru::Index.new([:one, :two, :four, :five],
48
- [0,1,3,4]))
95
+ expect(@id[0,1,3,4]).to eq(Daru::Index.new([0,1,3,4]))
96
+ expect(@mixed_id[0,5,3,2]).to eq(Daru::Index.new([5, 7, 6, 2]))
97
+ end
98
+
99
+ it "returns correct index position for non-numeric index" do
100
+ expect(@id[:four]).to eq(3)
101
+ expect(@id[3]).to eq(3)
102
+ end
103
+
104
+ it "returns correct index position for mixed index" do
105
+ expect(@mixed_id[0]).to eq(5)
106
+ expect(@mixed_id['c']).to eq(2)
107
+ end
108
+ end
109
+ end
110
+
111
+ describe Daru::MultiIndex do
112
+ before(:each) do
113
+ @index_tuples = [
114
+ [:a,:one,:bar],
115
+ [:a,:one,:baz],
116
+ [:a,:two,:bar],
117
+ [:a,:two,:baz],
118
+ [:b,:one,:bar],
119
+ [:b,:two,:bar],
120
+ [:b,:two,:baz],
121
+ [:b,:one,:foo],
122
+ [:c,:one,:bar],
123
+ [:c,:one,:baz],
124
+ [:c,:two,:foo],
125
+ [:c,:two,:bar]
126
+ ]
127
+ @multi_mi = Daru::MultiIndex.from_tuples(@index_tuples)
128
+ end
129
+
130
+ context ".initialize" do
131
+ it "accepts labels and levels as arguments" do
132
+ mi = Daru::MultiIndex.new(
133
+ levels: [[:a,:b,:c], [:one, :two]],
134
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]])
135
+
136
+ expect(mi[:a, :two]).to eq(1)
137
+ end
138
+
139
+ it "raises error for wrong number of labels or levels" do
140
+ expect {
141
+ Daru::MultiIndex.new(
142
+ levels: [[:a,:a,:b,:b,:c,:c], [:one, :two]],
143
+ labels: [[0,0,1,1,2,2]])
144
+ }.to raise_error
145
+ end
146
+ end
147
+
148
+ context ".from_tuples" do
149
+ it "creates 2 layer MultiIndex from tuples" do
150
+ tuples = [
151
+ [:a, :one],
152
+ [:a, :two],
153
+ [:b, :one],
154
+ [:b, :two],
155
+ [:c, :one],
156
+ [:c, :two]
157
+ ]
158
+ mi = Daru::MultiIndex.from_tuples(tuples)
159
+ expect(mi.levels).to eq([[:a, :b, :c], [:one,:two]])
160
+ expect(mi.labels).to eq([[0,0,1,1,2,2], [0,1,0,1,0,1]])
161
+ end
162
+
163
+ it "creates a triple layer MultiIndex from tuples" do
164
+ expect(@multi_mi.levels).to eq([[:a,:b,:c], [:one, :two],[:bar,:baz,:foo]])
165
+ expect(@multi_mi.labels).to eq([
166
+ [0,0,0,0,1,1,1,1,2,2,2,2],
167
+ [0,0,1,1,0,1,1,0,0,0,1,1],
168
+ [0,1,0,1,0,0,1,2,0,1,2,0]
169
+ ])
170
+ end
171
+ end
172
+
173
+ context "#size" do
174
+ it "returns size of MultiIndex" do
175
+ expect(@multi_mi.size).to eq(12)
176
+ end
177
+ end
178
+
179
+ context "#[]" do
180
+ it "returns the row number when specifying the complete tuple" do
181
+ expect(@multi_mi[:a, :one, :baz]).to eq(1)
182
+ end
183
+
184
+ it "returns MultiIndex when specifying incomplete tuple" do
185
+ expect(@multi_mi[:b]).to eq(Daru::MultiIndex.from_tuples([
186
+ [:b,:one,:bar],
187
+ [:b,:two,:bar],
188
+ [:b,:two,:baz],
189
+ [:b,:one,:foo]
190
+ ]))
191
+ expect(@multi_mi[:b, :one]).to eq(Daru::MultiIndex.from_tuples([
192
+ [:b,:one,:bar],
193
+ [:b,:one,:foo]
194
+ ]))
195
+ # TODO: Return Daru::Index if a single layer of indexes is present.
196
+ end
197
+
198
+ it "returns MultiIndex when specifying wholly numeric ranges" do
199
+ expect(@multi_mi[3..6]).to eq(Daru::MultiIndex.from_tuples([
200
+ [:a,:two,:baz],
201
+ [:b,:one,:bar],
202
+ [:b,:two,:bar],
203
+ [:b,:two,:baz]
204
+ ]))
205
+ end
206
+
207
+ it "works with numerical first levels" do
208
+ mi = Daru::MultiIndex.from_tuples([
209
+ [2000, 'M'],
210
+ [2000, 'F'],
211
+ [2001, 'M'],
212
+ [2001, 'F']
213
+ ])
214
+
215
+ expect(mi[2000]).to eq(Daru::MultiIndex.from_tuples([
216
+ [2000, 'M'],
217
+ [2000, 'F']
218
+ ]))
219
+
220
+ expect(mi[2000,'M']).to eq(0)
221
+ end
222
+ end
223
+
224
+ context "#include?" do
225
+ it "checks if a completely specified tuple exists" do
226
+ expect(@multi_mi.include?([:a,:one,:bar])).to eq(true)
227
+ end
228
+
229
+ it "checks if a top layer incomplete tuple exists" do
230
+ expect(@multi_mi.include?([:a])).to eq(true)
231
+ end
232
+
233
+ it "checks if a middle layer incomplete tuple exists" do
234
+ expect(@multi_mi.include?([:a, :one])).to eq(true)
235
+ end
236
+
237
+ it "checks for non-existence of a tuple" do
238
+ expect(@multi_mi.include?([:boo])).to eq(false)
239
+ end
240
+ end
241
+
242
+ context "#key" do
243
+ it "returns the tuple of the specified number" do
244
+ expect(@multi_mi.key(3)).to eq([:a,:two,:baz])
245
+ end
246
+
247
+ it "returns nil for non-existent pointer number" do
248
+ expect {
249
+ @multi_mi.key(100)
250
+ }.to raise_error ArgumentError
251
+ end
252
+ end
253
+
254
+ context "#to_a" do
255
+ it "returns tuples as an Array" do
256
+ expect(@multi_mi.to_a).to eq(@index_tuples)
257
+ end
258
+ end
259
+
260
+ context "#dup" do
261
+ it "completely duplicates the object" do
262
+ duplicate = @multi_mi.dup
263
+ expect(duplicate) .to eq(@multi_mi)
264
+ expect(duplicate.object_id).to_not eq(@multi_mi.object_id)
265
+ end
266
+ end
267
+
268
+ context "#==" do
269
+ it "returns false for unequal MultiIndex comparisons" do
270
+ mi1 = Daru::MultiIndex.from_tuples([
271
+ [:a, :one, :bar],
272
+ [:a, :two, :baz],
273
+ [:b, :one, :foo],
274
+ [:b, :two, :bar]
275
+ ])
276
+ mi2 = Daru::MultiIndex.from_tuples([
277
+ [:a, :two, :bar],
278
+ [:b, :one, :foo],
279
+ [:a, :one, :baz],
280
+ [:b, :two, :baz]
281
+ ])
282
+
283
+ expect(mi1 == mi2).to eq(false)
284
+ end
285
+ end
286
+
287
+ context "#values" do
288
+ it "returns an array of indices in order" do
289
+ mi = Daru::MultiIndex.from_tuples([
290
+ [:a, :one, :bar],
291
+ [:a, :two, :baz],
292
+ [:b, :one, :foo],
293
+ [:b, :two, :bar]
294
+ ])
295
+
296
+ expect(mi.values).to eq([0,1,2,3])
297
+ end
298
+ end
299
+
300
+ context "#|" do
301
+ before do
302
+ @mi1 = Daru::MultiIndex.from_tuples([
303
+ [:a, :one, :bar],
304
+ [:a, :two, :baz],
305
+ [:b, :one, :foo],
306
+ [:b, :two, :bar]
307
+ ])
308
+ @mi2 = Daru::MultiIndex.from_tuples([
309
+ [:a, :two, :bar],
310
+ [:b, :one, :foo],
311
+ [:a, :one, :baz],
312
+ [:b, :two, :baz]
313
+ ])
314
+ end
315
+
316
+ it "returns a union of two MultiIndex objects" do
317
+ expect(@mi1 | @mi2).to eq(Daru::MultiIndex.new(
318
+ levels: [[:a, :b], [:one, :two], [:bar, :baz, :foo]],
319
+ labels: [
320
+ [0, 0, 1, 1, 0, 0, 1],
321
+ [0, 1, 0, 1, 1, 0, 1],
322
+ [0, 1, 2, 0, 0, 1, 1]
323
+ ])
324
+ )
325
+ end
326
+ end
327
+
328
+ context "#&" do
329
+ it "returns the intersection of two MI objects" do
330
+ end
331
+ end
332
+
333
+ context "#empty?" do
334
+ it "returns true if nothing present in MultiIndex" do
335
+ expect(Daru::MultiIndex.new(labels: [[]], levels: [[]]).empty?).to eq(true)
336
+ end
337
+ end
338
+
339
+ context "#drop_left_level" do
340
+ it "drops the leftmost level" do
341
+ expect(
342
+ Daru::MultiIndex.from_tuples([
343
+ [:c,:one,:bar],
344
+ [:c,:one,:baz],
345
+ [:c,:two,:foo],
346
+ [:c,:two,:bar]
347
+ ]).drop_left_level).to eq(
348
+ Daru::MultiIndex.from_tuples([
349
+ [:one,:bar],
350
+ [:one,:baz],
351
+ [:two,:foo],
352
+ [:two,:bar]
353
+ ])
354
+ )
49
355
  end
50
356
  end
51
- end
357
+ end
@@ -7,24 +7,25 @@ describe Daru::IO do
7
7
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
8
8
  col_sep: ' ', headers: true)
9
9
 
10
+ df.vectors = [:image_resolution, :mls, :true_transform].to_index
10
11
  expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
11
- expect(df.vector[:image_resolution].first).to eq(6.55779)
12
- expect(df.vector[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
12
+ expect(df[:image_resolution].first).to eq(6.55779)
13
+ expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
13
14
  end
14
15
 
15
- it "works properly for repeated headers", focus: true do
16
- df = Daru::DataFrame.from_csv('spec/fixtures/repeated_fields.csv')
17
- expect(df.vectors.to_a).to eq([:a1, :age_1, :age_2, :city, :id, :name_1, :name_2])
16
+ it "works properly for repeated headers" do
17
+ df = Daru::DataFrame.from_csv('spec/fixtures/repeated_fields.csv',header_converters: :symbol)
18
+ expect(df.vectors.to_a).to eq(['a1', 'age_1', 'age_2', 'city', 'id', 'name_1', 'name_2'])
18
19
 
19
20
  age = Daru::Vector.new([3, 4, 5, 6, nil, 8])
20
- expect(df[:age_2]).to eq(age)
21
+ expect(df['age_2']).to eq(age)
21
22
  end
22
23
 
23
24
  it "accepts scientific notation as float" do
24
25
  ds = Daru::DataFrame.from_csv('spec/fixtures/scientific_notation.csv')
25
- expect(ds.vectors.to_a).to eq([:x, :y])
26
+ expect(ds.vectors.to_a).to eq(['x', 'y'])
26
27
  y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
27
- y.zip(ds[:y]).each do |y_expected, y_ds|
28
+ y.zip(ds['y']).each do |y_expected, y_ds|
28
29
  expect(y_ds).to be_within(0.001).of(y_expected)
29
30
  end
30
31
  end
@@ -33,10 +34,10 @@ describe Daru::IO do
33
34
  context "#write_csv" do
34
35
  it "writes DataFrame to a CSV file" do
35
36
  df = Daru::DataFrame.new({
36
- a: [1,2,3,4,5],
37
- b: [11,22,33,44,55],
38
- c: ['a', 'g', 4, 5,'addadf'],
39
- d: [nil, 23, 4,'a','ff']})
37
+ 'a' => [1,2,3,4,5],
38
+ 'b' => [11,22,33,44,55],
39
+ 'c' => ['a', 'g', 4, 5,'addadf'],
40
+ 'd' => [nil, 23, 4,'a','ff']})
40
41
  t = Tempfile.new('data.csv')
41
42
  df.write_csv t.path
42
43
 
@@ -111,11 +112,11 @@ describe Daru::IO do
111
112
  df = Daru::DataFrame.new JSON.parse(json)
112
113
 
113
114
  expect(df.vectors).to eq([
114
- :name, :nativeName, :tld, :cca2, :ccn3, :cca3, :currency, :callingCode,
115
- :capital, :altSpellings, :relevance, :region, :subregion, :language,
116
- :languageCodes, :translations, :latlng, :demonym, :borders, :area].to_index)
115
+ 'name', 'nativeName', 'tld', 'cca2', 'ccn3', 'cca3', 'currency', 'callingCode',
116
+ 'capital', 'altSpellings', 'relevance', 'region', 'subregion', 'language',
117
+ 'languageCodes', 'translations', 'latlng', 'demonym', 'borders', 'area'].to_index)
117
118
 
118
- expect(df.row[0][:name]).to eq("Afghanistan")
119
+ expect(df.row[0]['name']).to eq("Afghanistan")
119
120
  end
120
121
  end
121
122
 
@@ -169,7 +170,7 @@ describe Daru::IO do
169
170
  describe Daru::Index do
170
171
  context "Marshalling" do
171
172
  it "" do
172
- i = Daru::Index.new([:a, :b, :c, :d, :e], [8,6,4,3,5])
173
+ i = Daru::Index.new([:a, :b, :c, :d, :e])
173
174
  expect(Marshal.load(Marshal.dump(i))).to eq(i)
174
175
  end
175
176
  end
@@ -18,8 +18,8 @@ describe Daru::DataFrame do
18
18
 
19
19
  it "adds two dataframes to produce a third" do
20
20
  expect(@left + @right).to eq(Daru::DataFrame.new({
21
- a: [2,nil,nil,8,nil,nil,nil],
22
- b: [20,nil,nil,80,nil,nil,nil],
21
+ a: [2,nil,nil,8,nil,nil],
22
+ b: [20,nil,nil,80,nil,nil],
23
23
  c: [nil,nil,nil,nil,nil,nil]
24
24
  }, index: [0,1,2,3,4,5,6]))
25
25
  end
@@ -66,7 +66,7 @@ describe Daru::DataFrame do
66
66
  end
67
67
  end
68
68
 
69
- context "#round" do
69
+ context "#round", focus: true do
70
70
  it "rounds to precision" do
71
71
  df = Daru::DataFrame.new({
72
72
  a: [1.3434,2.4332,5.6655,12.3344,32.233],