daru_lite 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,250 @@
1
+ shared_examples_for 'a filterable DataFrame' do
2
+ describe '#uniq' do
3
+ let(:df) { DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv' }
4
+
5
+ context 'with no args' do
6
+ subject { df.uniq }
7
+
8
+ it 'returns the correct result' do
9
+ expect(subject.shape.first).to eq 30
10
+ end
11
+ end
12
+
13
+ context 'given a vector' do
14
+ subject { df.uniq('color') }
15
+
16
+ it 'returns the correct result' do
17
+ expect(subject.shape.first).to eq 2
18
+ end
19
+ end
20
+
21
+ context 'given an array of vectors' do
22
+ subject { df.uniq("color", "director_name") }
23
+
24
+ it 'returns the correct result' do
25
+ expect(subject.shape.first).to eq 29
26
+ end
27
+ end
28
+ end
29
+
30
+ describe "#filter" do
31
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
32
+
33
+ context 'axis is row' do
34
+ subject { df.filter(:row) { |r| r[:a] % 2 == 0 } }
35
+
36
+ it { is_expected.to eq(df.filter_rows { |r| r[:a] % 2 == 0 }) }
37
+ end
38
+
39
+ context 'axis is vector' do
40
+ subject { df.filter(:vector) { |v| v[0] == 1 } }
41
+
42
+ it { is_expected.to eq(df.filter_vectors { |v| v[0] == 1 }) }
43
+ end
44
+
45
+ context 'axis is unknown' do
46
+ subject { df.filter(:kitten) {} }
47
+
48
+ it { expect { subject }.to raise_error ArgumentError, /axis/ }
49
+ end
50
+ end
51
+
52
+
53
+ describe '#reject_values' do
54
+ let(:df) do
55
+ DaruLite::DataFrame.new({
56
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
57
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
58
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
59
+ }, index: 11..18)
60
+ end
61
+ before { df.to_category :b }
62
+
63
+ context 'remove nils only' do
64
+ subject { df.reject_values nil }
65
+
66
+ it { is_expected.to be_a DaruLite::DataFrame }
67
+ its(:'b.type') { is_expected.to eq :category }
68
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
69
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
70
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
71
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
72
+ end
73
+
74
+ context 'remove Float::NAN only' do
75
+ subject { df.reject_values Float::NAN }
76
+
77
+ it { is_expected.to be_a DaruLite::DataFrame }
78
+ its(:'b.type') { is_expected.to eq :category }
79
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
80
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
81
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
82
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
83
+ end
84
+
85
+ context 'remove both nil and Float::NAN' do
86
+ subject { df.reject_values nil, Float::NAN }
87
+
88
+ it { is_expected.to be_a DaruLite::DataFrame }
89
+ its(:'b.type') { is_expected.to eq :category }
90
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
91
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
92
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
93
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
94
+ end
95
+
96
+ context 'any other values' do
97
+ subject { df.reject_values 1, 5 }
98
+
99
+ it { is_expected.to be_a DaruLite::DataFrame }
100
+ its(:'b.type') { is_expected.to eq :category }
101
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
102
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
103
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
104
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
105
+ end
106
+
107
+ context 'when resultant dataframe has one row' do
108
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
109
+
110
+ it { is_expected.to be_a DaruLite::DataFrame }
111
+ its(:'b.type') { is_expected.to eq :category }
112
+ its(:'a.to_a') { is_expected.to eq [7] }
113
+ its(:'b.to_a') { is_expected.to eq [8] }
114
+ its(:'c.to_a') { is_expected.to eq [7] }
115
+ its(:'index.to_a') { is_expected.to eq [18] }
116
+ end
117
+
118
+ context 'when resultant dataframe is empty' do
119
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
120
+
121
+ it { is_expected.to be_a DaruLite::DataFrame }
122
+ its(:'b.type') { is_expected.to eq :category }
123
+ its(:'a.to_a') { is_expected.to eq [] }
124
+ its(:'b.to_a') { is_expected.to eq [] }
125
+ its(:'c.to_a') { is_expected.to eq [] }
126
+ its(:'index.to_a') { is_expected.to eq [] }
127
+ end
128
+ end
129
+
130
+ describe "#keep_row_if" do
131
+ subject { df.keep_row_if { |row| row[:a] % 10 == 0 } }
132
+
133
+ let(:index) { [:one, :two, :three, :four, :five] }
134
+ let(:order) { [:a, :b, :c] }
135
+ let(:df) do
136
+ DaruLite::DataFrame.new({
137
+ b: [10, 12, 20, 23, 30],
138
+ a: [50, 30, 30, 1, 5],
139
+ c: [10, 20, 30, 40, 50]
140
+ },
141
+ order:,
142
+ index:
143
+ )
144
+ end
145
+
146
+ context DaruLite::Index do
147
+ it "keeps row if block evaluates to true" do
148
+ subject
149
+ expect(df).to eq(
150
+ DaruLite::DataFrame.new(
151
+ { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
152
+ order:,
153
+ index: index[..2]
154
+ )
155
+ )
156
+ end
157
+ end
158
+
159
+ context DaruLite::CategoricalIndex do
160
+ let (:index) { DaruLite::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
161
+
162
+ it "keeps row if block evaluates to true" do
163
+ subject
164
+ expect(df).to eq(
165
+ DaruLite::DataFrame.new(
166
+ { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
167
+ order:,
168
+ index: DaruLite::CategoricalIndex.new([:a, 1, 1])
169
+ )
170
+ )
171
+ end
172
+ end
173
+ end
174
+
175
+ describe "#keep_vector_if" do
176
+ it "keeps vector if block evaluates to true" do
177
+ df.keep_vector_if do |vector|
178
+ vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
179
+ end
180
+
181
+ expect(df).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
182
+ index: [:one, :two, :three, :four, :five]))
183
+ end
184
+ end
185
+
186
+ describe "#filter_vectors" do
187
+ context DaruLite::Index do
188
+ subject { df.filter_vectors { |vector| vector[0] == 1 } }
189
+
190
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
191
+
192
+ it "filters vectors" do
193
+ expect(subject).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
194
+ end
195
+ end
196
+ end
197
+
198
+ describe "#filter_rows" do
199
+ context DaruLite::Index do
200
+ subject { df.filter_rows { |r| r[:a] != 2 } }
201
+
202
+ let(:df) { DaruLite::DataFrame.new a: 1..3, b: 4..6 }
203
+
204
+ it "preserves names of vectors" do
205
+ expect(subject[:a].name).to eq(df[:a].name)
206
+ end
207
+
208
+ context "when specified no index" do
209
+ subject { df.filter_rows { |row| row[:a] % 2 == 0 } }
210
+
211
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
212
+
213
+ it "filters rows" do
214
+ expect(subject).to eq(DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [1]))
215
+ end
216
+ end
217
+
218
+ context "when specified numerical index" do
219
+ subject { df.filter_rows { |row| row[:a] % 2 == 0 } }
220
+
221
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }, index: [1,2,3]) }
222
+
223
+ it "filters rows" do
224
+ expect(subject).to eq(DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [2]))
225
+ end
226
+ end
227
+ end
228
+ end
229
+
230
+ context "#filter_vector" do
231
+ subject { df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 } }
232
+
233
+ let(:df) do
234
+ DaruLite::DataFrame.new(
235
+ {
236
+ id: DaruLite::Vector.new([1, 2, 3, 4, 5]),
237
+ name: DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
238
+ age: DaruLite::Vector.new([20, 23, 25, 27, 5]),
239
+ city: DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
240
+ a1: DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
241
+ },
242
+ order: [:id, :name, :age, :city, :a1]
243
+ )
244
+ end
245
+
246
+ it "creates new vector with the data of a given field for which block returns true" do
247
+ expect(subject).to eq(DaruLite::Vector.new([2,4]))
248
+ end
249
+ end
250
+ end
@@ -0,0 +1,221 @@
1
+ shared_examples_for 'an indexable DataFrame' do
2
+ describe "#set_index" do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new(
5
+ {
6
+ a: [1,2,3,4,5],
7
+ b: ['a','b','c','d','e'],
8
+ c: [11,22,33,44,55]
9
+ }
10
+ )
11
+ end
12
+
13
+ it "sets a particular column as the index and deletes that column" do
14
+ df.set_index(:b)
15
+ expect(df).to eq(
16
+ DaruLite::DataFrame.new({
17
+ a: [1,2,3,4,5],
18
+ c: [11,22,33,44,55]
19
+ }, index: ['a','b','c','d','e'])
20
+ )
21
+ end
22
+
23
+ it "sets a particular column as index but keeps that column" do
24
+ expect(df.set_index(:c, keep: true)).to eq(
25
+ DaruLite::DataFrame.new({
26
+ a: [1,2,3,4,5],
27
+ b: ['a','b','c','d','e'],
28
+ c: [11,22,33,44,55]
29
+ }, index: [11,22,33,44,55]))
30
+ expect(df[:c]).to eq(df[:c])
31
+ end
32
+
33
+ it "sets categorical index if categorical is true" do
34
+ data = {
35
+ a: [1, 2, 3, 4, 5],
36
+ b: [:a, 1, :a, 1, 'c'],
37
+ c: %w[a b c d e]
38
+ }
39
+ df = DaruLite::DataFrame.new(data)
40
+ df.set_index(:b, categorical: true)
41
+ expected = DaruLite::DataFrame.new(
42
+ data.slice(:a, :c),
43
+ index: DaruLite::CategoricalIndex.new(data[:b])
44
+ )
45
+ expect(df).to eq(expected)
46
+ end
47
+
48
+ it "raises error if all elements in the column aren't unique" do
49
+ jholu = DaruLite::DataFrame.new({
50
+ a: ['a','b','a'],
51
+ b: [1,2,4]
52
+ })
53
+
54
+ expect {
55
+ jholu.set_index(:a)
56
+ }.to raise_error(ArgumentError)
57
+ end
58
+
59
+ it "sets multiindex if array is given" do
60
+ df = DaruLite::DataFrame.new({
61
+ a: %w[a a b b],
62
+ b: [1, 2, 1, 2],
63
+ c: %w[a b c d]
64
+ })
65
+ df.set_index(%i[a b])
66
+ expected =
67
+ DaruLite::DataFrame.new(
68
+ { c: %w[a b c d] },
69
+ index: DaruLite::MultiIndex.from_tuples(
70
+ [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
71
+ )
72
+ ).tap do |df|
73
+ df.index.name = %i[a b]
74
+ df
75
+ end
76
+ expect(df).to eq(expected)
77
+ end
78
+ end
79
+
80
+ describe "#reindex" do
81
+ subject { df.reindex(DaruLite::Index.new([1,3,0,8,2])) }
82
+
83
+ let(:df) do
84
+ DaruLite::DataFrame.new({
85
+ a: [1,2,3,4,5],
86
+ b: [11,22,33,44,55],
87
+ c: %w(a b c d e)
88
+ })
89
+ end
90
+
91
+ it "re indexes and aligns accordingly" do
92
+ expect(subject).to eq(
93
+ DaruLite::DataFrame.new(
94
+ {
95
+ a: [2,4,1,nil,3],
96
+ b: [22,44,11,nil,33],
97
+ c: ['b','d','a',nil,'c']
98
+ },
99
+ index: DaruLite::Index.new([1,3,0,8,2])
100
+ )
101
+ )
102
+ end
103
+
104
+ it { is_expected.to_not eq(df) }
105
+ end
106
+
107
+ describe '#reset_index' do
108
+ context 'when Index' do
109
+ subject do
110
+ DaruLite::DataFrame.new(
111
+ {'vals' => [1,2,3,4,5]},
112
+ index: DaruLite::Index.new(%w[a b c d e], name: 'indices')
113
+ ).reset_index
114
+ end
115
+
116
+ it { is_expected.to eq DaruLite::DataFrame.new(
117
+ 'indices' => %w[a b c d e],
118
+ 'vals' => [1,2,3,4,5]
119
+ )}
120
+ end
121
+
122
+ context 'when MultiIndex' do
123
+ subject do
124
+ mi = DaruLite::MultiIndex.from_tuples([
125
+ [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
126
+ ])
127
+ mi.name = %w[nums alphas]
128
+ DaruLite::DataFrame.new(
129
+ {'vals' => [1,2,3,4]},
130
+ index: mi
131
+ ).reset_index
132
+ end
133
+
134
+ it { is_expected.to eq DaruLite::DataFrame.new(
135
+ 'nums' => [0,0,1,1],
136
+ 'alphas' => %w[a b a b],
137
+ 'vals' => [1,2,3,4]
138
+ )}
139
+ end
140
+ end
141
+
142
+ describe "#index=" do
143
+ let(:df) do
144
+ DaruLite::DataFrame.new({
145
+ a: [1,2,3,4,5],
146
+ b: [11,22,33,44,55],
147
+ c: %w(a b c d e)
148
+ })
149
+ end
150
+
151
+ it "simply reassigns the index" do
152
+ df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
153
+ expect(df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
154
+ end
155
+
156
+ it "raises error for improper length index" do
157
+ expect {
158
+ df.index = DaruLite::Index.new([1,2])
159
+ }.to raise_error(ArgumentError)
160
+ end
161
+
162
+ it "is able to accept array" do
163
+ df.index = (1..5).to_a
164
+ expect(df.index).to eq DaruLite::Index.new (1..5).to_a
165
+ end
166
+ end
167
+
168
+ describe "#reindex_vectors" do
169
+ it "re indexes vectors and aligns accordingly" do
170
+ df = DaruLite::DataFrame.new({
171
+ a: [1,2,3,4,5],
172
+ b: [11,22,33,44,55],
173
+ c: %w(a b c d e)
174
+ })
175
+
176
+ ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
177
+ expect(ans).to eq(DaruLite::DataFrame.new({
178
+ :b => [11,22,33,44,55],
179
+ 'a' => [nil, nil, nil, nil, nil],
180
+ :a => [1,2,3,4,5]
181
+ }, order: [:b, 'a', :a]))
182
+ end
183
+
184
+ it 'raises ArgumentError if argument was not an index' do
185
+ df = DaruLite::DataFrame.new([])
186
+ expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
187
+ end
188
+ end
189
+
190
+ describe "#vectors=" do
191
+ let(:df) do
192
+ DaruLite::DataFrame.new({
193
+ a: [1,2,3,4,5],
194
+ b: [11,22,33,44,55],
195
+ c: %w(a b c d e)
196
+ })
197
+ end
198
+
199
+ it "simply reassigns vectors" do
200
+ df.vectors = DaruLite::Index.new(['b',0,'m'])
201
+
202
+ expect(df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
203
+ expect(df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
204
+ expect(df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
205
+ expect(df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
206
+ end
207
+
208
+ it "raises error for improper length index" do
209
+ expect {
210
+ df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
211
+ }.to raise_error(ArgumentError)
212
+ end
213
+
214
+ it "change name of vectors in @data" do
215
+ new_index_array = [:k, :l, :m]
216
+ df.vectors = DaruLite::Index.new(new_index_array)
217
+
218
+ expect(df.data.map { |vector| vector.name }).to eq(new_index_array)
219
+ end
220
+ end
221
+ end