daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -0,0 +1,250 @@
1
+ shared_examples_for 'a filterable DataFrame' do
2
+ describe '#uniq' do
3
+ let(:df) { DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv' }
4
+
5
+ context 'with no args' do
6
+ subject { df.uniq }
7
+
8
+ it 'returns the correct result' do
9
+ expect(subject.shape.first).to eq 30
10
+ end
11
+ end
12
+
13
+ context 'given a vector' do
14
+ subject { df.uniq('color') }
15
+
16
+ it 'returns the correct result' do
17
+ expect(subject.shape.first).to eq 2
18
+ end
19
+ end
20
+
21
+ context 'given an array of vectors' do
22
+ subject { df.uniq("color", "director_name") }
23
+
24
+ it 'returns the correct result' do
25
+ expect(subject.shape.first).to eq 29
26
+ end
27
+ end
28
+ end
29
+
30
+ describe "#filter" do
31
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
32
+
33
+ context 'avis is row' do
34
+ subject { df.filter(:row) { |r| r[:a] % 2 == 0 } }
35
+
36
+ it { is_expected.to eq(df.filter_rows { |r| r[:a] % 2 == 0 }) }
37
+ end
38
+
39
+ context 'avis is vector' do
40
+ subject { df.filter(:vector) { |v| v[0] == 1 } }
41
+
42
+ it { is_expected.to eq(df.filter_vectors { |v| v[0] == 1 }) }
43
+ end
44
+
45
+ context 'avis is unknown' do
46
+ subject { df.filter(:kitten) {} }
47
+
48
+ it { expect { subject }.to raise_error ArgumentError, /axis/ }
49
+ end
50
+ end
51
+
52
+
53
+ describe '#reject_values' do
54
+ let(:df) do
55
+ DaruLite::DataFrame.new({
56
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
57
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
58
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
59
+ }, index: 11..18)
60
+ end
61
+ before { df.to_category :b }
62
+
63
+ context 'remove nils only' do
64
+ subject { df.reject_values nil }
65
+
66
+ it { is_expected.to be_a DaruLite::DataFrame }
67
+ its(:'b.type') { is_expected.to eq :category }
68
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
69
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
70
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
71
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
72
+ end
73
+
74
+ context 'remove Float::NAN only' do
75
+ subject { df.reject_values Float::NAN }
76
+
77
+ it { is_expected.to be_a DaruLite::DataFrame }
78
+ its(:'b.type') { is_expected.to eq :category }
79
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
80
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
81
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
82
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
83
+ end
84
+
85
+ context 'remove both nil and Float::NAN' do
86
+ subject { df.reject_values nil, Float::NAN }
87
+
88
+ it { is_expected.to be_a DaruLite::DataFrame }
89
+ its(:'b.type') { is_expected.to eq :category }
90
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
91
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
92
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
93
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
94
+ end
95
+
96
+ context 'any other values' do
97
+ subject { df.reject_values 1, 5 }
98
+
99
+ it { is_expected.to be_a DaruLite::DataFrame }
100
+ its(:'b.type') { is_expected.to eq :category }
101
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
102
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
103
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
104
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
105
+ end
106
+
107
+ context 'when resultant dataframe has one row' do
108
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
109
+
110
+ it { is_expected.to be_a DaruLite::DataFrame }
111
+ its(:'b.type') { is_expected.to eq :category }
112
+ its(:'a.to_a') { is_expected.to eq [7] }
113
+ its(:'b.to_a') { is_expected.to eq [8] }
114
+ its(:'c.to_a') { is_expected.to eq [7] }
115
+ its(:'index.to_a') { is_expected.to eq [18] }
116
+ end
117
+
118
+ context 'when resultant dataframe is empty' do
119
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
120
+
121
+ it { is_expected.to be_a DaruLite::DataFrame }
122
+ its(:'b.type') { is_expected.to eq :category }
123
+ its(:'a.to_a') { is_expected.to eq [] }
124
+ its(:'b.to_a') { is_expected.to eq [] }
125
+ its(:'c.to_a') { is_expected.to eq [] }
126
+ its(:'index.to_a') { is_expected.to eq [] }
127
+ end
128
+ end
129
+
130
+ describe "#keep_row_if" do
131
+ subject { df.keep_row_if { |row| row[:a] % 10 == 0 } }
132
+
133
+ let(:index) { [:one, :two, :three, :four, :five] }
134
+ let(:order) { [:a, :b, :c] }
135
+ let(:df) do
136
+ DaruLite::DataFrame.new({
137
+ b: [10, 12, 20, 23, 30],
138
+ a: [50, 30, 30, 1, 5],
139
+ c: [10, 20, 30, 40, 50]
140
+ },
141
+ order:,
142
+ index:
143
+ )
144
+ end
145
+
146
+ context DaruLite::Index do
147
+ it "keeps row if block evaluates to true" do
148
+ subject
149
+ expect(df).to eq(
150
+ DaruLite::DataFrame.new(
151
+ { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
152
+ order:,
153
+ index: index[..2]
154
+ )
155
+ )
156
+ end
157
+ end
158
+
159
+ context DaruLite::CategoricalIndex do
160
+ let (:index) { DaruLite::CategoricalIndex.new([:a, 1, 1, :a, :c]) }
161
+
162
+ it "keeps row if block evaluates to true" do
163
+ subject
164
+ expect(df).to eq(
165
+ DaruLite::DataFrame.new(
166
+ { b: [10, 12, 20], a: [50, 30, 30], c: [10, 20, 30] },
167
+ order:,
168
+ index: DaruLite::CategoricalIndex.new([:a, 1, 1])
169
+ )
170
+ )
171
+ end
172
+ end
173
+ end
174
+
175
+ describe "#keep_vector_if" do
176
+ it "keeps vector if block evaluates to true" do
177
+ df.keep_vector_if do |vector|
178
+ vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
179
+ end
180
+
181
+ expect(df).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
182
+ index: [:one, :two, :three, :four, :five]))
183
+ end
184
+ end
185
+
186
+ describe "#filter_vectors" do
187
+ context DaruLite::Index do
188
+ subject { df.filter_vectors { |vector| vector[0] == 1 } }
189
+
190
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
191
+
192
+ it "filters vectors" do
193
+ expect(subject).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
194
+ end
195
+ end
196
+ end
197
+
198
+ describe "#filter_rows" do
199
+ context DaruLite::Index do
200
+ subject { df.filter_rows { |r| r[:a] != 2 } }
201
+
202
+ let(:df) { DaruLite::DataFrame.new a: 1..3, b: 4..6 }
203
+
204
+ it "preserves names of vectors" do
205
+ expect(subject[:a].name).to eq(df[:a].name)
206
+ end
207
+
208
+ context "when specified no index" do
209
+ subject { df.filter_rows { |row| row[:a] % 2 == 0 } }
210
+
211
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }) }
212
+
213
+ it "filters rows" do
214
+ expect(subject).to eq(DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [1]))
215
+ end
216
+ end
217
+
218
+ context "when specified numerical index" do
219
+ subject { df.filter_rows { |row| row[:a] % 2 == 0 } }
220
+
221
+ let(:df) { DaruLite::DataFrame.new({ a: [1,2,3], b: [2,3,4] }, index: [1,2,3]) }
222
+
223
+ it "filters rows" do
224
+ expect(subject).to eq(DaruLite::DataFrame.new({ a: [2], b: [3] }, order: [:a, :b], index: [2]))
225
+ end
226
+ end
227
+ end
228
+ end
229
+
230
+ context "#filter_vector" do
231
+ subject { df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 } }
232
+
233
+ let(:df) do
234
+ DaruLite::DataFrame.new(
235
+ {
236
+ id: DaruLite::Vector.new([1, 2, 3, 4, 5]),
237
+ name: DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
238
+ age: DaruLite::Vector.new([20, 23, 25, 27, 5]),
239
+ city: DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
240
+ a1: DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
241
+ },
242
+ order: [:id, :name, :age, :city, :a1]
243
+ )
244
+ end
245
+
246
+ it "creates new vector with the data of a given field for which block returns true" do
247
+ expect(subject).to eq(DaruLite::Vector.new([2,4]))
248
+ end
249
+ end
250
+ end
@@ -0,0 +1,221 @@
1
+ shared_examples_for 'an indexable DataFrame' do
2
+ describe "#set_index" do
3
+ let(:df) do
4
+ DaruLite::DataFrame.new(
5
+ {
6
+ a: [1,2,3,4,5],
7
+ b: ['a','b','c','d','e'],
8
+ c: [11,22,33,44,55]
9
+ }
10
+ )
11
+ end
12
+
13
+ it "sets a particular column as the index and deletes that column" do
14
+ df.set_index(:b)
15
+ expect(df).to eq(
16
+ DaruLite::DataFrame.new({
17
+ a: [1,2,3,4,5],
18
+ c: [11,22,33,44,55]
19
+ }, index: ['a','b','c','d','e'])
20
+ )
21
+ end
22
+
23
+ it "sets a particular column as index but keeps that column" do
24
+ expect(df.set_index(:c, keep: true)).to eq(
25
+ DaruLite::DataFrame.new({
26
+ a: [1,2,3,4,5],
27
+ b: ['a','b','c','d','e'],
28
+ c: [11,22,33,44,55]
29
+ }, index: [11,22,33,44,55]))
30
+ expect(df[:c]).to eq(df[:c])
31
+ end
32
+
33
+ it "sets categorical index if categorical is true" do
34
+ data = {
35
+ a: [1, 2, 3, 4, 5],
36
+ b: [:a, 1, :a, 1, 'c'],
37
+ c: %w[a b c d e]
38
+ }
39
+ df = DaruLite::DataFrame.new(data)
40
+ df.set_index(:b, categorical: true)
41
+ expected = DaruLite::DataFrame.new(
42
+ data.slice(:a, :c),
43
+ index: DaruLite::CategoricalIndex.new(data[:b])
44
+ )
45
+ expect(df).to eq(expected)
46
+ end
47
+
48
+ it "raises error if all elements in the column aren't unique" do
49
+ jholu = DaruLite::DataFrame.new({
50
+ a: ['a','b','a'],
51
+ b: [1,2,4]
52
+ })
53
+
54
+ expect {
55
+ jholu.set_index(:a)
56
+ }.to raise_error(ArgumentError)
57
+ end
58
+
59
+ it "sets multiindex if array is given" do
60
+ df = DaruLite::DataFrame.new({
61
+ a: %w[a a b b],
62
+ b: [1, 2, 1, 2],
63
+ c: %w[a b c d]
64
+ })
65
+ df.set_index(%i[a b])
66
+ expected =
67
+ DaruLite::DataFrame.new(
68
+ { c: %w[a b c d] },
69
+ index: DaruLite::MultiIndex.from_tuples(
70
+ [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
71
+ )
72
+ ).tap do |df|
73
+ df.index.name = %i[a b]
74
+ df
75
+ end
76
+ expect(df).to eq(expected)
77
+ end
78
+ end
79
+
80
+ describe "#reindex" do
81
+ subject { df.reindex(DaruLite::Index.new([1,3,0,8,2])) }
82
+
83
+ let(:df) do
84
+ DaruLite::DataFrame.new({
85
+ a: [1,2,3,4,5],
86
+ b: [11,22,33,44,55],
87
+ c: %w(a b c d e)
88
+ })
89
+ end
90
+
91
+ it "re indexes and aligns accordingly" do
92
+ expect(subject).to eq(
93
+ DaruLite::DataFrame.new(
94
+ {
95
+ a: [2,4,1,nil,3],
96
+ b: [22,44,11,nil,33],
97
+ c: ['b','d','a',nil,'c']
98
+ },
99
+ index: DaruLite::Index.new([1,3,0,8,2])
100
+ )
101
+ )
102
+ end
103
+
104
+ it { is_expected.to_not eq(df) }
105
+ end
106
+
107
+ describe '#reset_index' do
108
+ context 'when Index' do
109
+ subject do
110
+ DaruLite::DataFrame.new(
111
+ {'vals' => [1,2,3,4,5]},
112
+ index: DaruLite::Index.new(%w[a b c d e], name: 'indices')
113
+ ).reset_index
114
+ end
115
+
116
+ it { is_expected.to eq DaruLite::DataFrame.new(
117
+ 'indices' => %w[a b c d e],
118
+ 'vals' => [1,2,3,4,5]
119
+ )}
120
+ end
121
+
122
+ context 'when MultiIndex' do
123
+ subject do
124
+ mi = DaruLite::MultiIndex.from_tuples([
125
+ [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
126
+ ])
127
+ mi.name = %w[nums alphas]
128
+ DaruLite::DataFrame.new(
129
+ {'vals' => [1,2,3,4]},
130
+ index: mi
131
+ ).reset_index
132
+ end
133
+
134
+ it { is_expected.to eq DaruLite::DataFrame.new(
135
+ 'nums' => [0,0,1,1],
136
+ 'alphas' => %w[a b a b],
137
+ 'vals' => [1,2,3,4]
138
+ )}
139
+ end
140
+ end
141
+
142
+ describe "#index=" do
143
+ let(:df) do
144
+ DaruLite::DataFrame.new({
145
+ a: [1,2,3,4,5],
146
+ b: [11,22,33,44,55],
147
+ c: %w(a b c d e)
148
+ })
149
+ end
150
+
151
+ it "simply reassigns the index" do
152
+ df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
153
+ expect(df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
154
+ end
155
+
156
+ it "raises error for improper length index" do
157
+ expect {
158
+ df.index = DaruLite::Index.new([1,2])
159
+ }.to raise_error(ArgumentError)
160
+ end
161
+
162
+ it "is able to accept array" do
163
+ df.index = (1..5).to_a
164
+ expect(df.index).to eq DaruLite::Index.new (1..5).to_a
165
+ end
166
+ end
167
+
168
+ describe "#reindex_vectors" do
169
+ it "re indexes vectors and aligns accordingly" do
170
+ df = DaruLite::DataFrame.new({
171
+ a: [1,2,3,4,5],
172
+ b: [11,22,33,44,55],
173
+ c: %w(a b c d e)
174
+ })
175
+
176
+ ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
177
+ expect(ans).to eq(DaruLite::DataFrame.new({
178
+ :b => [11,22,33,44,55],
179
+ 'a' => [nil, nil, nil, nil, nil],
180
+ :a => [1,2,3,4,5]
181
+ }, order: [:b, 'a', :a]))
182
+ end
183
+
184
+ it 'raises ArgumentError if argument was not an index' do
185
+ df = DaruLite::DataFrame.new([])
186
+ expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
187
+ end
188
+ end
189
+
190
+ describe "#vectors=" do
191
+ let(:df) do
192
+ DaruLite::DataFrame.new({
193
+ a: [1,2,3,4,5],
194
+ b: [11,22,33,44,55],
195
+ c: %w(a b c d e)
196
+ })
197
+ end
198
+
199
+ it "simply reassigns vectors" do
200
+ df.vectors = DaruLite::Index.new(['b',0,'m'])
201
+
202
+ expect(df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
203
+ expect(df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
204
+ expect(df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
205
+ expect(df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
206
+ end
207
+
208
+ it "raises error for improper length index" do
209
+ expect {
210
+ df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
211
+ }.to raise_error(ArgumentError)
212
+ end
213
+
214
+ it "change name of vectors in @data" do
215
+ new_index_array = [:k, :l, :m]
216
+ df.vectors = DaruLite::Index.new(new_index_array)
217
+
218
+ expect(df.data.map { |vector| vector.name }).to eq(new_index_array)
219
+ end
220
+ end
221
+ end