daru_lite 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +0 -20
- data/.rubocop_todo.yml +1 -1
- data/README.md +0 -2
- data/lib/daru_lite/data_frame/fetchable.rb +1 -1
- data/lib/daru_lite/data_frame/joinable.rb +4 -1
- data/lib/daru_lite/index/index.rb +1 -1
- data/lib/daru_lite/index/multi_index.rb +26 -15
- data/lib/daru_lite/vector/indexable.rb +1 -1
- data/lib/daru_lite/vector/missable.rb +1 -1
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/fetchable_example.rb +5 -0
- data/spec/data_frame/joinable_example.rb +61 -36
- data/spec/index/multi_index_spec.rb +24 -0
- metadata +7 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 027d1747a070b7555d839ffc99bf4ad075c1d2e88551f2c77cb97c4cfc49a5d9
|
|
4
|
+
data.tar.gz: 196e960c448e230c18b337017426d6a6a06ac6e2e06e658f2f82cddb46064ec9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 42131c6e6fce7e99c5791e58c9a2ba94460e67c60f1c37ab9827ff85990f0f10ecb7de1972f47ce91429289de994c7b44283f07d07689e0168ba86834ae78ef9
|
|
7
|
+
data.tar.gz: 1f8f4c6c51a19ad5c1f8b8c1612632c8f23e600f27493183c2055522e55149719cd352b797fd6feb6a2adcc9fd29570618f2723ae833b8afc536820fc83f8835
|
data/.github/workflows/ci.yml
CHANGED
|
@@ -1,15 +1,6 @@
|
|
|
1
1
|
name: CI
|
|
2
2
|
on: [push]
|
|
3
3
|
|
|
4
|
-
env:
|
|
5
|
-
CC_TEST_REPORTER_ID: ${{secrets.CC_TEST_REPORTER_ID}}
|
|
6
|
-
# `github.ref` points to the *merge commit* when running tests on a pull request, which will be a commit
|
|
7
|
-
# that doesn't exists in our code base. Since this workflow triggers from a PR, we use the HEAD SHA instead.
|
|
8
|
-
#
|
|
9
|
-
# NOTE: These are both used by Code Climate (cc-test-reporter).
|
|
10
|
-
GIT_COMMIT_SHA: ${{github.event.pull_request.head.sha}}
|
|
11
|
-
GIT_BRANCH: ${{github.head_ref}}
|
|
12
|
-
|
|
13
4
|
jobs:
|
|
14
5
|
lint:
|
|
15
6
|
runs-on: ubuntu-latest
|
|
@@ -38,16 +29,5 @@ jobs:
|
|
|
38
29
|
with:
|
|
39
30
|
ruby-version: ${{ matrix.ruby-version }}
|
|
40
31
|
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
|
41
|
-
- name: "Download cc-test-reporter from codeclimate.com"
|
|
42
|
-
run: |
|
|
43
|
-
curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
|
44
|
-
chmod +x ./cc-test-reporter
|
|
45
|
-
- name: "Report to Code Climate that we will send a coverage report."
|
|
46
|
-
run: ./cc-test-reporter before-build
|
|
47
32
|
- name: Run tests
|
|
48
33
|
run: bundle exec rspec
|
|
49
|
-
- name: Upload code coverage to Code Climate
|
|
50
|
-
run: |
|
|
51
|
-
./cc-test-reporter after-build \
|
|
52
|
-
--coverage-input-type simplecov \
|
|
53
|
-
./coverage/.resultset.json
|
data/.rubocop_todo.yml
CHANGED
|
@@ -80,7 +80,7 @@ Naming/MethodParameterName:
|
|
|
80
80
|
# ForbiddenPrefixes: is_, has_, have_
|
|
81
81
|
# AllowedMethods: is_a?
|
|
82
82
|
# MethodDefinitionMacros: define_method, define_singleton_method
|
|
83
|
-
Naming/
|
|
83
|
+
Naming/PredicatePrefix:
|
|
84
84
|
Exclude:
|
|
85
85
|
- 'spec/**/*'
|
|
86
86
|
- 'lib/daru_lite/data_frame/missable.rb'
|
data/README.md
CHANGED
|
@@ -4,8 +4,6 @@ Simple, straightforward DataFrames for Ruby
|
|
|
4
4
|
|
|
5
5
|
[](https://github.com/pollandroll/daru_lite/actions)
|
|
6
6
|
[](https://rubygems.org/gems/daru_lite)
|
|
7
|
-
[](https://codeclimate.com/github/pollandroll/daru_lite/maintainability)
|
|
8
|
-
[](https://codeclimate.com/github/pollandroll/daru_lite/test_coverage)
|
|
9
7
|
|
|
10
8
|
## Introduction
|
|
11
9
|
|
|
data/lib/daru_lite/data_frame/joinable.rb
CHANGED
|
@@ -22,7 +22,10 @@ module DaruLite
|
|
|
22
22
|
df = row[*(@index.to_a - other_df.index.to_a)]
|
|
23
23
|
|
|
24
24
|
df = df.concat(other_df)
|
|
25
|
-
df.index =
|
|
25
|
+
df.index = @index.class.public_send(
|
|
26
|
+
@index.is_a?(DaruLite::MultiIndex) ? :from_tuples : :new,
|
|
27
|
+
index
|
|
28
|
+
)
|
|
26
29
|
df
|
|
27
30
|
end
|
|
28
31
|
|
|
data/lib/daru_lite/index/index.rb
CHANGED
|
@@ -229,7 +229,7 @@ module DaruLite
|
|
|
229
229
|
# # 2 false
|
|
230
230
|
# # 3 false
|
|
231
231
|
# # 4 true
|
|
232
|
-
def is_values(*indexes) # rubocop:disable Naming/
|
|
232
|
+
def is_values(*indexes) # rubocop:disable Naming/PredicatePrefix
|
|
233
233
|
bool_array = @keys.map { |r| indexes.include?(r) }
|
|
234
234
|
DaruLite::Vector.new(bool_array)
|
|
235
235
|
end
|
|
data/lib/daru_lite/index/multi_index.rb
CHANGED
|
@@ -122,16 +122,16 @@ module DaruLite
|
|
|
122
122
|
end
|
|
123
123
|
|
|
124
124
|
def [](*key)
|
|
125
|
-
key.
|
|
126
|
-
|
|
127
|
-
retrieve_from_range(key[0])
|
|
128
|
-
elsif key[0].is_a?(Integer) && key.size == 1
|
|
129
|
-
try_retrieve_from_integer(key[0])
|
|
125
|
+
if key.all? { |subkey| subkey.is_a?(Array) && subkey.length > 1 }
|
|
126
|
+
retrieve_from_tuples(*key)
|
|
130
127
|
else
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
128
|
+
key.flatten!
|
|
129
|
+
if key[0].is_a?(Range)
|
|
130
|
+
retrieve_from_range(key[0])
|
|
131
|
+
elsif key[0].is_a?(Integer) && key.size == 1
|
|
132
|
+
try_retrieve_from_integer(key[0])
|
|
133
|
+
else
|
|
134
|
+
retrieve_from_tuple(key)
|
|
135
135
|
end
|
|
136
136
|
end
|
|
137
137
|
end
|
|
@@ -158,15 +158,19 @@ module DaruLite
|
|
|
158
158
|
|
|
159
159
|
return indexes
|
|
160
160
|
end
|
|
161
|
-
res = self[indexes]
|
|
161
|
+
res = self[*indexes]
|
|
162
162
|
return res if res.is_a? Integer
|
|
163
163
|
|
|
164
164
|
res.map { |i| self[i] }
|
|
165
165
|
end
|
|
166
166
|
|
|
167
167
|
def subset(*indexes)
|
|
168
|
-
|
|
168
|
+
first_index = indexes.first
|
|
169
|
+
if first_index.is_a? Integer
|
|
169
170
|
MultiIndex.from_tuples(indexes.map { |index| key(index) })
|
|
171
|
+
elsif first_index.is_a?(Array) && include?(first_index)
|
|
172
|
+
# Same logic as in DaruLite::Index#subset
|
|
173
|
+
MultiIndex.from_tuples indexes
|
|
170
174
|
else
|
|
171
175
|
self[indexes].conform indexes
|
|
172
176
|
end
|
|
@@ -211,14 +215,19 @@ module DaruLite
|
|
|
211
215
|
end
|
|
212
216
|
|
|
213
217
|
def try_retrieve_from_integer(int)
|
|
214
|
-
@levels[0].key?(int) ?
|
|
218
|
+
@levels[0].key?(int) ? retrieve_from_tuple([int]) : int
|
|
215
219
|
end
|
|
216
220
|
|
|
217
221
|
def retrieve_from_range(range)
|
|
218
222
|
MultiIndex.from_tuples(range.map { |index| key(index) })
|
|
219
223
|
end
|
|
220
224
|
|
|
221
|
-
def retrieve_from_tuples(
|
|
225
|
+
def retrieve_from_tuples(*keys)
|
|
226
|
+
collection = keys.map { |key| retrieve_from_tuple(key) }
|
|
227
|
+
collection.one? ? collection.first : collection
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
def retrieve_from_tuple(key)
|
|
222
231
|
chosen = []
|
|
223
232
|
|
|
224
233
|
key.each_with_index do |k, depth|
|
|
@@ -232,6 +241,8 @@ module DaruLite
|
|
|
232
241
|
return chosen[0] if chosen.size == 1 && key.size == @levels.size
|
|
233
242
|
|
|
234
243
|
multi_index_from_multiple_selections(chosen)
|
|
244
|
+
rescue NoMethodError
|
|
245
|
+
raise IndexError, "Specified index #{key.inspect} do not exist"
|
|
235
246
|
end
|
|
236
247
|
|
|
237
248
|
def multi_index_from_multiple_selections(chosen)
|
|
@@ -281,8 +292,8 @@ module DaruLite
|
|
|
281
292
|
raise SizeError, [error_msg, suggestion_msg].join("\n") if names.size < levels.size
|
|
282
293
|
end
|
|
283
294
|
|
|
284
|
-
private :find_all_indexes, :multi_index_from_multiple_selections,
|
|
285
|
-
:
|
|
295
|
+
private :find_all_indexes, :multi_index_from_multiple_selections, :retrieve_from_range,
|
|
296
|
+
:retrieve_from_tuples, :retrieve_from_tuple, :validate_name
|
|
286
297
|
|
|
287
298
|
def key(index)
|
|
288
299
|
raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
|
|
data/lib/daru_lite/vector/missable.rb
CHANGED
|
@@ -4,7 +4,7 @@ module DaruLite
|
|
|
4
4
|
extend Gem::Deprecate
|
|
5
5
|
|
|
6
6
|
# Reports whether missing data is present in the Vector.
|
|
7
|
-
def has_missing_data? # rubocop:disable Naming/
|
|
7
|
+
def has_missing_data? # rubocop:disable Naming/PredicatePrefix
|
|
8
8
|
!indexes(*DaruLite::MISSING_VALUES).empty?
|
|
9
9
|
end
|
|
10
10
|
alias flawed? has_missing_data?
|
data/lib/daru_lite/version.rb
CHANGED
|
data/spec/data_frame/fetchable_example.rb
CHANGED
|
@@ -58,6 +58,11 @@ shared_examples_for 'a fetchable DataFrame' do
|
|
|
58
58
|
DaruLite::Vector.new(vector_arry1, index: multi_index))
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
+
it "returns a vector when specifying last tuple part as an array" do
|
|
62
|
+
expect(df_mi[:a, :one, [:bar]]).to eq(
|
|
63
|
+
DaruLite::Vector.new(vector_arry1, index: multi_index))
|
|
64
|
+
end
|
|
65
|
+
|
|
61
66
|
it "returns DataFrame when specified first layer of MultiIndex" do
|
|
62
67
|
sub_order = DaruLite::MultiIndex.from_tuples([
|
|
63
68
|
[:one, :bar],
|
|
data/spec/data_frame/joinable_example.rb
CHANGED
|
@@ -41,66 +41,91 @@ shared_examples_for 'a joinable DataFrame' do
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
describe "#union" do
|
|
45
45
|
let(:df1) do
|
|
46
46
|
DaruLite::DataFrame.new({
|
|
47
47
|
a: [1, 2, 3],
|
|
48
|
-
b: [1, 2, 3]}
|
|
49
|
-
index: [1,3,5]
|
|
48
|
+
b: [1, 2, 3]}
|
|
50
49
|
)
|
|
51
50
|
end
|
|
52
51
|
let(:df2) do
|
|
53
52
|
DaruLite::DataFrame.new({
|
|
54
53
|
a: [4, 5, 6],
|
|
55
|
-
c: [4, 5, 6]}
|
|
56
|
-
index: [7,9,11]
|
|
54
|
+
c: [4, 5, 6]}
|
|
57
55
|
)
|
|
58
56
|
end
|
|
59
57
|
let(:df3) do
|
|
60
58
|
DaruLite::DataFrame.new({
|
|
61
59
|
a: [4, 5, 6],
|
|
62
|
-
c: [4, 5, 6]}
|
|
63
|
-
index: [5,7,9]
|
|
60
|
+
c: [4, 5, 6]}
|
|
64
61
|
)
|
|
65
62
|
end
|
|
66
63
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
64
|
+
shared_examples_for '#union' do
|
|
65
|
+
it 'does not modify the original dataframes' do
|
|
66
|
+
df1_a = df1[:a].to_a.dup
|
|
67
|
+
df2_a = df2[:a].to_a.dup
|
|
68
|
+
|
|
69
|
+
_ = df1.union df2
|
|
70
|
+
expect(df1[:a].to_a).to eq df1_a
|
|
71
|
+
expect(df2[:a].to_a).to eq df2_a
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
|
|
75
|
+
df1_a = df1[:a].to_a.dup
|
|
76
|
+
df2_a = df2[:a].to_a.dup
|
|
77
|
+
|
|
78
|
+
df_union = df1.union df2
|
|
79
|
+
expect(df_union[:a].to_a).to eq df1_a + df2_a
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it 'fills in missing vectors with nils' do
|
|
83
|
+
df1_b = df1[:b].to_a.dup
|
|
84
|
+
df2_c = df2[:c].to_a.dup
|
|
85
|
+
|
|
86
|
+
df_union = df1.union df2
|
|
87
|
+
expect(df_union[:b].to_a).to eq df1_b + [nil] * df2.size
|
|
88
|
+
expect(df_union[:c].to_a).to eq [nil] * df1.size + df2_c
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it 'overwrites part of the first dataframe if there are double indices' do
|
|
92
|
+
vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
|
|
93
|
+
expect(df1.union(df3).row[df1_df3_common_indice]).to eq vec
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it 'concats the indices' do
|
|
97
|
+
v1 = df1.index.to_a
|
|
98
|
+
v2 = df2.index.to_a
|
|
99
|
+
|
|
100
|
+
df_union = df1.union df2
|
|
101
|
+
expect(df_union.index.to_a).to eq v1 + v2
|
|
102
|
+
end
|
|
74
103
|
end
|
|
75
104
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
105
|
+
context 'with regular index' do
|
|
106
|
+
let(:df1_df3_common_indice) { 5 }
|
|
107
|
+
let(:df2_df3_common_indices) { [7, 9] }
|
|
79
108
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
109
|
+
before do
|
|
110
|
+
df1.index = [1, 3, df1_df3_common_indice]
|
|
111
|
+
df2.index = [*df2_df3_common_indices, 11]
|
|
112
|
+
df3.index = [df1_df3_common_indice, *df2_df3_common_indices]
|
|
113
|
+
end
|
|
83
114
|
|
|
84
|
-
|
|
85
|
-
df1_b = df1[:b].to_a.dup
|
|
86
|
-
df2_c = df2[:c].to_a.dup
|
|
87
|
-
|
|
88
|
-
df_union = df1.union df2
|
|
89
|
-
expect(df_union[:b].to_a).to eq df1_b + [nil] * df2.size
|
|
90
|
-
expect(df_union[:c].to_a).to eq [nil] * df1.size + df2_c
|
|
115
|
+
it_behaves_like '#union'
|
|
91
116
|
end
|
|
92
117
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
end
|
|
118
|
+
context 'with multi index' do
|
|
119
|
+
let(:df1_df3_common_indice) { [:c, 5] }
|
|
120
|
+
let(:df2_df3_common_indices) { [[:a, 7],[:b, 9]] }
|
|
97
121
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
122
|
+
before do
|
|
123
|
+
df1.index = DaruLite::MultiIndex.from_tuples([[:a, 1], [:b, 3], df1_df3_common_indice])
|
|
124
|
+
df2.index = DaruLite::MultiIndex.from_tuples([*df2_df3_common_indices, [:c, 11]])
|
|
125
|
+
df3.index = DaruLite::MultiIndex.from_tuples([df1_df3_common_indice, *df2_df3_common_indices])
|
|
126
|
+
end
|
|
101
127
|
|
|
102
|
-
|
|
103
|
-
expect(df_union.index.to_a).to eq v1 + v2
|
|
128
|
+
it_behaves_like '#union'
|
|
104
129
|
end
|
|
105
130
|
end
|
|
106
131
|
end
|
|
data/spec/index/multi_index_spec.rb
CHANGED
|
@@ -141,6 +141,14 @@ describe DaruLite::MultiIndex do
|
|
|
141
141
|
expect(index[:a, :one, :baz]).to eq(1)
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
+
it "returns the row number when specifying last tuple part as an array" do
|
|
145
|
+
expect(index[:a, :one, [:baz]]).to eq(1)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
it "returns the row numbers when specifying multiple tuples" do
|
|
149
|
+
expect(index[[:a, :one, :baz], [:b, :two, :bar]]).to eq([1, 5])
|
|
150
|
+
end
|
|
151
|
+
|
|
144
152
|
it "returns MultiIndex when specifying incomplete tuple" do
|
|
145
153
|
expect(index[:b]).to eq(DaruLite::MultiIndex.from_tuples([
|
|
146
154
|
[:b,:one,:bar],
|
|
@@ -523,6 +531,14 @@ describe DaruLite::MultiIndex do
|
|
|
523
531
|
it { is_expected.to eq [0, 1] }
|
|
524
532
|
end
|
|
525
533
|
|
|
534
|
+
context "multiple tuple indexes" do
|
|
535
|
+
subject { idx.pos [:b,:two,:bar], [:b,:one,:foo] }
|
|
536
|
+
|
|
537
|
+
it { is_expected.to be_a Array }
|
|
538
|
+
its(:size) { is_expected.to eq 2 }
|
|
539
|
+
it { is_expected.to eq [1, 3] }
|
|
540
|
+
end
|
|
541
|
+
|
|
526
542
|
# TODO: Add specs for IndexError
|
|
527
543
|
end
|
|
528
544
|
|
|
@@ -552,6 +568,14 @@ describe DaruLite::MultiIndex do
|
|
|
552
568
|
its(:to_a) { is_expected.to eq [[:b, :one, :bar], [:b, :two, :bar]] }
|
|
553
569
|
end
|
|
554
570
|
|
|
571
|
+
context "multiple tuple indexes" do
|
|
572
|
+
subject { idx.subset [:b,:two,:bar], [:b,:one,:foo] }
|
|
573
|
+
|
|
574
|
+
it { is_expected.to be_a described_class }
|
|
575
|
+
its(:size) { is_expected.to eq 2 }
|
|
576
|
+
its(:to_a) { is_expected.to eq [[:b,:two,:bar], [:b,:one,:foo]] }
|
|
577
|
+
end
|
|
578
|
+
|
|
555
579
|
# TODO: Checks for invalid indexes
|
|
556
580
|
end
|
|
557
581
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: daru_lite
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Thomas Naude-Filonnière
|
|
@@ -9,9 +9,10 @@ authors:
|
|
|
9
9
|
- Julie Thomas
|
|
10
10
|
- Amar Slaoua
|
|
11
11
|
- Mourtada Belhantri
|
|
12
|
+
autorequire:
|
|
12
13
|
bindir: bin
|
|
13
14
|
cert_chain: []
|
|
14
|
-
date:
|
|
15
|
+
date: 2026-03-27 00:00:00.000000000 Z
|
|
15
16
|
dependencies:
|
|
16
17
|
- !ruby/object:Gem::Dependency
|
|
17
18
|
name: activerecord
|
|
@@ -316,6 +317,7 @@ description: |
|
|
|
316
317
|
and can be used with many others like mixed_models, gnuplotrb and iruby.
|
|
317
318
|
|
|
318
319
|
Daru Lite is a fork of Daru that aims to focus on data manipulation and stability.
|
|
320
|
+
email:
|
|
319
321
|
executables: []
|
|
320
322
|
extensions: []
|
|
321
323
|
extra_rdoc_files: []
|
|
@@ -531,6 +533,7 @@ homepage: https://github.com/pollandroll/daru
|
|
|
531
533
|
licenses:
|
|
532
534
|
- BSD-2-Clause
|
|
533
535
|
metadata: {}
|
|
536
|
+
post_install_message:
|
|
534
537
|
rdoc_options: []
|
|
535
538
|
require_paths:
|
|
536
539
|
- lib
|
|
@@ -545,7 +548,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
545
548
|
- !ruby/object:Gem::Version
|
|
546
549
|
version: '0'
|
|
547
550
|
requirements: []
|
|
548
|
-
rubygems_version: 3.
|
|
551
|
+
rubygems_version: 3.4.10
|
|
552
|
+
signing_key:
|
|
549
553
|
specification_version: 4
|
|
550
554
|
summary: Data Analysis in RUby, stripped down
|
|
551
555
|
test_files:
|