daru 0.2.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -9
- data/CONTRIBUTING.md +12 -16
- data/History.md +17 -0
- data/README.md +3 -2
- data/daru.gemspec +9 -17
- data/lib/daru.rb +1 -1
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +23 -22
- data/lib/daru/core/query.rb +16 -0
- data/lib/daru/dataframe.rb +37 -4
- data/lib/daru/date_time/index.rb +6 -5
- data/lib/daru/date_time/offsets.rb +8 -0
- data/lib/daru/formatters/table.rb +2 -5
- data/lib/daru/io/io.rb +16 -8
- data/lib/daru/vector.rb +42 -0
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +10 -0
- data/spec/dataframe_spec.rb +80 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/io/io_spec.rb +33 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/spec_helper.rb +8 -6
- data/spec/vector_spec.rb +29 -0
- metadata +19 -28
@@ -111,6 +111,14 @@ module Daru
|
|
111
111
|
def - date_time
|
112
112
|
date_time - @n*multiplier
|
113
113
|
end
|
114
|
+
|
115
|
+
def ==(other_obj)
|
116
|
+
other_obj.is_a?(Tick) && period == other_obj.period
|
117
|
+
end
|
118
|
+
|
119
|
+
def period
|
120
|
+
@n * multiplier
|
121
|
+
end
|
114
122
|
end
|
115
123
|
|
116
124
|
# Create a seconds offset
|
@@ -13,13 +13,10 @@ module Daru
|
|
13
13
|
@row_headers = [''] * @data.to_a.size if @row_headers.empty?
|
14
14
|
end
|
15
15
|
|
16
|
-
DEFAULT_SPACING = 10
|
17
|
-
DEFAULT_THRESHOLD = 15
|
18
|
-
|
19
16
|
def format threshold=nil, spacing=nil
|
20
|
-
rows = build_rows(threshold || DEFAULT_THRESHOLD)
|
17
|
+
rows = build_rows(threshold || Daru.max_rows)
|
21
18
|
|
22
|
-
formatter = construct_formatter rows, spacing || DEFAULT_SPACING
|
19
|
+
formatter = construct_formatter rows, spacing || Daru.spacing
|
23
20
|
|
24
21
|
rows.map { |r| formatter % r }.join("\n")
|
25
22
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -39,16 +39,12 @@ module Daru
|
|
39
39
|
# Functions for loading/writing Excel files.
|
40
40
|
|
41
41
|
def from_excel path, opts={}
|
42
|
-
optional_gem 'spreadsheet', '~>1.1.1'
|
43
42
|
opts = {
|
44
|
-
worksheet_id: 0
|
43
|
+
worksheet_id: 0,
|
44
|
+
row_id: 0
|
45
45
|
}.merge opts
|
46
46
|
|
47
|
-
|
48
|
-
book = Spreadsheet.open path
|
49
|
-
worksheet = book.worksheet worksheet_id
|
50
|
-
headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
|
51
|
-
|
47
|
+
worksheet, headers = read_from_excel(path, opts)
|
52
48
|
df = Daru::DataFrame.new({})
|
53
49
|
headers.each_with_index do |h,i|
|
54
50
|
col = worksheet.column(i).to_a
|
@@ -59,6 +55,18 @@ module Daru
|
|
59
55
|
df
|
60
56
|
end
|
61
57
|
|
58
|
+
def read_from_excel path, opts
|
59
|
+
optional_gem 'spreadsheet', '~>1.1.1'
|
60
|
+
|
61
|
+
worksheet_id = opts[:worksheet_id]
|
62
|
+
row_id = opts[:row_id]
|
63
|
+
book = Spreadsheet.open path
|
64
|
+
worksheet = book.worksheet worksheet_id
|
65
|
+
headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)
|
66
|
+
|
67
|
+
[worksheet, headers]
|
68
|
+
end
|
69
|
+
|
62
70
|
def dataframe_write_excel dataframe, path, _opts={}
|
63
71
|
book = Spreadsheet::Workbook.new
|
64
72
|
sheet = book.create_worksheet
|
@@ -231,7 +239,7 @@ module Daru
|
|
231
239
|
def from_csv_hash(path, opts)
|
232
240
|
csv_as_arrays =
|
233
241
|
::CSV
|
234
|
-
.parse(open(path), opts)
|
242
|
+
.parse(open(path), **opts)
|
235
243
|
.tap { |c| yield c if block_given? }
|
236
244
|
.to_a
|
237
245
|
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
|
data/lib/daru/vector.rb
CHANGED
@@ -444,6 +444,27 @@ module Daru
|
|
444
444
|
Daru::Core::Query.vector_where self, bool_array
|
445
445
|
end
|
446
446
|
|
447
|
+
# Return a new vector based on the contents of a boolean array and &block.
|
448
|
+
#
|
449
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
450
|
+
# collection containing the true or false values. Each element in the Vector
|
451
|
+
# corresponding to a `true` in the bool_array will be returned along with its
|
452
|
+
# index. The &block may contain manipulative functions for the Vector elements.
|
453
|
+
#
|
454
|
+
# @return [Daru::Vector]
|
455
|
+
#
|
456
|
+
# @example Usage of #apply_where.
|
457
|
+
# dv = Daru::Vector.new ['3 days', '5 weeks', '2 weeks']
|
458
|
+
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
459
|
+
# # =>
|
460
|
+
# ##<Daru::Vector(3)>
|
461
|
+
# # 0 3 days
|
462
|
+
# # 1 35 days
|
463
|
+
# # 2 14 days
|
464
|
+
def apply_where bool_array, &block
|
465
|
+
Daru::Core::Query.vector_apply_where self, bool_array, &block
|
466
|
+
end
|
467
|
+
|
447
468
|
def head q=10
|
448
469
|
self[0..(q-1)]
|
449
470
|
end
|
@@ -453,6 +474,11 @@ module Daru
|
|
453
474
|
self[start..(size-1)]
|
454
475
|
end
|
455
476
|
|
477
|
+
def last q=1
|
478
|
+
# The Enumerable mixin does not provide the last method.
|
479
|
+
tail(q)
|
480
|
+
end
|
481
|
+
|
456
482
|
def empty?
|
457
483
|
@index.empty?
|
458
484
|
end
|
@@ -1252,6 +1278,22 @@ module Daru
|
|
1252
1278
|
Daru::DataFrame.new ps
|
1253
1279
|
end
|
1254
1280
|
|
1281
|
+
# Returns an array of boolean values, indicating whether each element of
|
1282
|
+
# the vector matches the given +regexp+.
|
1283
|
+
#
|
1284
|
+
# @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
|
1285
|
+
#
|
1286
|
+
# @return [Array] Containing +true+ for each element that matches the given +regexp+ and +false+ otherwise
|
1287
|
+
#
|
1288
|
+
# @example
|
1289
|
+
# dv = Daru::Vector.new(['3 days', '5 weeks', '2 weeks'])
|
1290
|
+
# dv.match(/weeks/)
|
1291
|
+
#
|
1292
|
+
# # => [false, true, true]
|
1293
|
+
def match(regexp)
|
1294
|
+
@data.map { |value| !!(value =~ regexp) }
|
1295
|
+
end
|
1296
|
+
|
1255
1297
|
# Creates a new vector consisting only of non-nil data
|
1256
1298
|
#
|
1257
1299
|
# == Arguments
|
data/lib/daru/version.rb
CHANGED
@@ -1,50 +1,53 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
if Daru.has_gsl?
|
2
|
+
describe Daru::Accessors::GSLWrapper do
|
3
|
+
before :each do
|
4
|
+
@stub_context = Object.new
|
5
|
+
@gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
|
6
|
+
end
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
context ".new" do
|
9
|
+
it "actually creates a GSL Vector" do
|
10
|
+
expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
|
11
|
+
end
|
10
12
|
end
|
11
|
-
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
context "#mean" do
|
15
|
+
it "computes mean" do
|
16
|
+
expect(@gsl_wrapper.mean).to eq(3.5)
|
17
|
+
end
|
16
18
|
end
|
17
|
-
end
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
20
|
+
context "#map!" do
|
21
|
+
it "destructively maps" do
|
22
|
+
expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
|
23
|
+
Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
|
24
|
+
)
|
25
|
+
end
|
24
26
|
end
|
25
|
-
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
context "#delete_at" do
|
29
|
+
it "deletes at key" do
|
30
|
+
expect(@gsl_wrapper.delete_at(2)).to eq(3)
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
expect(@gsl_wrapper).to eq(
|
33
|
+
Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
|
34
|
+
)
|
35
|
+
end
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
38
|
+
context "#index" do
|
39
|
+
it "returns index of value" do
|
40
|
+
expect(@gsl_wrapper.index(3)).to eq(2)
|
41
|
+
end
|
40
42
|
end
|
41
|
-
end
|
42
43
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
context "#push" do
|
45
|
+
it "appends element" do
|
46
|
+
expect(@gsl_wrapper.push(15)).to eq(
|
47
|
+
Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
|
48
|
+
)
|
49
|
+
end
|
48
50
|
end
|
49
51
|
end
|
50
52
|
end
|
53
|
+
|
@@ -1,32 +1,35 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
if Daru.has_nmatrix?
|
4
|
+
describe Daru::Accessors::NMatrixWrapper do
|
5
|
+
before :each do
|
6
|
+
stub_context = Object.new
|
7
|
+
@nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
|
8
|
+
end
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
it "checks for actual NMatrix creation" do
|
11
|
+
expect(@nm_wrapper.data.class).to eq(NMatrix)
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
it "checks the actual size of the NMatrix object" do
|
15
|
+
expect(@nm_wrapper.data.size).to eq(10)
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
it "checks that @size is the number of elements in the vector" do
|
19
|
+
expect(@nm_wrapper.size).to eq(5)
|
20
|
+
end
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
it "checks for underlying NMatrix data type" do
|
23
|
+
expect(@nm_wrapper.data.dtype).to eq(:float32)
|
24
|
+
end
|
24
25
|
|
25
|
-
|
26
|
-
|
26
|
+
it "resizes" do
|
27
|
+
@nm_wrapper.resize(100)
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
expect(@nm_wrapper.size).to eq(5)
|
30
|
+
expect(@nm_wrapper.data.size).to eq(100)
|
31
|
+
expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
|
32
|
+
end
|
31
33
|
end
|
32
34
|
end
|
35
|
+
|
data/spec/core/merge_spec.rb
CHANGED
@@ -112,7 +112,7 @@ describe Daru::DataFrame do
|
|
112
112
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
113
113
|
end
|
114
114
|
|
115
|
-
it "performs a left outer join"
|
115
|
+
it "performs a left outer join" do
|
116
116
|
answer = Daru::DataFrame.new({
|
117
117
|
:id_1 => [2,3,1,4],
|
118
118
|
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
data/spec/core/query_spec.rb
CHANGED
@@ -334,4 +334,14 @@ describe "Arel-like syntax" do
|
|
334
334
|
end
|
335
335
|
end
|
336
336
|
end
|
337
|
+
|
338
|
+
describe "apply_where" do
|
339
|
+
context "matches regexp with block input" do
|
340
|
+
subject { dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" } }
|
341
|
+
|
342
|
+
let(:dv) { Daru::Vector.new ['3 days', '5 weeks', '2 weeks'] }
|
343
|
+
|
344
|
+
it { is_expected.to eq(Daru::Vector.new ['3 days', '35 days', '14 days']) }
|
345
|
+
end
|
346
|
+
end
|
337
347
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -738,6 +738,47 @@ describe Daru::DataFrame do
|
|
738
738
|
}
|
739
739
|
end
|
740
740
|
|
741
|
+
context "#insert_vector" do
|
742
|
+
subject(:data_frame) {
|
743
|
+
Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
744
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
745
|
+
index: [:one, :two, :three, :four, :five])
|
746
|
+
}
|
747
|
+
|
748
|
+
it "insert a new vector at the desired slot" do
|
749
|
+
df = Daru::DataFrame.new({
|
750
|
+
a: [1,2,3,4,5],
|
751
|
+
d: [710, 720, 730, 740, 750],
|
752
|
+
b: [11, 12, 13, 14, 15],
|
753
|
+
c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
|
754
|
+
index: [:one, :two, :three, :four, :five]
|
755
|
+
)
|
756
|
+
data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
|
757
|
+
expect(subject).to eq df
|
758
|
+
end
|
759
|
+
|
760
|
+
it "raises error for data array being too big" do
|
761
|
+
expect {
|
762
|
+
source = (1..8).to_a
|
763
|
+
data_frame.insert_vector 1, :d, source
|
764
|
+
}.to raise_error(IndexError)
|
765
|
+
end
|
766
|
+
|
767
|
+
it "raises error for invalid index value" do
|
768
|
+
expect {
|
769
|
+
source = (1..5).to_a
|
770
|
+
data_frame.insert_vector 4, :d, source
|
771
|
+
}.to raise_error(ArgumentError)
|
772
|
+
end
|
773
|
+
|
774
|
+
it "raises error for invalid source type" do
|
775
|
+
expect {
|
776
|
+
source = 14
|
777
|
+
data_frame.insert_vector 3, :d, source
|
778
|
+
}.to raise_error(ArgumentError)
|
779
|
+
end
|
780
|
+
end
|
781
|
+
|
741
782
|
context "#row[]=" do
|
742
783
|
context Daru::Index do
|
743
784
|
before :each do
|
@@ -2720,6 +2761,26 @@ describe Daru::DataFrame do
|
|
2720
2761
|
end
|
2721
2762
|
end
|
2722
2763
|
|
2764
|
+
context "#rename_vectors!" do
|
2765
|
+
before do
|
2766
|
+
@df = Daru::DataFrame.new({
|
2767
|
+
a: [1,2,3,4,5],
|
2768
|
+
b: [11,22,33,44,55],
|
2769
|
+
c: %w(a b c d e)
|
2770
|
+
})
|
2771
|
+
end
|
2772
|
+
|
2773
|
+
it "returns self as modified dataframe" do
|
2774
|
+
expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
|
2775
|
+
end
|
2776
|
+
|
2777
|
+
it "re-uses rename_vectors method" do
|
2778
|
+
name_map = { :a => :alpha, :c => :gamma }
|
2779
|
+
expect(@df).to receive(:rename_vectors).with(name_map)
|
2780
|
+
@df.rename_vectors! name_map
|
2781
|
+
end
|
2782
|
+
end
|
2783
|
+
|
2723
2784
|
context "#rename_vectors" do
|
2724
2785
|
before do
|
2725
2786
|
@df = Daru::DataFrame.new({
|
@@ -2729,6 +2790,10 @@ describe Daru::DataFrame do
|
|
2729
2790
|
})
|
2730
2791
|
end
|
2731
2792
|
|
2793
|
+
it "returns Daru::Index" do
|
2794
|
+
expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(Daru::Index)
|
2795
|
+
end
|
2796
|
+
|
2732
2797
|
it "renames vectors using a hash map" do
|
2733
2798
|
@df.rename_vectors :a => :alpha, :c => :gamma
|
2734
2799
|
expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
|
@@ -3972,6 +4037,21 @@ describe Daru::DataFrame do
|
|
3972
4037
|
| 12 1 1 1
|
3973
4038
|
| 13 1 1 1
|
3974
4039
|
| 14 1 1 1
|
4040
|
+
| 15 1 1 1
|
4041
|
+
| 16 1 1 1
|
4042
|
+
| 17 1 1 1
|
4043
|
+
| 18 1 1 1
|
4044
|
+
| 19 1 1 1
|
4045
|
+
| 20 1 1 1
|
4046
|
+
| 21 1 1 1
|
4047
|
+
| 22 1 1 1
|
4048
|
+
| 23 1 1 1
|
4049
|
+
| 24 1 1 1
|
4050
|
+
| 25 1 1 1
|
4051
|
+
| 26 1 1 1
|
4052
|
+
| 27 1 1 1
|
4053
|
+
| 28 1 1 1
|
4054
|
+
| 29 1 1 1
|
3975
4055
|
| ... ... ... ...
|
3976
4056
|
}.unindent}
|
3977
4057
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
include Daru
|
2
|
+
|
3
|
+
describe Daru::DateTimeIndexHelper do
|
4
|
+
|
5
|
+
|
6
|
+
describe '.infer_offset' do
|
7
|
+
subject(:offset) { Daru::DateTimeIndexHelper.infer_offset(data) }
|
8
|
+
|
9
|
+
context 'when the dataset does not have a regular offset' do
|
10
|
+
let(:data) do
|
11
|
+
[
|
12
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
13
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
14
|
+
DateTime.new(2020, 1, 1, 00, 05, 00),
|
15
|
+
]
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns nil' do
|
19
|
+
expect(offset).to be_nil
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context 'when the dataset matches a defined offset' do
|
24
|
+
let(:data) do
|
25
|
+
[
|
26
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
27
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
28
|
+
DateTime.new(2020, 1, 1, 00, 02, 00),
|
29
|
+
]
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns the matched offset' do
|
33
|
+
expect(offset).to be_an_instance_of(Daru::Offsets::Minute)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context 'when the offset is a multiple of seconds' do
|
38
|
+
let(:data) do
|
39
|
+
[
|
40
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
41
|
+
DateTime.new(2020, 1, 1, 00, 00, 03),
|
42
|
+
DateTime.new(2020, 1, 1, 00, 00, 06),
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
let(:expected_offset) { Daru::Offsets::Second.new(3) }
|
47
|
+
|
48
|
+
it 'returns a Second offset' do
|
49
|
+
expect(offset).to be_an_instance_of(Daru::Offsets::Second)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'has the correct multiplier' do
|
53
|
+
expect(offset.freq_string).to eql(expected_offset.freq_string)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
context 'when the offset is less than a second' do
|
58
|
+
let(:data) do
|
59
|
+
[
|
60
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00001,
|
61
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00002,
|
62
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00003,
|
63
|
+
]
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'returns nil' do
|
67
|
+
expect(offset).to be_nil
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|