daru 0.2.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -9
- data/CONTRIBUTING.md +12 -16
- data/History.md +17 -0
- data/README.md +3 -2
- data/daru.gemspec +9 -17
- data/lib/daru.rb +1 -1
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +23 -22
- data/lib/daru/core/query.rb +16 -0
- data/lib/daru/dataframe.rb +37 -4
- data/lib/daru/date_time/index.rb +6 -5
- data/lib/daru/date_time/offsets.rb +8 -0
- data/lib/daru/formatters/table.rb +2 -5
- data/lib/daru/io/io.rb +16 -8
- data/lib/daru/vector.rb +42 -0
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +10 -0
- data/spec/dataframe_spec.rb +80 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/io/io_spec.rb +33 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/spec_helper.rb +8 -6
- data/spec/vector_spec.rb +29 -0
- metadata +19 -28
@@ -111,6 +111,14 @@ module Daru
|
|
111
111
|
def - date_time
|
112
112
|
date_time - @n*multiplier
|
113
113
|
end
|
114
|
+
|
115
|
+
def ==(other_obj)
|
116
|
+
other_obj.is_a?(Tick) && period == other_obj.period
|
117
|
+
end
|
118
|
+
|
119
|
+
def period
|
120
|
+
@n * multiplier
|
121
|
+
end
|
114
122
|
end
|
115
123
|
|
116
124
|
# Create a seconds offset
|
@@ -13,13 +13,10 @@ module Daru
|
|
13
13
|
@row_headers = [''] * @data.to_a.size if @row_headers.empty?
|
14
14
|
end
|
15
15
|
|
16
|
-
DEFAULT_SPACING = 10
|
17
|
-
DEFAULT_THRESHOLD = 15
|
18
|
-
|
19
16
|
def format threshold=nil, spacing=nil
|
20
|
-
rows = build_rows(threshold ||
|
17
|
+
rows = build_rows(threshold || Daru.max_rows)
|
21
18
|
|
22
|
-
formatter = construct_formatter rows, spacing ||
|
19
|
+
formatter = construct_formatter rows, spacing || Daru.spacing
|
23
20
|
|
24
21
|
rows.map { |r| formatter % r }.join("\n")
|
25
22
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -39,16 +39,12 @@ module Daru
|
|
39
39
|
# Functions for loading/writing Excel files.
|
40
40
|
|
41
41
|
def from_excel path, opts={}
|
42
|
-
optional_gem 'spreadsheet', '~>1.1.1'
|
43
42
|
opts = {
|
44
|
-
worksheet_id: 0
|
43
|
+
worksheet_id: 0,
|
44
|
+
row_id: 0
|
45
45
|
}.merge opts
|
46
46
|
|
47
|
-
|
48
|
-
book = Spreadsheet.open path
|
49
|
-
worksheet = book.worksheet worksheet_id
|
50
|
-
headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
|
51
|
-
|
47
|
+
worksheet, headers = read_from_excel(path, opts)
|
52
48
|
df = Daru::DataFrame.new({})
|
53
49
|
headers.each_with_index do |h,i|
|
54
50
|
col = worksheet.column(i).to_a
|
@@ -59,6 +55,18 @@ module Daru
|
|
59
55
|
df
|
60
56
|
end
|
61
57
|
|
58
|
+
def read_from_excel path, opts
|
59
|
+
optional_gem 'spreadsheet', '~>1.1.1'
|
60
|
+
|
61
|
+
worksheet_id = opts[:worksheet_id]
|
62
|
+
row_id = opts[:row_id]
|
63
|
+
book = Spreadsheet.open path
|
64
|
+
worksheet = book.worksheet worksheet_id
|
65
|
+
headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)
|
66
|
+
|
67
|
+
[worksheet, headers]
|
68
|
+
end
|
69
|
+
|
62
70
|
def dataframe_write_excel dataframe, path, _opts={}
|
63
71
|
book = Spreadsheet::Workbook.new
|
64
72
|
sheet = book.create_worksheet
|
@@ -231,7 +239,7 @@ module Daru
|
|
231
239
|
def from_csv_hash(path, opts)
|
232
240
|
csv_as_arrays =
|
233
241
|
::CSV
|
234
|
-
.parse(open(path), opts)
|
242
|
+
.parse(open(path), **opts)
|
235
243
|
.tap { |c| yield c if block_given? }
|
236
244
|
.to_a
|
237
245
|
headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
|
data/lib/daru/vector.rb
CHANGED
@@ -444,6 +444,27 @@ module Daru
|
|
444
444
|
Daru::Core::Query.vector_where self, bool_array
|
445
445
|
end
|
446
446
|
|
447
|
+
# Return a new vector based on the contents of a boolean array and &block.
|
448
|
+
#
|
449
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
450
|
+
# collection containing the true or false values. Each element in the Vector
|
451
|
+
# corresponding to a `true` in the bool_array will be returned along with its
|
452
|
+
# index. The &block may contain manipulative functions for the Vector elements.
|
453
|
+
#
|
454
|
+
# @return [Daru::Vector]
|
455
|
+
#
|
456
|
+
# @example Usage of #apply_where.
|
457
|
+
# dv = Daru::Vector.new ['3 days', '5 weeks', '2 weeks']
|
458
|
+
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
459
|
+
# # =>
|
460
|
+
# ##<Daru::Vector(3)>
|
461
|
+
# # 0 3 days
|
462
|
+
# # 1 35 days
|
463
|
+
# # 2 14 days
|
464
|
+
def apply_where bool_array, &block
|
465
|
+
Daru::Core::Query.vector_apply_where self, bool_array, &block
|
466
|
+
end
|
467
|
+
|
447
468
|
def head q=10
|
448
469
|
self[0..(q-1)]
|
449
470
|
end
|
@@ -453,6 +474,11 @@ module Daru
|
|
453
474
|
self[start..(size-1)]
|
454
475
|
end
|
455
476
|
|
477
|
+
def last q=1
|
478
|
+
# The Enumerable mixin does not provide the last method.
|
479
|
+
tail(q)
|
480
|
+
end
|
481
|
+
|
456
482
|
def empty?
|
457
483
|
@index.empty?
|
458
484
|
end
|
@@ -1252,6 +1278,22 @@ module Daru
|
|
1252
1278
|
Daru::DataFrame.new ps
|
1253
1279
|
end
|
1254
1280
|
|
1281
|
+
# Returns an array of boolean values, indicating for each element of the
|
1282
|
+
# vector whether it matches the given +regexp+.
|
1283
|
+
#
|
1284
|
+
# @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
|
1285
|
+
#
|
1286
|
+
# @return [Array] Containing +true+ or +false+ for each element, according to whether it matches the given +regexp+
|
1287
|
+
#
|
1288
|
+
# @example
|
1289
|
+
# dv = Daru::Vector.new(['3 days', '5 weeks', '2 weeks'])
|
1290
|
+
# dv.match(/weeks/)
|
1291
|
+
#
|
1292
|
+
# # => [false, true, true]
|
1293
|
+
def match(regexp)
|
1294
|
+
@data.map { |value| !!(value =~ regexp) }
|
1295
|
+
end
|
1296
|
+
|
1255
1297
|
# Creates a new vector consisting only of non-nil data
|
1256
1298
|
#
|
1257
1299
|
# == Arguments
|
data/lib/daru/version.rb
CHANGED
@@ -1,50 +1,53 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
if Daru.has_gsl?
|
2
|
+
describe Daru::Accessors::GSLWrapper do
|
3
|
+
before :each do
|
4
|
+
@stub_context = Object.new
|
5
|
+
@gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
|
6
|
+
end
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
context ".new" do
|
9
|
+
it "actually creates a GSL Vector" do
|
10
|
+
expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
|
11
|
+
end
|
10
12
|
end
|
11
|
-
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
context "#mean" do
|
15
|
+
it "computes mean" do
|
16
|
+
expect(@gsl_wrapper.mean).to eq(3.5)
|
17
|
+
end
|
16
18
|
end
|
17
|
-
end
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
20
|
+
context "#map!" do
|
21
|
+
it "destructively maps" do
|
22
|
+
expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
|
23
|
+
Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
|
24
|
+
)
|
25
|
+
end
|
24
26
|
end
|
25
|
-
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
context "#delete_at" do
|
29
|
+
it "deletes at key" do
|
30
|
+
expect(@gsl_wrapper.delete_at(2)).to eq(3)
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
expect(@gsl_wrapper).to eq(
|
33
|
+
Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
|
34
|
+
)
|
35
|
+
end
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
|
38
|
+
context "#index" do
|
39
|
+
it "returns index of value" do
|
40
|
+
expect(@gsl_wrapper.index(3)).to eq(2)
|
41
|
+
end
|
40
42
|
end
|
41
|
-
end
|
42
43
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
context "#push" do
|
45
|
+
it "appends element" do
|
46
|
+
expect(@gsl_wrapper.push(15)).to eq(
|
47
|
+
Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
|
48
|
+
)
|
49
|
+
end
|
48
50
|
end
|
49
51
|
end
|
50
52
|
end
|
53
|
+
|
@@ -1,32 +1,35 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
if Daru.has_nmatrix?
|
4
|
+
describe Daru::Accessors::NMatrixWrapper do
|
5
|
+
before :each do
|
6
|
+
stub_context = Object.new
|
7
|
+
@nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
|
8
|
+
end
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
it "checks for actual NMatrix creation" do
|
11
|
+
expect(@nm_wrapper.data.class).to eq(NMatrix)
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
it "checks the actual size of the NMatrix object" do
|
15
|
+
expect(@nm_wrapper.data.size).to eq(10)
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
it "checks that @size is the number of elements in the vector" do
|
19
|
+
expect(@nm_wrapper.size).to eq(5)
|
20
|
+
end
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
it "checks for underlying NMatrix data type" do
|
23
|
+
expect(@nm_wrapper.data.dtype).to eq(:float32)
|
24
|
+
end
|
24
25
|
|
25
|
-
|
26
|
-
|
26
|
+
it "resizes" do
|
27
|
+
@nm_wrapper.resize(100)
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
expect(@nm_wrapper.size).to eq(5)
|
30
|
+
expect(@nm_wrapper.data.size).to eq(100)
|
31
|
+
expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
|
32
|
+
end
|
31
33
|
end
|
32
34
|
end
|
35
|
+
|
data/spec/core/merge_spec.rb
CHANGED
@@ -112,7 +112,7 @@ describe Daru::DataFrame do
|
|
112
112
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
113
113
|
end
|
114
114
|
|
115
|
-
it "performs a left outer join"
|
115
|
+
it "performs a left outer join" do
|
116
116
|
answer = Daru::DataFrame.new({
|
117
117
|
:id_1 => [2,3,1,4],
|
118
118
|
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
data/spec/core/query_spec.rb
CHANGED
@@ -334,4 +334,14 @@ describe "Arel-like syntax" do
|
|
334
334
|
end
|
335
335
|
end
|
336
336
|
end
|
337
|
+
|
338
|
+
describe "apply_where" do
|
339
|
+
context "matches regexp with block input" do
|
340
|
+
subject { dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" } }
|
341
|
+
|
342
|
+
let(:dv) { Daru::Vector.new ['3 days', '5 weeks', '2 weeks'] }
|
343
|
+
|
344
|
+
it { is_expected.to eq(Daru::Vector.new ['3 days', '35 days', '14 days']) }
|
345
|
+
end
|
346
|
+
end
|
337
347
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -738,6 +738,47 @@ describe Daru::DataFrame do
|
|
738
738
|
}
|
739
739
|
end
|
740
740
|
|
741
|
+
context "#insert_vector" do
|
742
|
+
subject(:data_frame) {
|
743
|
+
Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
744
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
745
|
+
index: [:one, :two, :three, :four, :five])
|
746
|
+
}
|
747
|
+
|
748
|
+
it "insert a new vector at the desired slot" do
|
749
|
+
df = Daru::DataFrame.new({
|
750
|
+
a: [1,2,3,4,5],
|
751
|
+
d: [710, 720, 730, 740, 750],
|
752
|
+
b: [11, 12, 13, 14, 15],
|
753
|
+
c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
|
754
|
+
index: [:one, :two, :three, :four, :five]
|
755
|
+
)
|
756
|
+
data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
|
757
|
+
expect(subject).to eq df
|
758
|
+
end
|
759
|
+
|
760
|
+
it "raises error for data array being too big" do
|
761
|
+
expect {
|
762
|
+
source = (1..8).to_a
|
763
|
+
data_frame.insert_vector 1, :d, source
|
764
|
+
}.to raise_error(IndexError)
|
765
|
+
end
|
766
|
+
|
767
|
+
it "raises error for invalid index value" do
|
768
|
+
expect {
|
769
|
+
source = (1..5).to_a
|
770
|
+
data_frame.insert_vector 4, :d, source
|
771
|
+
}.to raise_error(ArgumentError)
|
772
|
+
end
|
773
|
+
|
774
|
+
it "raises error for invalid source type" do
|
775
|
+
expect {
|
776
|
+
source = 14
|
777
|
+
data_frame.insert_vector 3, :d, source
|
778
|
+
}.to raise_error(ArgumentError)
|
779
|
+
end
|
780
|
+
end
|
781
|
+
|
741
782
|
context "#row[]=" do
|
742
783
|
context Daru::Index do
|
743
784
|
before :each do
|
@@ -2720,6 +2761,26 @@ describe Daru::DataFrame do
|
|
2720
2761
|
end
|
2721
2762
|
end
|
2722
2763
|
|
2764
|
+
context "#rename_vectors!" do
|
2765
|
+
before do
|
2766
|
+
@df = Daru::DataFrame.new({
|
2767
|
+
a: [1,2,3,4,5],
|
2768
|
+
b: [11,22,33,44,55],
|
2769
|
+
c: %w(a b c d e)
|
2770
|
+
})
|
2771
|
+
end
|
2772
|
+
|
2773
|
+
it "returns self as modified dataframe" do
|
2774
|
+
expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
|
2775
|
+
end
|
2776
|
+
|
2777
|
+
it "re-uses rename_vectors method" do
|
2778
|
+
name_map = { :a => :alpha, :c => :gamma }
|
2779
|
+
expect(@df).to receive(:rename_vectors).with(name_map)
|
2780
|
+
@df.rename_vectors! name_map
|
2781
|
+
end
|
2782
|
+
end
|
2783
|
+
|
2723
2784
|
context "#rename_vectors" do
|
2724
2785
|
before do
|
2725
2786
|
@df = Daru::DataFrame.new({
|
@@ -2729,6 +2790,10 @@ describe Daru::DataFrame do
|
|
2729
2790
|
})
|
2730
2791
|
end
|
2731
2792
|
|
2793
|
+
it "returns Daru::Index" do
|
2794
|
+
expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(Daru::Index)
|
2795
|
+
end
|
2796
|
+
|
2732
2797
|
it "renames vectors using a hash map" do
|
2733
2798
|
@df.rename_vectors :a => :alpha, :c => :gamma
|
2734
2799
|
expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
|
@@ -3972,6 +4037,21 @@ describe Daru::DataFrame do
|
|
3972
4037
|
| 12 1 1 1
|
3973
4038
|
| 13 1 1 1
|
3974
4039
|
| 14 1 1 1
|
4040
|
+
| 15 1 1 1
|
4041
|
+
| 16 1 1 1
|
4042
|
+
| 17 1 1 1
|
4043
|
+
| 18 1 1 1
|
4044
|
+
| 19 1 1 1
|
4045
|
+
| 20 1 1 1
|
4046
|
+
| 21 1 1 1
|
4047
|
+
| 22 1 1 1
|
4048
|
+
| 23 1 1 1
|
4049
|
+
| 24 1 1 1
|
4050
|
+
| 25 1 1 1
|
4051
|
+
| 26 1 1 1
|
4052
|
+
| 27 1 1 1
|
4053
|
+
| 28 1 1 1
|
4054
|
+
| 29 1 1 1
|
3975
4055
|
| ... ... ... ...
|
3976
4056
|
}.unindent}
|
3977
4057
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
include Daru
|
2
|
+
|
3
|
+
describe Daru::DateTimeIndexHelper do
|
4
|
+
|
5
|
+
|
6
|
+
describe '.infer_offset' do
|
7
|
+
subject(:offset) { Daru::DateTimeIndexHelper.infer_offset(data) }
|
8
|
+
|
9
|
+
context 'when the dataset does not have a regular offset' do
|
10
|
+
let(:data) do
|
11
|
+
[
|
12
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
13
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
14
|
+
DateTime.new(2020, 1, 1, 00, 05, 00),
|
15
|
+
]
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns nil' do
|
19
|
+
expect(offset).to be_nil
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context 'when the dataset matches a defined offset' do
|
24
|
+
let(:data) do
|
25
|
+
[
|
26
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
27
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
28
|
+
DateTime.new(2020, 1, 1, 00, 02, 00),
|
29
|
+
]
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns the matched offset' do
|
33
|
+
expect(offset).to be_an_instance_of(Daru::Offsets::Minute)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context 'when the offset is a multiple of seconds' do
|
38
|
+
let(:data) do
|
39
|
+
[
|
40
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
41
|
+
DateTime.new(2020, 1, 1, 00, 00, 03),
|
42
|
+
DateTime.new(2020, 1, 1, 00, 00, 06),
|
43
|
+
]
|
44
|
+
end
|
45
|
+
|
46
|
+
let(:expected_offset) { Daru::Offsets::Second.new(3) }
|
47
|
+
|
48
|
+
it 'returns a Second offset' do
|
49
|
+
expect(offset).to be_an_instance_of(Daru::Offsets::Second)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'has the correct multiplier' do
|
53
|
+
expect(offset.freq_string).to eql(expected_offset.freq_string)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
context 'when the offset is less than a second' do
|
58
|
+
let(:data) do
|
59
|
+
[
|
60
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00001,
|
61
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00002,
|
62
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00003,
|
63
|
+
]
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'returns nil' do
|
67
|
+
expect(offset).to be_nil
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|