daru 0.2.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -111,6 +111,14 @@ module Daru
111
111
  def - date_time
112
112
  date_time - @n*multiplier
113
113
  end
114
+
115
+ def ==(other_obj)
116
+ other_obj.is_a?(Tick) && period == other_obj.period
117
+ end
118
+
119
+ def period
120
+ @n * multiplier
121
+ end
114
122
  end
115
123
 
116
124
  # Create a seconds offset
@@ -13,13 +13,10 @@ module Daru
13
13
  @row_headers = [''] * @data.to_a.size if @row_headers.empty?
14
14
  end
15
15
 
16
- DEFAULT_SPACING = 10
17
- DEFAULT_THRESHOLD = 15
18
-
19
16
  def format threshold=nil, spacing=nil
20
- rows = build_rows(threshold || DEFAULT_THRESHOLD)
17
+ rows = build_rows(threshold || Daru.max_rows)
21
18
 
22
- formatter = construct_formatter rows, spacing || DEFAULT_SPACING
19
+ formatter = construct_formatter rows, spacing || Daru.spacing
23
20
 
24
21
  rows.map { |r| formatter % r }.join("\n")
25
22
  end
@@ -39,16 +39,12 @@ module Daru
39
39
  # Functions for loading/writing Excel files.
40
40
 
41
41
  def from_excel path, opts={}
42
- optional_gem 'spreadsheet', '~>1.1.1'
43
42
  opts = {
44
- worksheet_id: 0
43
+ worksheet_id: 0,
44
+ row_id: 0
45
45
  }.merge opts
46
46
 
47
- worksheet_id = opts[:worksheet_id]
48
- book = Spreadsheet.open path
49
- worksheet = book.worksheet worksheet_id
50
- headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
51
-
47
+ worksheet, headers = read_from_excel(path, opts)
52
48
  df = Daru::DataFrame.new({})
53
49
  headers.each_with_index do |h,i|
54
50
  col = worksheet.column(i).to_a
@@ -59,6 +55,18 @@ module Daru
59
55
  df
60
56
  end
61
57
 
58
+ def read_from_excel path, opts
59
+ optional_gem 'spreadsheet', '~>1.1.1'
60
+
61
+ worksheet_id = opts[:worksheet_id]
62
+ row_id = opts[:row_id]
63
+ book = Spreadsheet.open path
64
+ worksheet = book.worksheet worksheet_id
65
+ headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)
66
+
67
+ [worksheet, headers]
68
+ end
69
+
62
70
  def dataframe_write_excel dataframe, path, _opts={}
63
71
  book = Spreadsheet::Workbook.new
64
72
  sheet = book.create_worksheet
@@ -231,7 +239,7 @@ module Daru
231
239
  def from_csv_hash(path, opts)
232
240
  csv_as_arrays =
233
241
  ::CSV
234
- .parse(open(path), opts)
242
+ .parse(open(path), **opts)
235
243
  .tap { |c| yield c if block_given? }
236
244
  .to_a
237
245
  headers = ArrayHelper.recode_repeated(csv_as_arrays.shift)
@@ -444,6 +444,27 @@ module Daru
444
444
  Daru::Core::Query.vector_where self, bool_array
445
445
  end
446
446
 
447
+ # Return a new vector based on the contents of a boolean array and &block.
448
+ #
449
+ # @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
450
+ # collection containing the true or false values. Each element in the Vector
451
+ # corresponding to a `true` in the bool_array will be returned along with its
452
+ # index. The &block may contain manipulative functions for the Vector elements.
453
+ #
454
+ # @return [Daru::Vector]
455
+ #
456
+ # @example Usage of #apply_where.
457
+ # dv = Daru::Vector.new ['3 days', '5 weeks', '2 weeks']
458
+ # dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
459
+ # # =>
460
+ # ##<Daru::Vector(3)>
461
+ # # 0 3 days
462
+ # # 1 35 days
463
+ # # 2 14 days
464
+ def apply_where bool_array, &block
465
+ Daru::Core::Query.vector_apply_where self, bool_array, &block
466
+ end
467
+
447
468
  def head q=10
448
469
  self[0..(q-1)]
449
470
  end
@@ -453,6 +474,11 @@ module Daru
453
474
  self[start..(size-1)]
454
475
  end
455
476
 
477
+ def last q=1
478
+ # The Enumerable mixin does not provide the last method.
479
+ tail(q)
480
+ end
481
+
456
482
  def empty?
457
483
  @index.empty?
458
484
  end
@@ -1252,6 +1278,22 @@ module Daru
1252
1278
  Daru::DataFrame.new ps
1253
1279
  end
1254
1280
 
1281
+ # Returns an array of +true+ or +false+ values, indicating whether each
1282
+ # element of the vector matches the given +regexp+.
1283
+ #
1284
+ # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
1285
+ #
1286
+ # @return [Array] Containing +true+ or +false+ for each element, according to whether it matches the given +regexp+
1287
+ #
1288
+ # @example
1289
+ # dv = Daru::Vector.new(['3 days', '5 weeks', '2 weeks'])
1290
+ # dv.match(/weeks/)
1291
+ #
1292
+ # # => [false, true, true]
1293
+ def match(regexp)
1294
+ @data.map { |value| !!(value =~ regexp) }
1295
+ end
1296
+
1255
1297
  # Creates a new vector consisting only of non-nil data
1256
1298
  #
1257
1299
  # == Arguments
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.3'.freeze
3
3
  end
@@ -1,50 +1,53 @@
1
- describe Daru::Accessors::GSLWrapper do
2
- before :each do
3
- @stub_context = Object.new
4
- @gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
5
- end
1
+ if Daru.has_gsl?
2
+ describe Daru::Accessors::GSLWrapper do
3
+ before :each do
4
+ @stub_context = Object.new
5
+ @gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
6
+ end
6
7
 
7
- context ".new" do
8
- it "actually creates a GSL Vector" do
9
- expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
8
+ context ".new" do
9
+ it "actually creates a GSL Vector" do
10
+ expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
11
+ end
10
12
  end
11
- end
12
13
 
13
- context "#mean" do
14
- it "computes mean" do
15
- expect(@gsl_wrapper.mean).to eq(3.5)
14
+ context "#mean" do
15
+ it "computes mean" do
16
+ expect(@gsl_wrapper.mean).to eq(3.5)
17
+ end
16
18
  end
17
- end
18
19
 
19
- context "#map!" do
20
- it "destructively maps" do
21
- expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
22
- Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
23
- )
20
+ context "#map!" do
21
+ it "destructively maps" do
22
+ expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
23
+ Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
24
+ )
25
+ end
24
26
  end
25
- end
26
27
 
27
- context "#delete_at" do
28
- it "deletes at key" do
29
- expect(@gsl_wrapper.delete_at(2)).to eq(3)
28
+ context "#delete_at" do
29
+ it "deletes at key" do
30
+ expect(@gsl_wrapper.delete_at(2)).to eq(3)
30
31
 
31
- expect(@gsl_wrapper).to eq(
32
- Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
33
- )
32
+ expect(@gsl_wrapper).to eq(
33
+ Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
34
+ )
35
+ end
34
36
  end
35
- end
36
37
 
37
- context "#index" do
38
- it "returns index of value" do
39
- expect(@gsl_wrapper.index(3)).to eq(2)
38
+ context "#index" do
39
+ it "returns index of value" do
40
+ expect(@gsl_wrapper.index(3)).to eq(2)
41
+ end
40
42
  end
41
- end
42
43
 
43
- context "#push" do
44
- it "appends element" do
45
- expect(@gsl_wrapper.push(15)).to eq(
46
- Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
47
- )
44
+ context "#push" do
45
+ it "appends element" do
46
+ expect(@gsl_wrapper.push(15)).to eq(
47
+ Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
48
+ )
49
+ end
48
50
  end
49
51
  end
50
52
  end
53
+
@@ -1,32 +1,35 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
- describe Daru::Accessors::NMatrixWrapper do
4
- before :each do
5
- stub_context = Object.new
6
- @nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
7
- end
3
+ if Daru.has_nmatrix?
4
+ describe Daru::Accessors::NMatrixWrapper do
5
+ before :each do
6
+ stub_context = Object.new
7
+ @nm_wrapper = Daru::Accessors::NMatrixWrapper.new([1,2,3,4,5], stub_context, :float32)
8
+ end
8
9
 
9
- it "checks for actual NMatrix creation" do
10
- expect(@nm_wrapper.data.class).to eq(NMatrix)
11
- end
10
+ it "checks for actual NMatrix creation" do
11
+ expect(@nm_wrapper.data.class).to eq(NMatrix)
12
+ end
12
13
 
13
- it "checks the actual size of the NMatrix object" do
14
- expect(@nm_wrapper.data.size).to eq(10)
15
- end
14
+ it "checks the actual size of the NMatrix object" do
15
+ expect(@nm_wrapper.data.size).to eq(10)
16
+ end
16
17
 
17
- it "checks that @size is the number of elements in the vector" do
18
- expect(@nm_wrapper.size).to eq(5)
19
- end
18
+ it "checks that @size is the number of elements in the vector" do
19
+ expect(@nm_wrapper.size).to eq(5)
20
+ end
20
21
 
21
- it "checks for underlying NMatrix data type" do
22
- expect(@nm_wrapper.data.dtype).to eq(:float32)
23
- end
22
+ it "checks for underlying NMatrix data type" do
23
+ expect(@nm_wrapper.data.dtype).to eq(:float32)
24
+ end
24
25
 
25
- it "resizes" do
26
- @nm_wrapper.resize(100)
26
+ it "resizes" do
27
+ @nm_wrapper.resize(100)
27
28
 
28
- expect(@nm_wrapper.size).to eq(5)
29
- expect(@nm_wrapper.data.size).to eq(100)
30
- expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
29
+ expect(@nm_wrapper.size).to eq(5)
30
+ expect(@nm_wrapper.data.size).to eq(100)
31
+ expect(@nm_wrapper.data).to eq(NMatrix.new [100], [1,2,3,4,5])
32
+ end
31
33
  end
32
34
  end
35
+
@@ -112,7 +112,7 @@ describe Daru::DataFrame do
112
112
  expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
113
113
  end
114
114
 
115
- it "performs a left outer join", focus: true do
115
+ it "performs a left outer join" do
116
116
  answer = Daru::DataFrame.new({
117
117
  :id_1 => [2,3,1,4],
118
118
  :name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
@@ -334,4 +334,14 @@ describe "Arel-like syntax" do
334
334
  end
335
335
  end
336
336
  end
337
+
338
+ describe "apply_where" do
339
+ context "matches regexp with block input" do
340
+ subject { dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" } }
341
+
342
+ let(:dv) { Daru::Vector.new ['3 days', '5 weeks', '2 weeks'] }
343
+
344
+ it { is_expected.to eq(Daru::Vector.new ['3 days', '35 days', '14 days']) }
345
+ end
346
+ end
337
347
  end
@@ -738,6 +738,47 @@ describe Daru::DataFrame do
738
738
  }
739
739
  end
740
740
 
741
+ context "#insert_vector" do
742
+ subject(:data_frame) {
743
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
744
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
745
+ index: [:one, :two, :three, :four, :five])
746
+ }
747
+
748
+ it "insert a new vector at the desired slot" do
749
+ df = Daru::DataFrame.new({
750
+ a: [1,2,3,4,5],
751
+ d: [710, 720, 730, 740, 750],
752
+ b: [11, 12, 13, 14, 15],
753
+ c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
754
+ index: [:one, :two, :three, :four, :five]
755
+ )
756
+ data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
757
+ expect(subject).to eq df
758
+ end
759
+
760
+ it "raises error for data array being too big" do
761
+ expect {
762
+ source = (1..8).to_a
763
+ data_frame.insert_vector 1, :d, source
764
+ }.to raise_error(IndexError)
765
+ end
766
+
767
+ it "raises error for invalid index value" do
768
+ expect {
769
+ source = (1..5).to_a
770
+ data_frame.insert_vector 4, :d, source
771
+ }.to raise_error(ArgumentError)
772
+ end
773
+
774
+ it "raises error for invalid source type" do
775
+ expect {
776
+ source = 14
777
+ data_frame.insert_vector 3, :d, source
778
+ }.to raise_error(ArgumentError)
779
+ end
780
+ end
781
+
741
782
  context "#row[]=" do
742
783
  context Daru::Index do
743
784
  before :each do
@@ -2720,6 +2761,26 @@ describe Daru::DataFrame do
2720
2761
  end
2721
2762
  end
2722
2763
 
2764
+ context "#rename_vectors!" do
2765
+ before do
2766
+ @df = Daru::DataFrame.new({
2767
+ a: [1,2,3,4,5],
2768
+ b: [11,22,33,44,55],
2769
+ c: %w(a b c d e)
2770
+ })
2771
+ end
2772
+
2773
+ it "returns self as modified dataframe" do
2774
+ expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
2775
+ end
2776
+
2777
+ it "re-uses rename_vectors method" do
2778
+ name_map = { :a => :alpha, :c => :gamma }
2779
+ expect(@df).to receive(:rename_vectors).with(name_map)
2780
+ @df.rename_vectors! name_map
2781
+ end
2782
+ end
2783
+
2723
2784
  context "#rename_vectors" do
2724
2785
  before do
2725
2786
  @df = Daru::DataFrame.new({
@@ -2729,6 +2790,10 @@ describe Daru::DataFrame do
2729
2790
  })
2730
2791
  end
2731
2792
 
2793
+ it "returns Daru::Index" do
2794
+ expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(Daru::Index)
2795
+ end
2796
+
2732
2797
  it "renames vectors using a hash map" do
2733
2798
  @df.rename_vectors :a => :alpha, :c => :gamma
2734
2799
  expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
@@ -3972,6 +4037,21 @@ describe Daru::DataFrame do
3972
4037
  | 12 1 1 1
3973
4038
  | 13 1 1 1
3974
4039
  | 14 1 1 1
4040
+ | 15 1 1 1
4041
+ | 16 1 1 1
4042
+ | 17 1 1 1
4043
+ | 18 1 1 1
4044
+ | 19 1 1 1
4045
+ | 20 1 1 1
4046
+ | 21 1 1 1
4047
+ | 22 1 1 1
4048
+ | 23 1 1 1
4049
+ | 24 1 1 1
4050
+ | 25 1 1 1
4051
+ | 26 1 1 1
4052
+ | 27 1 1 1
4053
+ | 28 1 1 1
4054
+ | 29 1 1 1
3975
4055
  | ... ... ... ...
3976
4056
  }.unindent}
3977
4057
  end
@@ -0,0 +1,72 @@
1
+ include Daru
2
+
3
+ describe Daru::DateTimeIndexHelper do
4
+
5
+
6
+ describe '.infer_offset' do
7
+ subject(:offset) { Daru::DateTimeIndexHelper.infer_offset(data) }
8
+
9
+ context 'when the dataset does not have a regular offset' do
10
+ let(:data) do
11
+ [
12
+ DateTime.new(2020, 1, 1, 00, 00, 00),
13
+ DateTime.new(2020, 1, 1, 00, 01, 00),
14
+ DateTime.new(2020, 1, 1, 00, 05, 00),
15
+ ]
16
+ end
17
+
18
+ it 'returns nil' do
19
+ expect(offset).to be_nil
20
+ end
21
+ end
22
+
23
+ context 'when the dataset matches a defined offset' do
24
+ let(:data) do
25
+ [
26
+ DateTime.new(2020, 1, 1, 00, 00, 00),
27
+ DateTime.new(2020, 1, 1, 00, 01, 00),
28
+ DateTime.new(2020, 1, 1, 00, 02, 00),
29
+ ]
30
+ end
31
+
32
+ it 'returns the matched offset' do
33
+ expect(offset).to be_an_instance_of(Daru::Offsets::Minute)
34
+ end
35
+ end
36
+
37
+ context 'when the offset is a multiple of seconds' do
38
+ let(:data) do
39
+ [
40
+ DateTime.new(2020, 1, 1, 00, 00, 00),
41
+ DateTime.new(2020, 1, 1, 00, 00, 03),
42
+ DateTime.new(2020, 1, 1, 00, 00, 06),
43
+ ]
44
+ end
45
+
46
+ let(:expected_offset) { Daru::Offsets::Second.new(3) }
47
+
48
+ it 'returns a Second offset' do
49
+ expect(offset).to be_an_instance_of(Daru::Offsets::Second)
50
+ end
51
+
52
+ it 'has the correct multiplier' do
53
+ expect(offset.freq_string).to eql(expected_offset.freq_string)
54
+ end
55
+ end
56
+
57
+ context 'when the offset is less than a second' do
58
+ let(:data) do
59
+ [
60
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00001,
61
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00002,
62
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00003,
63
+ ]
64
+ end
65
+
66
+ it 'returns nil' do
67
+ expect(offset).to be_nil
68
+ end
69
+ end
70
+ end
71
+
72
+ end