daru 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -3,8 +3,8 @@ require 'spec_helper.rb'
3
3
  describe "Monkeys" do
4
4
  context Array do
5
5
  it "#recode_repeated" do
6
- expect([1,1,'a','a','b',:c,2].recode_repeated).to eq(
7
- ['1_1', '1_2','a_1','a_2','b',:c,2])
6
+ expect([1,'a',1,'a','b',:c,2].recode_repeated).to eq(
7
+ ['1_1','a_1', '1_2','a_2','b',:c,2])
8
8
  end
9
9
  end
10
10
 
@@ -16,4 +16,4 @@ describe "Monkeys" do
16
16
  expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
17
17
  end
18
18
  end
19
- end
19
+ end
@@ -83,6 +83,13 @@ describe Daru::Vector do
83
83
  expect(dv.to_a).to eq([1,2,3,4])
84
84
  expect(dv.index.to_a).to eq(['a', 'b', :r, 0])
85
85
  end
86
+
87
+ it "accepts a metadata attribute" do
88
+ dv = Daru::Vector.new [1,2,3,4,5], metadata: { cdc_type: 2 }
89
+
90
+ expect(dv.metadata) .to eq({ cdc_type: 2 })
91
+ end
92
+
86
93
  end
87
94
 
88
95
  context ".new_with_size" do
@@ -122,7 +129,7 @@ describe Daru::Vector do
122
129
  context "#[]" do
123
130
  context Daru::Index do
124
131
  before :each do
125
- @dv = Daru::Vector.new [1,2,3,4,5], name: :yoga,
132
+ @dv = Daru::Vector.new [1,2,3,4,5], name: :yoga, metadata: { cdc_type: 2 },
126
133
  index: [:yoda, :anakin, :obi, :padme, :r2d2], dtype: dtype
127
134
  end
128
135
 
@@ -161,6 +168,15 @@ describe Daru::Vector do
161
168
  expect(v[1]).to eq(3)
162
169
  expect(v[0]).to eq(1)
163
170
  end
171
+
172
+ it "raises exception for invalid index" do
173
+ expect { @dv[:foo] }.to raise_error(IndexError)
174
+ expect { @dv[:obi, :foo] }.to raise_error(IndexError)
175
+ end
176
+
177
+ it "retains the original vector metadata" do
178
+ expect(@dv[:yoda, :anakin].metadata).to eq({ cdc_type: 2 })
179
+ end
164
180
  end
165
181
 
166
182
  context Daru::MultiIndex do
@@ -177,11 +193,12 @@ describe Daru::Vector do
177
193
  [:c,:one,:bar],
178
194
  [:c,:one,:baz],
179
195
  [:c,:two,:foo],
180
- [:c,:two,:bar]
196
+ [:c,:two,:bar],
197
+ [:d,:one,:foo]
181
198
  ]
182
199
  @multi_index = Daru::MultiIndex.from_tuples(@tuples)
183
200
  @vector = Daru::Vector.new(
184
- Array.new(12) { |i| i }, index: @multi_index,
201
+ Array.new(13) { |i| i }, index: @multi_index,
185
202
  dtype: dtype, name: :mi_vector)
186
203
  end
187
204
 
@@ -211,6 +228,12 @@ describe Daru::Vector do
211
228
  dtype: dtype, name: :sub_sub_vector))
212
229
  end
213
230
 
231
+ it "returns sub vector not a single element when passed the partial tuple" do
232
+ mi = Daru::MultiIndex.from_tuples([[:foo]])
233
+ expect(@vector[:d, :one]).to eq(Daru::Vector.new([12], index: mi,
234
+ dtype: dtype, name: :sub_sub_vector))
235
+ end
236
+
214
237
  it "returns a vector with corresponding MultiIndex when specified numeric Range" do
215
238
  mi = Daru::MultiIndex.from_tuples([
216
239
  [:a,:two,:baz],
@@ -224,6 +247,12 @@ describe Daru::Vector do
224
247
  expect(@vector[3..9]).to eq(Daru::Vector.new([3,4,5,6,7,8,9], index: mi,
225
248
  dtype: dtype, name: :slice))
226
249
  end
250
+
251
+ it "raises exception for invalid index" do
252
+ expect { @vector[:foo] }.to raise_error(IndexError)
253
+ expect { @vector[:a, :two, :foo] }.to raise_error(IndexError)
254
+ expect { @vector[:x, :one] }.to raise_error(IndexError)
255
+ end
227
256
  end
228
257
  end
229
258
 
@@ -417,13 +446,13 @@ describe Daru::Vector do
417
446
  end
418
447
  end
419
448
 
420
- context "#to_hash" do
449
+ context "#to_h" do
421
450
  context Daru::Index do
422
451
  it "returns the vector as a hash" do
423
452
  dv = Daru::Vector.new [1,2,3,4,5], name: :a,
424
453
  index: [:one, :two, :three, :four, :five], dtype: dtype
425
454
 
426
- expect(dv.to_hash).to eq({one: 1, two: 2, three: 3, four: 4, five: 5})
455
+ expect(dv.to_h).to eq({one: 1, two: 2, three: 3, four: 4, five: 5})
427
456
  end
428
457
  end
429
458
 
@@ -438,7 +467,7 @@ describe Daru::Vector do
438
467
  # [:b,:two,:bar]
439
468
  # ])
440
469
  # vector = Daru::Vector.new([1,2,3,4], index: mi, dtype: dtype)
441
- # expect(vector.to_hash).to eq({
470
+ # expect(vector.to_h).to eq({
442
471
  # [:a,:two,:bar] => 1,
443
472
  # [:a,:two,:baz] => 2,
444
473
  # [:b,:one,:bar] => 3,
@@ -449,8 +478,15 @@ describe Daru::Vector do
449
478
  end
450
479
 
451
480
  context "#uniq" do
481
+ before do
482
+ @v = Daru::Vector.new [1, 2, 2, 2.0, 3, 3.0], index:[:a, :b, :c, :d, :e, :f], metadata: { cdc_type: 2 }
483
+ end
452
484
  it "keeps only unique values" do
453
- # TODO: fill this in
485
+ expect(@v.uniq).to eq(Daru::Vector.new [1, 2, 2.0, 3, 3.0], index: [:a, :b, :d, :e, :f])
486
+ end
487
+
488
+ it "retains the original vector metadata" do
489
+ expect(@v.uniq.metadata).to eq({ cdc_type: 2 })
454
490
  end
455
491
  end
456
492
 
@@ -467,7 +503,7 @@ describe Daru::Vector do
467
503
  context "#sort" do
468
504
  context Daru::Index do
469
505
  before do
470
- @dv = Daru::Vector.new [33,2,15,332,1], name: :dv, index: [:a, :b, :c, :d, :e]
506
+ @dv = Daru::Vector.new [33,2,15,332,1], name: :dv, index: [:a, :b, :c, :d, :e], metadata: { cdc_type: 2 }
471
507
  end
472
508
 
473
509
  it "sorts the vector with defaults and returns a new vector, preserving indexing" do
@@ -485,11 +521,36 @@ describe Daru::Vector do
485
521
  expect(sorted).to eq(Daru::Vector.new(["My", "Jazz", "Guitar", "My Jazz Guitar"], index: [2,1,3,0]))
486
522
  end
487
523
 
488
- it "places nils near the beginning of the vector" do
524
+ it "places nils near the beginning of the vector when sorting ascendingly" do
489
525
  with_nils = Daru::Vector.new [22,4,nil,111,nil,2]
490
526
 
491
527
  expect(with_nils.sort).to eq(Daru::Vector.new([nil,nil,2,4,22,111], index: [2,4,5,1,0,3]))
492
528
  end if dtype == :array
529
+
530
+ it "places nils near the beginning of the vector when sorting descendingly" do
531
+ with_nils = Daru::Vector.new [22,4,nil,111,nil,2]
532
+
533
+ expect(with_nils.sort(ascending: false)).to eq(
534
+ Daru::Vector.new [nil,nil,111,22,4,2], index: [4,2,3,0,1,5])
535
+ end
536
+
537
+ it "correctly sorts vector in ascending order with non-numeric data and nils" do
538
+ non_numeric = Daru::Vector.new ['a','b', nil, 'aa', '1234', nil]
539
+
540
+ expect(non_numeric.sort(ascending: true)).to eq(
541
+ Daru::Vector.new [nil,nil,'1234','a','aa','b'], index: [2,5,4,0,3,1])
542
+ end
543
+
544
+ it "correctly sorts vector in descending order with non-numeric data and nils" do
545
+ non_numeric = Daru::Vector.new ['a','b', nil, 'aa', '1234', nil]
546
+
547
+ expect(non_numeric.sort(ascending: false)).to eq(
548
+ Daru::Vector.new [nil,nil,'b','aa','a','1234'], index: [5,2,1,3,0,4])
549
+ end
550
+
551
+ it "retains the original vector metadata" do
552
+ expect(@dv.sort.metadata).to eq({ cdc_type: 2 })
553
+ end
493
554
  end
494
555
 
495
556
  context Daru::MultiIndex do
@@ -552,7 +613,7 @@ describe Daru::Vector do
552
613
  index = Daru::DateTimeIndex.date_range(:start => '2012', :periods => 5)
553
614
  @vector.index = index
554
615
 
555
- expect(@vector.index.class).to eq(DateTimeIndex)
616
+ expect(@vector.index.class).to eq(Daru::DateTimeIndex)
556
617
  expect(@vector['2012-1-1']).to eq(1)
557
618
  end
558
619
 
@@ -564,12 +625,50 @@ describe Daru::Vector do
564
625
  end
565
626
 
566
627
  context "#reindex" do
628
+ before do
629
+ @vector = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 })
630
+ @index = Daru::Index.new([3,4,1,0,6])
631
+ end
567
632
  it "intelligently reindexes" do
568
- vector = Daru::Vector.new([1,2,3,4,5])
569
- index = Daru::Index.new([3,4,1,0,6])
633
+ expect(@vector.reindex(@index)).to eq(
634
+ Daru::Vector.new([4,5,2,1,nil], index: @index))
635
+ end
636
+ it "retains the original vector metadata" do
637
+ expect(@vector.reindex(@index).metadata).to eq({ cdc_type: 2 })
638
+ end
639
+ end
570
640
 
571
- expect(vector.reindex(index)).to eq(
572
- Daru::Vector.new([4,5,2,1,nil], index: index))
641
+ context "#dup" do
642
+ before do
643
+ @dv = Daru::Vector.new [1,2], name: :yoda, metadata: { cdc_type: 2 }, index: [:happy, :lightsaber]
644
+ end
645
+
646
+ it "copies the original data" do
647
+ expect(@dv.dup.send(:data)).to eq([1,2])
648
+ end
649
+
650
+ it "creates a new data object" do
651
+ expect(@dv.dup.send(:data).object_id).not_to eq(@dv.send(:data).object_id)
652
+ end
653
+
654
+ it "copies the name" do
655
+ expect(@dv.dup.name).to eq(:yoda)
656
+ end
657
+
658
+ it "copies the original vector metadata" do
659
+ expect(@dv.dup.metadata).to eq({ cdc_type: 2 })
660
+ end
661
+
662
+ it "creates a new metadata object" do
663
+ expect(@dv.dup.metadata.object_id).not_to eq(@dv.metadata.object_id)
664
+ end
665
+
666
+ it "copies the original index" do
667
+ expect(@dv.dup.index).to eq(Daru::Index.new([:happy, :lightsaber]))
668
+ end
669
+
670
+ it "creates a new index object" do
671
+ expect(@dv.dup.index.object_id).not_to eq(@dv.index.object_id)
573
672
  end
574
673
  end
575
674
 
@@ -740,9 +839,16 @@ describe Daru::Vector do
740
839
 
741
840
  context "#clone_structure" do
742
841
  context Daru::Index do
842
+ before do
843
+ @vec = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:a,:b,:c,:d,:e])
844
+ end
845
+
743
846
  it "clones a vector with its index and fills it with nils" do
744
- vec = Daru::Vector.new([1,2,3,4,5], index: [:a,:b,:c,:d,:e])
745
- expect(vec.clone_structure).to eq(Daru::Vector.new([nil,nil,nil,nil,nil], index: [:a,:b,:c,:d,:e]))
847
+ expect(@vec.clone_structure).to eq(Daru::Vector.new([nil,nil,nil,nil,nil], index: [:a,:b,:c,:d,:e]))
848
+ end
849
+
850
+ it "retains the original vector metadata" do
851
+ expect(@vec.clone_structure.metadata).to eq({ cdc_type: 2 })
746
852
  end
747
853
  end
748
854
 
@@ -864,11 +970,22 @@ describe Daru::Vector do
864
970
  end
865
971
 
866
972
  context "#only_valid" do
867
- it "returns a Vector of only non-nil data" do
868
- vector = Daru::Vector.new [1,2,3,4,nil,3,nil],
869
- index: [:a, :b, :c, :d, :e, :f, :g]
870
- expect(vector.only_valid).to eq(Daru::Vector.new([1,2,3,4,3],
871
- index: [:a, :b, :c, :d, :f]))
973
+ [:array, :gsl].each do |dtype|
974
+ describe dtype do
975
+ before do
976
+ @vector = Daru::Vector.new [1,2,3,4,5,3,5], metadata: { cdc_type: 2 },
977
+ index: [:a, :b, :c, :d, :e, :f, :g], dtype: dtype, missing_values: [3, 5]
978
+ end
979
+
980
+ it "returns a Vector of only non-missing data" do
981
+ expect(@vector.only_valid).to eq(Daru::Vector.new([1,2,4],
982
+ index: [:a, :b, :d], dtype: dtype))
983
+ end
984
+
985
+ it "retains the original vector metadata" do
986
+ expect(@vector.only_valid.metadata).to eq({ cdc_type: 2 })
987
+ end
988
+ end
872
989
  end
873
990
  end
874
991
 
@@ -1022,19 +1139,34 @@ describe Daru::Vector do
1022
1139
  end
1023
1140
 
1024
1141
  context "#lag" do
1142
+ before do
1143
+ @xiu = Daru::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
1144
+ 16.86, 16.86, 16.56, 16.36, 16.66, 16.77], metadata: { cdc_type: 2 })
1145
+ end
1146
+
1025
1147
  it "lags the vector by specified amount" do
1026
- xiu = Daru::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
1027
- 16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
1028
- lag1 = xiu.lag
1148
+ lag1 = @xiu.lag
1029
1149
 
1030
1150
  expect(lag1[lag1.size - 1]).to be_within(0.001).of(16.66)
1031
1151
  expect(lag1[lag1.size - 2]).to be_within(0.001).of(16.36)
1032
1152
 
1033
1153
  #test with different lagging unit
1034
- lag2 = xiu.lag(2)
1154
+ lag2 = @xiu.lag(2)
1035
1155
 
1036
1156
  expect(lag2[lag2.size - 1]).to be_within(0.001).of(16.36)
1037
1157
  expect(lag2[lag2.size - 2]).to be_within(0.001).of(16.56)
1038
1158
  end
1159
+
1160
+ it "retains the original vector metadata" do
1161
+ expect(@xiu.lag(1).metadata).to eq({ cdc_type: 2 })
1162
+ end
1039
1163
  end
1164
+
1165
+ context "#metadata" do
1166
+ it "defaults to an empty hash for metadata" do
1167
+ dv = Daru::Vector.new [1,2,3,4,5]
1168
+ expect(dv.metadata).to eq({})
1169
+ end
1170
+ end
1171
+
1040
1172
  end if mri?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-17 00:00:00.000000000 Z
11
+ date: 2016-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reportbuilder
@@ -192,20 +192,6 @@ dependencies:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: '1.16'
195
- - !ruby/object:Gem::Dependency
196
- name: bloomfilter-rb
197
- requirement: !ruby/object:Gem::Requirement
198
- requirements:
199
- - - "~>"
200
- - !ruby/object:Gem::Version
201
- version: '2.1'
202
- type: :development
203
- prerelease: false
204
- version_requirements: !ruby/object:Gem::Requirement
205
- requirements:
206
- - - "~>"
207
- - !ruby/object:Gem::Version
208
- version: '2.1'
209
195
  - !ruby/object:Gem::Dependency
210
196
  name: dbd-sqlite3
211
197
  requirement: !ruby/object:Gem::Requirement
@@ -262,6 +248,20 @@ dependencies:
262
248
  - - ">="
263
249
  - !ruby/object:Gem::Version
264
250
  version: '0'
251
+ - !ruby/object:Gem::Dependency
252
+ name: rubocop
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - ">="
256
+ - !ruby/object:Gem::Version
257
+ version: 0.40.0
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ version: 0.40.0
265
265
  description: |
266
266
  Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
267
267
  of data. Daru works seamlessly accross interpreters and leverages interpreter-specific
@@ -277,6 +277,8 @@ extra_rdoc_files: []
277
277
  files:
278
278
  - ".gitignore"
279
279
  - ".rspec"
280
+ - ".rubocop.yml"
281
+ - ".rubocop_todo.yml"
280
282
  - ".travis.yml"
281
283
  - CONTRIBUTING.md
282
284
  - Gemfile
@@ -286,10 +288,13 @@ files:
286
288
  - Rakefile
287
289
  - benchmarks/TradeoffData.csv
288
290
  - benchmarks/dataframe_creation.rb
291
+ - benchmarks/duplicating.rb
289
292
  - benchmarks/group_by.rb
293
+ - benchmarks/joining.rb
290
294
  - benchmarks/row_access.rb
291
295
  - benchmarks/row_assign.rb
292
296
  - benchmarks/sorting.rb
297
+ - benchmarks/statistics.rb
293
298
  - benchmarks/vector_access.rb
294
299
  - benchmarks/vector_assign.rb
295
300
  - benchmarks/where_clause.rb
@@ -368,11 +373,26 @@ homepage: http://github.com/v0dro/daru
368
373
  licenses:
369
374
  - BSD-2
370
375
  metadata: {}
371
- post_install_message: "*************************************************************************\nThank
372
- you for installing daru!\n\n oOOOOOo \n ,| oO\n//| |\n\\\\| |\n `| |\n
373
- \ `-----`\n\n\nHope you love daru! For enhanced interactivity and better visualizations,
374
- \nconsider using gnuplotrb and nyaplot with iruby. For statistics use the \nstatsample
375
- family.\n\nRead the README for interesting use cases and examples.\n\nCheers!\n*************************************************************************\n"
376
+ post_install_message: |
377
+ *************************************************************************
378
+ Thank you for installing daru!
379
+
380
+ oOOOOOo
381
+ ,| oO
382
+ //| |
383
+ \\| |
384
+ `| |
385
+ `-----`
386
+
387
+
388
+ Hope you love daru! For enhanced interactivity and better visualizations,
389
+ consider using gnuplotrb and nyaplot with iruby. For statistics use the
390
+ statsample family.
391
+
392
+ Read the README for interesting use cases and examples.
393
+
394
+ Cheers!
395
+ *************************************************************************
376
396
  rdoc_options: []
377
397
  require_paths:
378
398
  - lib