daru 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
@@ -3,8 +3,8 @@ require 'spec_helper.rb'
3
3
  describe "Monkeys" do
4
4
  context Array do
5
5
  it "#recode_repeated" do
6
- expect([1,1,'a','a','b',:c,2].recode_repeated).to eq(
7
- ['1_1', '1_2','a_1','a_2','b',:c,2])
6
+ expect([1,'a',1,'a','b',:c,2].recode_repeated).to eq(
7
+ ['1_1','a_1', '1_2','a_2','b',:c,2])
8
8
  end
9
9
  end
10
10
 
@@ -16,4 +16,4 @@ describe "Monkeys" do
16
16
  expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
17
17
  end
18
18
  end
19
- end
19
+ end
@@ -83,6 +83,13 @@ describe Daru::Vector do
83
83
  expect(dv.to_a).to eq([1,2,3,4])
84
84
  expect(dv.index.to_a).to eq(['a', 'b', :r, 0])
85
85
  end
86
+
87
+ it "accepts a metadata attribute" do
88
+ dv = Daru::Vector.new [1,2,3,4,5], metadata: { cdc_type: 2 }
89
+
90
+ expect(dv.metadata) .to eq({ cdc_type: 2 })
91
+ end
92
+
86
93
  end
87
94
 
88
95
  context ".new_with_size" do
@@ -122,7 +129,7 @@ describe Daru::Vector do
122
129
  context "#[]" do
123
130
  context Daru::Index do
124
131
  before :each do
125
- @dv = Daru::Vector.new [1,2,3,4,5], name: :yoga,
132
+ @dv = Daru::Vector.new [1,2,3,4,5], name: :yoga, metadata: { cdc_type: 2 },
126
133
  index: [:yoda, :anakin, :obi, :padme, :r2d2], dtype: dtype
127
134
  end
128
135
 
@@ -161,6 +168,15 @@ describe Daru::Vector do
161
168
  expect(v[1]).to eq(3)
162
169
  expect(v[0]).to eq(1)
163
170
  end
171
+
172
+ it "raises exception for invalid index" do
173
+ expect { @dv[:foo] }.to raise_error(IndexError)
174
+ expect { @dv[:obi, :foo] }.to raise_error(IndexError)
175
+ end
176
+
177
+ it "retains the original vector metadata" do
178
+ expect(@dv[:yoda, :anakin].metadata).to eq({ cdc_type: 2 })
179
+ end
164
180
  end
165
181
 
166
182
  context Daru::MultiIndex do
@@ -177,11 +193,12 @@ describe Daru::Vector do
177
193
  [:c,:one,:bar],
178
194
  [:c,:one,:baz],
179
195
  [:c,:two,:foo],
180
- [:c,:two,:bar]
196
+ [:c,:two,:bar],
197
+ [:d,:one,:foo]
181
198
  ]
182
199
  @multi_index = Daru::MultiIndex.from_tuples(@tuples)
183
200
  @vector = Daru::Vector.new(
184
- Array.new(12) { |i| i }, index: @multi_index,
201
+ Array.new(13) { |i| i }, index: @multi_index,
185
202
  dtype: dtype, name: :mi_vector)
186
203
  end
187
204
 
@@ -211,6 +228,12 @@ describe Daru::Vector do
211
228
  dtype: dtype, name: :sub_sub_vector))
212
229
  end
213
230
 
231
+ it "returns sub vector not a single element when passed the partial tuple" do
232
+ mi = Daru::MultiIndex.from_tuples([[:foo]])
233
+ expect(@vector[:d, :one]).to eq(Daru::Vector.new([12], index: mi,
234
+ dtype: dtype, name: :sub_sub_vector))
235
+ end
236
+
214
237
  it "returns a vector with corresponding MultiIndex when specified numeric Range" do
215
238
  mi = Daru::MultiIndex.from_tuples([
216
239
  [:a,:two,:baz],
@@ -224,6 +247,12 @@ describe Daru::Vector do
224
247
  expect(@vector[3..9]).to eq(Daru::Vector.new([3,4,5,6,7,8,9], index: mi,
225
248
  dtype: dtype, name: :slice))
226
249
  end
250
+
251
+ it "raises exception for invalid index" do
252
+ expect { @vector[:foo] }.to raise_error(IndexError)
253
+ expect { @vector[:a, :two, :foo] }.to raise_error(IndexError)
254
+ expect { @vector[:x, :one] }.to raise_error(IndexError)
255
+ end
227
256
  end
228
257
  end
229
258
 
@@ -417,13 +446,13 @@ describe Daru::Vector do
417
446
  end
418
447
  end
419
448
 
420
- context "#to_hash" do
449
+ context "#to_h" do
421
450
  context Daru::Index do
422
451
  it "returns the vector as a hash" do
423
452
  dv = Daru::Vector.new [1,2,3,4,5], name: :a,
424
453
  index: [:one, :two, :three, :four, :five], dtype: dtype
425
454
 
426
- expect(dv.to_hash).to eq({one: 1, two: 2, three: 3, four: 4, five: 5})
455
+ expect(dv.to_h).to eq({one: 1, two: 2, three: 3, four: 4, five: 5})
427
456
  end
428
457
  end
429
458
 
@@ -438,7 +467,7 @@ describe Daru::Vector do
438
467
  # [:b,:two,:bar]
439
468
  # ])
440
469
  # vector = Daru::Vector.new([1,2,3,4], index: mi, dtype: dtype)
441
- # expect(vector.to_hash).to eq({
470
+ # expect(vector.to_h).to eq({
442
471
  # [:a,:two,:bar] => 1,
443
472
  # [:a,:two,:baz] => 2,
444
473
  # [:b,:one,:bar] => 3,
@@ -449,8 +478,15 @@ describe Daru::Vector do
449
478
  end
450
479
 
451
480
  context "#uniq" do
481
+ before do
482
+ @v = Daru::Vector.new [1, 2, 2, 2.0, 3, 3.0], index:[:a, :b, :c, :d, :e, :f], metadata: { cdc_type: 2 }
483
+ end
452
484
  it "keeps only unique values" do
453
- # TODO: fill this in
485
+ expect(@v.uniq).to eq(Daru::Vector.new [1, 2, 2.0, 3, 3.0], index: [:a, :b, :d, :e, :f])
486
+ end
487
+
488
+ it "retains the original vector metadata" do
489
+ expect(@v.uniq.metadata).to eq({ cdc_type: 2 })
454
490
  end
455
491
  end
456
492
 
@@ -467,7 +503,7 @@ describe Daru::Vector do
467
503
  context "#sort" do
468
504
  context Daru::Index do
469
505
  before do
470
- @dv = Daru::Vector.new [33,2,15,332,1], name: :dv, index: [:a, :b, :c, :d, :e]
506
+ @dv = Daru::Vector.new [33,2,15,332,1], name: :dv, index: [:a, :b, :c, :d, :e], metadata: { cdc_type: 2 }
471
507
  end
472
508
 
473
509
  it "sorts the vector with defaults and returns a new vector, preserving indexing" do
@@ -485,11 +521,36 @@ describe Daru::Vector do
485
521
  expect(sorted).to eq(Daru::Vector.new(["My", "Jazz", "Guitar", "My Jazz Guitar"], index: [2,1,3,0]))
486
522
  end
487
523
 
488
- it "places nils near the beginning of the vector" do
524
+ it "places nils near the beginning of the vector when sorting ascendingly" do
489
525
  with_nils = Daru::Vector.new [22,4,nil,111,nil,2]
490
526
 
491
527
  expect(with_nils.sort).to eq(Daru::Vector.new([nil,nil,2,4,22,111], index: [2,4,5,1,0,3]))
492
528
  end if dtype == :array
529
+
530
+ it "places nils near the beginning of the vector when sorting descendingly" do
531
+ with_nils = Daru::Vector.new [22,4,nil,111,nil,2]
532
+
533
+ expect(with_nils.sort(ascending: false)).to eq(
534
+ Daru::Vector.new [nil,nil,111,22,4,2], index: [4,2,3,0,1,5])
535
+ end
536
+
537
+ it "correctly sorts vector in ascending order with non-numeric data and nils" do
538
+ non_numeric = Daru::Vector.new ['a','b', nil, 'aa', '1234', nil]
539
+
540
+ expect(non_numeric.sort(ascending: true)).to eq(
541
+ Daru::Vector.new [nil,nil,'1234','a','aa','b'], index: [2,5,4,0,3,1])
542
+ end
543
+
544
+ it "correctly sorts vector in descending order with non-numeric data and nils" do
545
+ non_numeric = Daru::Vector.new ['a','b', nil, 'aa', '1234', nil]
546
+
547
+ expect(non_numeric.sort(ascending: false)).to eq(
548
+ Daru::Vector.new [nil,nil,'b','aa','a','1234'], index: [5,2,1,3,0,4])
549
+ end
550
+
551
+ it "retains the original vector metadata" do
552
+ expect(@dv.sort.metadata).to eq({ cdc_type: 2 })
553
+ end
493
554
  end
494
555
 
495
556
  context Daru::MultiIndex do
@@ -552,7 +613,7 @@ describe Daru::Vector do
552
613
  index = Daru::DateTimeIndex.date_range(:start => '2012', :periods => 5)
553
614
  @vector.index = index
554
615
 
555
- expect(@vector.index.class).to eq(DateTimeIndex)
616
+ expect(@vector.index.class).to eq(Daru::DateTimeIndex)
556
617
  expect(@vector['2012-1-1']).to eq(1)
557
618
  end
558
619
 
@@ -564,12 +625,50 @@ describe Daru::Vector do
564
625
  end
565
626
 
566
627
  context "#reindex" do
628
+ before do
629
+ @vector = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 })
630
+ @index = Daru::Index.new([3,4,1,0,6])
631
+ end
567
632
  it "intelligently reindexes" do
568
- vector = Daru::Vector.new([1,2,3,4,5])
569
- index = Daru::Index.new([3,4,1,0,6])
633
+ expect(@vector.reindex(@index)).to eq(
634
+ Daru::Vector.new([4,5,2,1,nil], index: @index))
635
+ end
636
+ it "retains the original vector metadata" do
637
+ expect(@vector.reindex(@index).metadata).to eq({ cdc_type: 2 })
638
+ end
639
+ end
570
640
 
571
- expect(vector.reindex(index)).to eq(
572
- Daru::Vector.new([4,5,2,1,nil], index: index))
641
+ context "#dup" do
642
+ before do
643
+ @dv = Daru::Vector.new [1,2], name: :yoda, metadata: { cdc_type: 2 }, index: [:happy, :lightsaber]
644
+ end
645
+
646
+ it "copies the original data" do
647
+ expect(@dv.dup.send(:data)).to eq([1,2])
648
+ end
649
+
650
+ it "creates a new data object" do
651
+ expect(@dv.dup.send(:data).object_id).not_to eq(@dv.send(:data).object_id)
652
+ end
653
+
654
+ it "copies the name" do
655
+ expect(@dv.dup.name).to eq(:yoda)
656
+ end
657
+
658
+ it "copies the original vector metadata" do
659
+ expect(@dv.dup.metadata).to eq({ cdc_type: 2 })
660
+ end
661
+
662
+ it "creates a new metadata object" do
663
+ expect(@dv.dup.metadata.object_id).not_to eq(@dv.metadata.object_id)
664
+ end
665
+
666
+ it "copies the original index" do
667
+ expect(@dv.dup.index).to eq(Daru::Index.new([:happy, :lightsaber]))
668
+ end
669
+
670
+ it "creates a new index object" do
671
+ expect(@dv.dup.index.object_id).not_to eq(@dv.index.object_id)
573
672
  end
574
673
  end
575
674
 
@@ -740,9 +839,16 @@ describe Daru::Vector do
740
839
 
741
840
  context "#clone_structure" do
742
841
  context Daru::Index do
842
+ before do
843
+ @vec = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:a,:b,:c,:d,:e])
844
+ end
845
+
743
846
  it "clones a vector with its index and fills it with nils" do
744
- vec = Daru::Vector.new([1,2,3,4,5], index: [:a,:b,:c,:d,:e])
745
- expect(vec.clone_structure).to eq(Daru::Vector.new([nil,nil,nil,nil,nil], index: [:a,:b,:c,:d,:e]))
847
+ expect(@vec.clone_structure).to eq(Daru::Vector.new([nil,nil,nil,nil,nil], index: [:a,:b,:c,:d,:e]))
848
+ end
849
+
850
+ it "retains the original vector metadata" do
851
+ expect(@vec.clone_structure.metadata).to eq({ cdc_type: 2 })
746
852
  end
747
853
  end
748
854
 
@@ -864,11 +970,22 @@ describe Daru::Vector do
864
970
  end
865
971
 
866
972
  context "#only_valid" do
867
- it "returns a Vector of only non-nil data" do
868
- vector = Daru::Vector.new [1,2,3,4,nil,3,nil],
869
- index: [:a, :b, :c, :d, :e, :f, :g]
870
- expect(vector.only_valid).to eq(Daru::Vector.new([1,2,3,4,3],
871
- index: [:a, :b, :c, :d, :f]))
973
+ [:array, :gsl].each do |dtype|
974
+ describe dtype do
975
+ before do
976
+ @vector = Daru::Vector.new [1,2,3,4,5,3,5], metadata: { cdc_type: 2 },
977
+ index: [:a, :b, :c, :d, :e, :f, :g], dtype: dtype, missing_values: [3, 5]
978
+ end
979
+
980
+ it "returns a Vector of only non-missing data" do
981
+ expect(@vector.only_valid).to eq(Daru::Vector.new([1,2,4],
982
+ index: [:a, :b, :d], dtype: dtype))
983
+ end
984
+
985
+ it "retains the original vector metadata" do
986
+ expect(@vector.only_valid.metadata).to eq({ cdc_type: 2 })
987
+ end
988
+ end
872
989
  end
873
990
  end
874
991
 
@@ -1022,19 +1139,34 @@ describe Daru::Vector do
1022
1139
  end
1023
1140
 
1024
1141
  context "#lag" do
1142
+ before do
1143
+ @xiu = Daru::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
1144
+ 16.86, 16.86, 16.56, 16.36, 16.66, 16.77], metadata: { cdc_type: 2 })
1145
+ end
1146
+
1025
1147
  it "lags the vector by specified amount" do
1026
- xiu = Daru::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,
1027
- 16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
1028
- lag1 = xiu.lag
1148
+ lag1 = @xiu.lag
1029
1149
 
1030
1150
  expect(lag1[lag1.size - 1]).to be_within(0.001).of(16.66)
1031
1151
  expect(lag1[lag1.size - 2]).to be_within(0.001).of(16.36)
1032
1152
 
1033
1153
  #test with different lagging unit
1034
- lag2 = xiu.lag(2)
1154
+ lag2 = @xiu.lag(2)
1035
1155
 
1036
1156
  expect(lag2[lag2.size - 1]).to be_within(0.001).of(16.36)
1037
1157
  expect(lag2[lag2.size - 2]).to be_within(0.001).of(16.56)
1038
1158
  end
1159
+
1160
+ it "retains the original vector metadata" do
1161
+ expect(@xiu.lag(1).metadata).to eq({ cdc_type: 2 })
1162
+ end
1039
1163
  end
1164
+
1165
+ context "#metadata" do
1166
+ it "defaults to an empty hash for metadata" do
1167
+ dv = Daru::Vector.new [1,2,3,4,5]
1168
+ expect(dv.metadata).to eq({})
1169
+ end
1170
+ end
1171
+
1040
1172
  end if mri?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-17 00:00:00.000000000 Z
11
+ date: 2016-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reportbuilder
@@ -192,20 +192,6 @@ dependencies:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: '1.16'
195
- - !ruby/object:Gem::Dependency
196
- name: bloomfilter-rb
197
- requirement: !ruby/object:Gem::Requirement
198
- requirements:
199
- - - "~>"
200
- - !ruby/object:Gem::Version
201
- version: '2.1'
202
- type: :development
203
- prerelease: false
204
- version_requirements: !ruby/object:Gem::Requirement
205
- requirements:
206
- - - "~>"
207
- - !ruby/object:Gem::Version
208
- version: '2.1'
209
195
  - !ruby/object:Gem::Dependency
210
196
  name: dbd-sqlite3
211
197
  requirement: !ruby/object:Gem::Requirement
@@ -262,6 +248,20 @@ dependencies:
262
248
  - - ">="
263
249
  - !ruby/object:Gem::Version
264
250
  version: '0'
251
+ - !ruby/object:Gem::Dependency
252
+ name: rubocop
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - ">="
256
+ - !ruby/object:Gem::Version
257
+ version: 0.40.0
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ version: 0.40.0
265
265
  description: |
266
266
  Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
267
267
  of data. Daru works seamlessly across interpreters and leverages interpreter-specific
@@ -277,6 +277,8 @@ extra_rdoc_files: []
277
277
  files:
278
278
  - ".gitignore"
279
279
  - ".rspec"
280
+ - ".rubocop.yml"
281
+ - ".rubocop_todo.yml"
280
282
  - ".travis.yml"
281
283
  - CONTRIBUTING.md
282
284
  - Gemfile
@@ -286,10 +288,13 @@ files:
286
288
  - Rakefile
287
289
  - benchmarks/TradeoffData.csv
288
290
  - benchmarks/dataframe_creation.rb
291
+ - benchmarks/duplicating.rb
289
292
  - benchmarks/group_by.rb
293
+ - benchmarks/joining.rb
290
294
  - benchmarks/row_access.rb
291
295
  - benchmarks/row_assign.rb
292
296
  - benchmarks/sorting.rb
297
+ - benchmarks/statistics.rb
293
298
  - benchmarks/vector_access.rb
294
299
  - benchmarks/vector_assign.rb
295
300
  - benchmarks/where_clause.rb
@@ -368,11 +373,26 @@ homepage: http://github.com/v0dro/daru
368
373
  licenses:
369
374
  - BSD-2
370
375
  metadata: {}
371
- post_install_message: "*************************************************************************\nThank
372
- you for installing daru!\n\n oOOOOOo \n ,| oO\n//| |\n\\\\| |\n `| |\n
373
- \ `-----`\n\n\nHope you love daru! For enhanced interactivity and better visualizations,
374
- \nconsider using gnuplotrb and nyaplot with iruby. For statistics use the \nstatsample
375
- family.\n\nRead the README for interesting use cases and examples.\n\nCheers!\n*************************************************************************\n"
376
+ post_install_message: |
377
+ *************************************************************************
378
+ Thank you for installing daru!
379
+
380
+ oOOOOOo
381
+ ,| oO
382
+ //| |
383
+ \\| |
384
+ `| |
385
+ `-----`
386
+
387
+
388
+ Hope you love daru! For enhanced interactivity and better visualizations,
389
+ consider using gnuplotrb and nyaplot with iruby. For statistics use the
390
+ statsample family.
391
+
392
+ Read the README for interesting use cases and examples.
393
+
394
+ Cheers!
395
+ *************************************************************************
376
396
  rdoc_options: []
377
397
  require_paths:
378
398
  - lib