daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -1,9 +1,37 @@
1
+ require 'data_frame/aggregatable_example'
2
+ require 'data_frame/buildable_example'
3
+ require 'data_frame/calculatable_example'
4
+ require 'data_frame/convertible_example'
5
+ require 'data_frame/duplicatable_example'
6
+ require 'data_frame/fetchable_example'
7
+ require 'data_frame/filterable_example'
8
+ require 'data_frame/indexable_example'
9
+ require 'data_frame/iterable_example'
10
+ require 'data_frame/joinable_example'
11
+ require 'data_frame/missable_example'
12
+ require 'data_frame/pivotable_example'
13
+ require 'data_frame/queryable_example'
14
+ require 'data_frame/setable_example'
15
+ require 'data_frame/sortable_example'
16
+
1
17
  describe DaruLite::DataFrame do
2
- before :each do
3
- @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
4
- c: [11,22,33,44,55]},
18
+ let(:df) do
19
+ DaruLite::DataFrame.new(
20
+ { b: [11,12,13,14,15], a: [1,2,3,4,5], c: [11,22,33,44,55] },
5
21
  order: [:a, :b, :c],
6
- index: [:one, :two, :three, :four, :five])
22
+ index: [:one, :two, :three, :four, :five]
23
+ )
24
+ end
25
+ let(:df_mi) do
26
+ DaruLite::DataFrame.new(
27
+ [vector_arry1, vector_arry2, vector_arry1, vector_arry2],
28
+ order: order_mi,
29
+ index: multi_index
30
+ )
31
+ end
32
+ let(:vector_arry1) { [11,12,13,14,11,12,13,14,11,12,13,14] }
33
+ let(:vector_arry2) { [1,2,3,4,1,2,3,4,1,2,3,4] }
34
+ let(:multi_index) do
7
35
  tuples = [
8
36
  [:a,:one,:bar],
9
37
  [:a,:one,:baz],
@@ -18,134 +46,36 @@ describe DaruLite::DataFrame do
18
46
  [:c,:two,:foo],
19
47
  [:c,:two,:bar]
20
48
  ]
21
- @multi_index = DaruLite::MultiIndex.from_tuples(tuples)
22
-
23
- @vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]
24
- @vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]
25
-
26
- @order_mi = DaruLite::MultiIndex.from_tuples([
27
- [:a,:one,:bar],
28
- [:a,:two,:baz],
29
- [:b,:two,:foo],
30
- [:b,:one,:foo]])
31
-
32
- @df_mi = DaruLite::DataFrame.new([
33
- @vector_arry1,
34
- @vector_arry2,
35
- @vector_arry1,
36
- @vector_arry2], order: @order_mi, index: @multi_index)
49
+ DaruLite::MultiIndex.from_tuples(tuples)
37
50
  end
38
-
39
- context ".rows" do
40
- before do
41
- @rows = [
42
- [1,2,3,4,5],
43
- [1,2,3,4,5],
44
- [1,2,3,4,5],
45
- [1,2,3,4,5]
51
+ let(:order_mi) do
52
+ DaruLite::MultiIndex.from_tuples(
53
+ [
54
+ [:a,:one,:bar],
55
+ [:a,:two,:baz],
56
+ [:b,:two,:foo],
57
+ [:b,:one,:foo]
46
58
  ]
47
- end
48
-
49
- context DaruLite::Index do
50
- it "creates a DataFrame from Array rows" do
51
- df = DaruLite::DataFrame.rows @rows, order: [:a,:b,:c,:d,:e]
52
-
53
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
54
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
55
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
56
- end
57
-
58
- it "creates empty dataframe" do
59
- df = DaruLite::DataFrame.rows [], order: [:a, :b, :c]
60
-
61
- expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
62
- expect(df.index).to be_empty
63
- end
64
-
65
- it "creates a DataFrame from Vector rows" do
66
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
67
-
68
- df = DaruLite::DataFrame.rows rows, order: [:a,:b,:c,:d,:e]
69
-
70
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
71
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
72
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
73
- end
74
-
75
- it 'derives index & order from arrays' do
76
- df = DaruLite::DataFrame.rows @rows
77
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
78
- expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
79
- end
80
-
81
- it 'derives index & order from vectors' do
82
- rows = @rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
- df = DaruLite::DataFrame.rows rows
84
- expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
85
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
86
- end
87
-
88
- it 'behaves, when rows are repeated' do
89
- rows = @rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
90
- df = DaruLite::DataFrame.rows rows
91
- expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
92
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
93
- end
94
-
95
- it 'behaves, when vectors are unnamed' do
96
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
97
- df = DaruLite::DataFrame.rows rows
98
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
99
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
100
- end
101
- end
102
-
103
- context DaruLite::MultiIndex do
104
- it "creates a DataFrame from rows" do
105
- df = DaruLite::DataFrame.rows(
106
- @rows*3, index: @multi_index, order: [:a,:b,:c,:d,:e])
107
-
108
- expect(df.index) .to eq(@multi_index)
109
- expect(df.vectors) .to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
110
- expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: @multi_index))
111
- end
112
-
113
- it "crates a DataFrame from rows (MultiIndex order)" do
114
- rows = [
115
- [11, 1, 11, 1],
116
- [12, 2, 12, 2],
117
- [13, 3, 13, 3],
118
- [14, 4, 14, 4]
119
- ]
120
- index = DaruLite::MultiIndex.from_tuples([
121
- [:one,:bar],
122
- [:one,:baz],
123
- [:two,:foo],
124
- [:two,:bar]
125
- ])
126
-
127
- df = DaruLite::DataFrame.rows(rows, index: index, order: @order_mi)
128
- expect(df.index) .to eq(index)
129
- expect(df.vectors).to eq(@order_mi)
130
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
131
- index: index))
132
- end
133
-
134
- it "creates a DataFrame from Vector rows" do
135
- rows = @rows*3
136
- rows.map! { |r| DaruLite::Vector.new(r, index: @multi_index) }
137
-
138
- df = DaruLite::DataFrame.rows rows, order: @multi_index
139
-
140
- expect(df.index).to eq(DaruLite::Index.new(Array.new(rows.size) { |i| i }))
141
- expect(df.vectors).to eq(@multi_index)
142
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
143
- end
144
- end
145
- end
59
+ )
60
+ end
61
+
62
+ it_behaves_like 'an aggregatable DataFrame'
63
+ it_behaves_like 'a buildable DataFrame'
64
+ it_behaves_like 'a calculatable DataFrame'
65
+ it_behaves_like 'a convertible DataFrame'
66
+ it_behaves_like 'a duplicatable DataFrame'
67
+ it_behaves_like 'a fetchable DataFrame'
68
+ it_behaves_like 'a filterable DataFrame'
69
+ it_behaves_like 'an indexable DataFrame'
70
+ it_behaves_like 'an iterable DataFrame'
71
+ it_behaves_like 'a joinable DataFrame'
72
+ it_behaves_like 'a missable DataFrame'
73
+ it_behaves_like 'a pivotable DataFrame'
74
+ it_behaves_like 'a queryable DataFrame'
75
+ it_behaves_like 'a setable DataFrame'
76
+ it_behaves_like 'a sortable DataFrame'
146
77
 
147
78
  context "#initialize" do
148
-
149
79
  it "initializes an empty DataFrame with no arguments" do
150
80
  df = DaruLite::DataFrame.new
151
81
  expect(df.nrows).to eq(0)
@@ -370,24 +300,24 @@ describe DaruLite::DataFrame do
370
300
 
371
301
  context DaruLite::MultiIndex do
372
302
  it "creates empty DataFrame" do
373
- df = DaruLite::DataFrame.new({}, order: @order_mi)
303
+ df = DaruLite::DataFrame.new({}, order: order_mi)
374
304
 
375
- expect(df.vectors).to eq(@order_mi)
305
+ expect(df.vectors).to eq(order_mi)
376
306
  expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([]))
377
307
  end
378
308
 
379
309
  it "creates from Hash" do
380
310
  df = DaruLite::DataFrame.new({
381
- [:a,:one,:bar] => @vector_arry1,
382
- [:a,:two,:baz] => @vector_arry2,
383
- [:b,:one,:foo] => @vector_arry1,
384
- [:b,:two,:foo] => @vector_arry2
385
- }, order: @order_mi, index: @multi_index)
386
-
387
- expect(df.index) .to eq(@multi_index)
388
- expect(df.vectors) .to eq(@order_mi)
389
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(@vector_arry1,
390
- index: @multi_index))
311
+ [:a,:one,:bar] => vector_arry1,
312
+ [:a,:two,:baz] => vector_arry2,
313
+ [:b,:one,:foo] => vector_arry1,
314
+ [:b,:two,:foo] => vector_arry2
315
+ }, order: order_mi, index: multi_index)
316
+
317
+ expect(df.index) .to eq(multi_index)
318
+ expect(df.vectors) .to eq(order_mi)
319
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(vector_arry1,
320
+ index: multi_index))
391
321
  end
392
322
 
393
323
  it "creates from Array of Hashes" do
@@ -395,25 +325,25 @@ describe DaruLite::DataFrame do
395
325
  end
396
326
 
397
327
  it "creates from Array of Arrays" do
398
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
399
- @vector_arry2], index: @multi_index, order: @order_mi)
328
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2, vector_arry1,
329
+ vector_arry2], index: multi_index, order: order_mi)
400
330
 
401
- expect(df.index) .to eq(@multi_index)
402
- expect(df.vectors).to eq(@order_mi)
403
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(@vector_arry1,
404
- index: @multi_index))
331
+ expect(df.index) .to eq(multi_index)
332
+ expect(df.vectors).to eq(order_mi)
333
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(vector_arry1,
334
+ index: multi_index))
405
335
  end
406
336
 
407
337
  it "raises error for order MultiIndex of different size than supplied Array" do
408
338
  expect {
409
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2], order: @order_mi,
410
- index: @multi_index)
339
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2], order: order_mi,
340
+ index: multi_index)
411
341
  }.to raise_error
412
342
  end
413
343
 
414
344
  it "aligns MultiIndexes properly" do
415
345
  pending
416
- mi_a = @order_mi
346
+ mi_a = order_mi
417
347
  mi_b = DaruLite::MultiIndex.from_tuples([
418
348
  [:b,:one,:foo],
419
349
  [:a,:one,:bar],
@@ -450,215 +380,6 @@ describe DaruLite::DataFrame do
450
380
  end
451
381
  end
452
382
 
453
- context "#[]" do
454
- context DaruLite::Index do
455
- before :each do
456
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
457
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
458
- index: [:one, :two, :three, :four, :five])
459
- end
460
-
461
- it "returns a Vector" do
462
- expect(@df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
463
- end
464
-
465
- it "returns a Vector by default" do
466
- expect(@df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
467
- index: [:one, :two, :three, :four, :five]))
468
- end
469
-
470
- it "returns a DataFrame" do
471
- temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
472
- order: [:a, :b], index: [:one, :two, :three, :four, :five])
473
-
474
- expect(@df[:a, :b]).to eq(temp)
475
- end
476
-
477
- it "accesses vector with Integer index" do
478
- expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
479
- end
480
-
481
- it "returns a subset of DataFrame when specified range" do
482
- subset = @df[:b..:c]
483
- expect(subset).to eq(DaruLite::DataFrame.new({
484
- b: [11,12,13,14,15],
485
- c: [11,22,33,44,55]
486
- }, index: [:one, :two, :three, :four, :five]))
487
- end
488
-
489
- it 'accepts axis parameter as a last argument' do
490
- expect(@df[:a, :vector]).to eq @df[:a]
491
- expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
492
- end
493
- end
494
-
495
- context DaruLite::MultiIndex do
496
- it "accesses vector with an integer index" do
497
- expect(@df_mi[0]).to eq(
498
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
499
- end
500
-
501
- it "returns a vector when specifying full tuple" do
502
- expect(@df_mi[:a, :one, :bar]).to eq(
503
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
504
- end
505
-
506
- it "returns DataFrame when specified first layer of MultiIndex" do
507
- sub_order = DaruLite::MultiIndex.from_tuples([
508
- [:one, :bar],
509
- [:two, :baz]
510
- ])
511
- expect(@df_mi[:a]).to eq(DaruLite::DataFrame.new([
512
- @vector_arry1,
513
- @vector_arry2
514
- ], index: @multi_index, order: sub_order))
515
- end
516
-
517
- it "returns a Vector if the last level of MultiIndex is tracked" do
518
- expect(@df_mi[:a, :one, :bar]).to eq(
519
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
520
- end
521
- end
522
- end
523
-
524
- context "#[]=" do
525
- context DaruLite::Index do
526
- before :each do
527
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
528
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
529
- index: [:one, :two, :three, :four, :five])
530
- end
531
-
532
- it "assigns directly with the []= operator" do
533
- @data_frame[:a] = [100,200,300,400,500]
534
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
535
- b: [11,12,13,14,15],
536
- a: [100,200,300,400,500],
537
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
538
- index: [:one, :two, :three, :four, :five]))
539
- end
540
-
541
- it "assigns new vector with default length if given just a value" do
542
- @df[:d] = 1.0
543
- expect(@df[:d]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
544
- index: [:one, :two, :three, :four, :five], name: :d))
545
- end
546
-
547
- it "updates vector with default length if given just a value" do
548
- @df[:c] = 1.0
549
- expect(@df[:c]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
550
- index: [:one, :two, :three, :four, :five], name: :c))
551
- end
552
-
553
- it "appends an Array as a DaruLite::Vector" do
554
- @df[:d] = [69,99,108,85,49]
555
-
556
- expect(@df.d.class).to eq(DaruLite::Vector)
557
- end
558
-
559
- it "appends an arbitrary enumerable as a DaruLite::Vector" do
560
- @df[:d] = Set.new([69,99,108,85,49])
561
-
562
- expect(@df[:d]).to eq(DaruLite::Vector.new([69, 99, 108, 85, 49],
563
- index: [:one, :two, :three, :four, :five], name: :c))
564
- end
565
-
566
- it "replaces an already present vector" do
567
- @df[:a] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
568
-
569
- expect(@df.a).to eq([69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five]))
570
- end
571
-
572
- it "appends a new vector to the DataFrame" do
573
- @df[:woo] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
574
-
575
- expect(@df.vectors).to eq([:a, :b, :c, :woo].to_index)
576
- end
577
-
578
- it "creates an index for the new vector if not specified" do
579
- @df[:woo] = [69,99,108,85,49]
580
-
581
- expect(@df.woo.index).to eq([:one, :two, :three, :four, :five].to_index)
582
- end
583
-
584
- it "matches index of vector to be inserted with the DataFrame index" do
585
- @df[:shankar] = [69,99,108,85,49].dv(:shankar, [:two, :one, :three, :five, :four])
586
-
587
- expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
588
- [:one, :two, :three, :four, :five]))
589
- end
590
-
591
- it "matches index of vector to be inserted, inserting nils where no match found" do
592
- @df[:shankar] = [1,2,3].dv(:shankar, [:one, :james, :hetfield])
593
-
594
- expect(@df.shankar).to eq([1,nil,nil,nil,nil].dv(:shankar, [:one, :two, :three, :four, :five]))
595
- end
596
-
597
- it "raises error for Array assignment of wrong length" do
598
- expect{
599
- @df[:shiva] = [1,2,3]
600
- }.to raise_error
601
- end
602
-
603
- it "assigns correct name given empty dataframe" do
604
- df_empty = DaruLite::DataFrame.new({})
605
- df_empty[:a] = 1..5
606
- df_empty[:b] = 1..5
607
-
608
- expect(df_empty[:a].name).to equal(:a)
609
- expect(df_empty[:b].name).to equal(:b)
610
- end
611
-
612
- it "appends multiple vectors at a time" do
613
- # TODO
614
- end
615
- end
616
-
617
- context DaruLite::MultiIndex do
618
- it "raises error when incomplete index specified but index is absent" do
619
- expect {
620
- @df_mi[:d] = [100,200,300,400,100,200,300,400,100,200,300,400]
621
- }.to raise_error
622
- end
623
-
624
- it "assigns all sub-indexes when a top level index is specified" do
625
- @df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
626
-
627
- expect(@df_mi).to eq(DaruLite::DataFrame.new([
628
- [100,200,300,400,100,200,300,400,100,200,300,400],
629
- [100,200,300,400,100,200,300,400,100,200,300,400],
630
- @vector_arry1,
631
- @vector_arry2], index: @multi_index, order: @order_mi))
632
- end
633
-
634
- it "creates a new vector when full index specfied" do
635
- order = DaruLite::MultiIndex.from_tuples([
636
- [:a,:one,:bar],
637
- [:a,:two,:baz],
638
- [:b,:two,:foo],
639
- [:b,:one,:foo],
640
- [:c,:one,:bar]])
641
- answer = DaruLite::DataFrame.new([
642
- @vector_arry1,
643
- @vector_arry2,
644
- @vector_arry1,
645
- @vector_arry2,
646
- [100,200,300,400,100,200,300,400,100,200,300,400]
647
- ], index: @multi_index, order: order)
648
- @df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]
649
-
650
- expect(@df_mi).to eq(answer)
651
- end
652
-
653
- it "assigns correct name given empty dataframe" do
654
- df_empty = DaruLite::DataFrame.new([], index: @multi_index, order: @order_mi)
655
- df_empty[:c, :one, :bar] = 1..12
656
-
657
- expect(df_empty[:c, :one, :bar].name).to eq "conebar"
658
- end
659
- end
660
- end
661
-
662
383
  context '#method_missing' do
663
384
  let(:df) { DaruLite::DataFrame.new({
664
385
  :a => [1, 2, 3, 4, 5],
@@ -720,197 +441,6 @@ describe DaruLite::DataFrame do
720
441
  end
721
442
  end
722
443
 
723
- context '#add_vector' do
724
- subject(:data_frame) {
725
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
726
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
727
- index: [:one, :two, :three, :four, :five])
728
- }
729
- before {
730
- data_frame.add_vector :a, [100,200,300,400,500]
731
- }
732
-
733
- it { is_expected.to eq(DaruLite::DataFrame.new({
734
- b: [11,12,13,14,15],
735
- a: [100,200,300,400,500],
736
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
737
- index: [:one, :two, :three, :four, :five]))
738
- }
739
- end
740
-
741
- context "#insert_vector" do
742
- subject(:data_frame) {
743
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
744
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
745
- index: [:one, :two, :three, :four, :five])
746
- }
747
-
748
- it "insert a new vector at the desired slot" do
749
- df = DaruLite::DataFrame.new({
750
- a: [1,2,3,4,5],
751
- d: [710, 720, 730, 740, 750],
752
- b: [11, 12, 13, 14, 15],
753
- c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
754
- index: [:one, :two, :three, :four, :five]
755
- )
756
- data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
757
- expect(subject).to eq df
758
- end
759
-
760
- it "raises error for data array being too big" do
761
- expect {
762
- source = (1..8).to_a
763
- data_frame.insert_vector 1, :d, source
764
- }.to raise_error(IndexError)
765
- end
766
-
767
- it "raises error for invalid index value" do
768
- expect {
769
- source = (1..5).to_a
770
- data_frame.insert_vector 4, :d, source
771
- }.to raise_error(ArgumentError)
772
- end
773
-
774
- it "raises error for invalid source type" do
775
- expect {
776
- source = 14
777
- data_frame.insert_vector 3, :d, source
778
- }.to raise_error(ArgumentError)
779
- end
780
- end
781
-
782
- context "#row[]=" do
783
- context DaruLite::Index do
784
- before :each do
785
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
786
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
787
- index: [:one, :two, :three, :four, :five])
788
- end
789
-
790
- it "assigns specified row when Array" do
791
- @df.row[:one] = [49, 99, 59]
792
-
793
- expect(@df.row[:one]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
794
- expect(@df.row[:one].index).to eq([:a, :b, :c].to_index)
795
- expect(@df.row[:one].name) .to eq(:one)
796
- end
797
-
798
- it "assigns specified row when DV" do
799
- @df.row[:one] = [49, 99, 59].dv(nil, [:a, :b, :c])
800
-
801
- expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
802
- end
803
-
804
- it "assigns correct elements when Vector of different index" do
805
- @df.row[:one] = DaruLite::Vector.new([44,62,11], index: [:b,:f,:a])
806
-
807
- expect(@df.row[:one]).to eq(DaruLite::Vector.new([11,44,nil], index: [:a,:b,:c]))
808
- end
809
-
810
- it "creates a new row from an Array" do
811
- @df.row[:patekar] = [9,2,11]
812
-
813
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
814
- end
815
-
816
- it "creates a new row from a DV" do
817
- @df.row[:patekar] = [9,2,11].dv(nil, [:a, :b, :c])
818
-
819
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
820
- end
821
-
822
- it "creates a new row from numeric row index and named DV" do
823
- @df.row[2] = [9,2,11].dv(nil, [:a, :b, :c])
824
-
825
- expect(@df.row[2]).to eq([9,2,11].dv(nil, [:a, :b, :c]))
826
- end
827
-
828
- it "correctly aligns assigned DV by index" do
829
- @df.row[:two] = [9,2,11].dv(nil, [:b, :a, :c])
830
-
831
- expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
832
- end
833
-
834
- it "correctlu aligns assinged DV by index for new rows" do
835
- @df.row[:latest] = DaruLite::Vector.new([2,3,1], index: [:b,:c,:a])
836
-
837
- expect(@df.row[:latest]).to eq(DaruLite::Vector.new([1,2,3], index: [:a,:b,:c]))
838
- end
839
-
840
- it "inserts nils for indexes that dont exist in the DataFrame" do
841
- @df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])
842
-
843
- expect(@df.row[:two]).to eq([nil,nil,nil].dv(nil, [:a, :b, :c]))
844
- end
845
-
846
- it "correctly inserts row of a different length by matching indexes" do
847
- @df.row[:four] = [5,4,3,2,1,3].dv(nil, [:you, :have, :a, :big, :appetite, :spock])
848
-
849
- expect(@df.row[:four]).to eq([3,nil,nil].dv(:four, [:a, :b, :c]))
850
- end
851
-
852
- it "raises error for row insertion by Array of wrong length" do
853
- expect{
854
- @df.row[:one] = [1,2,3,4,5,6,7]
855
- }.to raise_error
856
- end
857
- end
858
-
859
- context DaruLite::MultiIndex do
860
- pending
861
- # TO DO
862
- end
863
-
864
- context DaruLite::CategoricalIndex do
865
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
866
- let(:df) do
867
- DaruLite::DataFrame.new({
868
- a: 'a'..'e',
869
- b: 1..5
870
- }, index: idx)
871
- end
872
-
873
- context "modify exiting row" do
874
- context "single category" do
875
- subject { df }
876
- before { df.row[:a] = ['x', 'y'] }
877
-
878
- it { is_expected.to be_a DaruLite::DataFrame }
879
- its(:index) { is_expected.to eq idx }
880
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
881
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
882
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
883
- end
884
-
885
- context "multiple categories" do
886
- subject { df }
887
- before { df.row[:a, 1] = ['x', 'y'] }
888
-
889
- it { is_expected.to be_a DaruLite::DataFrame }
890
- its(:index) { is_expected.to eq idx }
891
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
892
- its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
893
- its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
894
- end
895
-
896
- context "positional index" do
897
- subject { df }
898
- before { df.row[0, 2] = ['x', 'y'] }
899
-
900
- it { is_expected.to be_a DaruLite::DataFrame }
901
- its(:index) { is_expected.to eq idx }
902
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
903
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
904
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
905
- end
906
- end
907
-
908
- context "add new row" do
909
- # TODO
910
- end
911
- end
912
- end
913
-
914
444
  context "#row.at" do
915
445
  context DaruLite::Index do
916
446
  let(:idx) { DaruLite::Index.new [1, 0, :c] }
@@ -1139,311 +669,21 @@ describe DaruLite::DataFrame do
1139
669
  end
1140
670
  end
1141
671
 
1142
- context "#row.set_at" do
1143
- let(:df) do
1144
- DaruLite::DataFrame.new({
1145
- a: 1..3,
1146
- b: 'a'..'c'
1147
- })
1148
- end
672
+ context "#row[]" do
673
+ context DaruLite::Index do
674
+ before :each do
675
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
676
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
677
+ index: [:one, :two, :three, :four, :five])
678
+ end
1149
679
 
1150
- context "single position" do
1151
- subject { df }
1152
- before { df.row.set_at [1], ['x', 'y'] }
680
+ it "creates an index for assignment if not already specified" do
681
+ @df.row[:one] = [49, 99, 59]
1153
682
 
1154
- its(:size) { is_expected.to eq 3 }
1155
- its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1156
- its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1157
- end
1158
-
1159
- context "multiple position" do
1160
- subject { df }
1161
- before { df.row.set_at [0, 2], ['x', 'y'] }
1162
-
1163
- its(:size) { is_expected.to eq 3 }
1164
- its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1165
- its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1166
- end
1167
-
1168
- context "invalid position" do
1169
- it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1170
- end
1171
-
1172
- context "invalid positions" do
1173
- it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1174
- end
1175
-
1176
- context "incorrect size" do
1177
- it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1178
- end
1179
- end
1180
-
1181
- context "#at" do
1182
- context DaruLite::Index do
1183
- let(:idx) { DaruLite::Index.new [:a, :b, :c] }
1184
- let(:df) do
1185
- DaruLite::DataFrame.new({
1186
- 1 => 1..3,
1187
- a: 'a'..'c',
1188
- b: 11..13
1189
- }, index: idx)
1190
- end
1191
-
1192
- context "single position" do
1193
- subject { df.at 1 }
1194
-
1195
- it { is_expected.to be_a DaruLite::Vector }
1196
- its(:size) { is_expected.to eq 3 }
1197
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1198
- its(:index) { is_expected.to eq idx }
1199
- end
1200
-
1201
- context "multiple positions" do
1202
- subject { df.at 0, 2 }
1203
-
1204
- it { is_expected.to be_a DaruLite::DataFrame }
1205
- its(:shape) { is_expected.to eq [3, 2] }
1206
- its(:index) { is_expected.to eq idx }
1207
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1208
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1209
- end
1210
-
1211
- context "single invalid position" do
1212
- it { expect { df. at 3 }.to raise_error IndexError }
1213
- end
1214
-
1215
- context "multiple invalid positions" do
1216
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1217
- end
1218
-
1219
- context "range" do
1220
- subject { df.at 0..1 }
1221
-
1222
- it { is_expected.to be_a DaruLite::DataFrame }
1223
- its(:shape) { is_expected.to eq [3, 2] }
1224
- its(:index) { is_expected.to eq idx }
1225
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1226
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1227
- end
1228
-
1229
- context "range with negative end" do
1230
- subject { df.at 0..-2 }
1231
-
1232
- it { is_expected.to be_a DaruLite::DataFrame }
1233
- its(:shape) { is_expected.to eq [3, 2] }
1234
- its(:index) { is_expected.to eq idx }
1235
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1236
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1237
- end
1238
-
1239
- context "range with single element" do
1240
- subject { df.at 1..1 }
1241
-
1242
- it { is_expected.to be_a DaruLite::DataFrame }
1243
- its(:shape) { is_expected.to eq [3, 1] }
1244
- its(:index) { is_expected.to eq idx }
1245
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1246
- end
1247
- end
1248
-
1249
- context DaruLite::MultiIndex do
1250
- let (:idx) do
1251
- DaruLite::MultiIndex.from_tuples [
1252
- [:a,:one,:bar],
1253
- [:a,:one,:baz],
1254
- [:b,:two,:bar],
1255
- ]
1256
- end
1257
- let(:df) do
1258
- DaruLite::DataFrame.new({
1259
- 1 => 1..3,
1260
- a: 'a'..'c',
1261
- b: 11..13
1262
- }, index: idx)
1263
- end
1264
-
1265
- context "single position" do
1266
- subject { df.at 1 }
1267
-
1268
- it { is_expected.to be_a DaruLite::Vector }
1269
- its(:size) { is_expected.to eq 3 }
1270
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1271
- its(:index) { is_expected.to eq idx }
1272
- end
1273
-
1274
- context "multiple positions" do
1275
- subject { df.at 0, 2 }
1276
-
1277
- it { is_expected.to be_a DaruLite::DataFrame }
1278
- its(:shape) { is_expected.to eq [3, 2] }
1279
- its(:index) { is_expected.to eq idx }
1280
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1281
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1282
- end
1283
-
1284
- context "single invalid position" do
1285
- it { expect { df. at 3 }.to raise_error IndexError }
1286
- end
1287
-
1288
- context "multiple invalid positions" do
1289
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1290
- end
1291
-
1292
- context "range" do
1293
- subject { df.at 0..1 }
1294
-
1295
- it { is_expected.to be_a DaruLite::DataFrame }
1296
- its(:shape) { is_expected.to eq [3, 2] }
1297
- its(:index) { is_expected.to eq idx }
1298
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1299
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1300
- end
1301
-
1302
- context "range with negative end" do
1303
- subject { df.at 0..-2 }
1304
-
1305
- it { is_expected.to be_a DaruLite::DataFrame }
1306
- its(:shape) { is_expected.to eq [3, 2] }
1307
- its(:index) { is_expected.to eq idx }
1308
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1309
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1310
- end
1311
-
1312
- context "range with single element" do
1313
- subject { df.at 1..1 }
1314
-
1315
- it { is_expected.to be_a DaruLite::DataFrame }
1316
- its(:shape) { is_expected.to eq [3, 1] }
1317
- its(:index) { is_expected.to eq idx }
1318
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1319
- end
1320
- end
1321
-
1322
- context DaruLite::CategoricalIndex do
1323
- let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
1324
- let(:df) do
1325
- DaruLite::DataFrame.new({
1326
- 1 => 1..3,
1327
- a: 'a'..'c',
1328
- b: 11..13
1329
- }, index: idx)
1330
- end
1331
-
1332
- context "single position" do
1333
- subject { df.at 1 }
1334
-
1335
- it { is_expected.to be_a DaruLite::Vector }
1336
- its(:size) { is_expected.to eq 3 }
1337
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1338
- its(:index) { is_expected.to eq idx }
1339
- end
1340
-
1341
- context "multiple positions" do
1342
- subject { df.at 0, 2 }
1343
-
1344
- it { is_expected.to be_a DaruLite::DataFrame }
1345
- its(:shape) { is_expected.to eq [3, 2] }
1346
- its(:index) { is_expected.to eq idx }
1347
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1348
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1349
- end
1350
-
1351
- context "single invalid position" do
1352
- it { expect { df. at 3 }.to raise_error IndexError }
1353
- end
1354
-
1355
- context "multiple invalid positions" do
1356
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1357
- end
1358
-
1359
- context "range" do
1360
- subject { df.at 0..1 }
1361
-
1362
- it { is_expected.to be_a DaruLite::DataFrame }
1363
- its(:shape) { is_expected.to eq [3, 2] }
1364
- its(:index) { is_expected.to eq idx }
1365
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1366
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1367
- end
1368
-
1369
- context "range with negative index" do
1370
- subject { df.at 0..-2 }
1371
-
1372
- it { is_expected.to be_a DaruLite::DataFrame }
1373
- its(:shape) { is_expected.to eq [3, 2] }
1374
- its(:index) { is_expected.to eq idx }
1375
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1376
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1377
- end
1378
-
1379
- context "range with single element" do
1380
- subject { df.at 1..1 }
1381
-
1382
- it { is_expected.to be_a DaruLite::DataFrame }
1383
- its(:shape) { is_expected.to eq [3, 1] }
1384
- its(:index) { is_expected.to eq idx }
1385
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1386
- end
1387
- end
1388
- end
1389
-
1390
- context "#set_at" do
1391
- let(:df) do
1392
- DaruLite::DataFrame.new({
1393
- 1 => 1..3,
1394
- a: 'a'..'c',
1395
- b: 11..13
1396
- })
1397
- end
1398
-
1399
- context "single position" do
1400
- subject { df }
1401
- before { df.set_at [1], ['x', 'y', 'z'] }
1402
-
1403
- its(:shape) { is_expected.to eq [3, 3] }
1404
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1405
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1406
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1407
- end
1408
-
1409
- context "multiple position" do
1410
- subject { df }
1411
- before { df.set_at [1, 2], ['x', 'y', 'z'] }
1412
-
1413
- its(:shape) { is_expected.to eq [3, 3] }
1414
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1415
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1416
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1417
- end
1418
-
1419
- context "invalid position" do
1420
- it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1421
- end
1422
-
1423
- context "invalid positions" do
1424
- it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1425
- end
1426
-
1427
- context "incorrect size" do
1428
- it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
1429
- end
1430
- end
1431
-
1432
- context "#row[]" do
1433
- context DaruLite::Index do
1434
- before :each do
1435
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1436
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1437
- index: [:one, :two, :three, :four, :five])
1438
- end
1439
-
1440
- it "creates an index for assignment if not already specified" do
1441
- @df.row[:one] = [49, 99, 59]
1442
-
1443
- expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
1444
- expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
1445
- expect(@df[:one, :row].name) .to eq(:one)
1446
- end
683
+ expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
684
+ expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
685
+ expect(@df[:one, :row].name) .to eq(:one)
686
+ end
1447
687
 
1448
688
  it "returns a DataFrame when specifying numeric Range" do
1449
689
  expect(@df.row[0..2]).to eq(
@@ -1488,7 +728,7 @@ describe DaruLite::DataFrame do
1488
728
 
1489
729
  context DaruLite::MultiIndex do
1490
730
  it "returns a Vector when specifying integer index" do
1491
- expect(@df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: @order_mi))
731
+ expect(df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: order_mi))
1492
732
  end
1493
733
 
1494
734
  it "returns a DataFrame whecn specifying numeric range" do
@@ -1497,16 +737,16 @@ describe DaruLite::DataFrame do
1497
737
  [:a,:one,:baz]
1498
738
  ])
1499
739
 
1500
- expect(@df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
740
+ expect(df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
1501
741
  [11,12],
1502
742
  [1,2],
1503
743
  [11,12],
1504
744
  [1,2]
1505
- ], order: @order_mi, index: sub_index, name: :numeric_range))
745
+ ], order: order_mi, index: sub_index, name: :numeric_range))
1506
746
  end
1507
747
 
1508
748
  it "returns a Vector when specifying complete tuple" do
1509
- expect(@df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: @order_mi))
749
+ expect(df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: order_mi))
1510
750
  end
1511
751
 
1512
752
  it "returns DataFrame when specifying first layer of MultiIndex" do
@@ -1516,12 +756,12 @@ describe DaruLite::DataFrame do
1516
756
  [:two,:foo],
1517
757
  [:two,:bar]
1518
758
  ])
1519
- expect(@df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
759
+ expect(df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
1520
760
  [11,12,13,14],
1521
761
  [1,2,3,4],
1522
762
  [11,12,13,14],
1523
763
  [1,2,3,4]
1524
- ], index: sub_index, order: @order_mi))
764
+ ], index: sub_index, order: order_mi))
1525
765
  end
1526
766
 
1527
767
  it "returns DataFrame when specifying first and second layer of MultiIndex" do
@@ -1529,12 +769,12 @@ describe DaruLite::DataFrame do
1529
769
  [:bar],
1530
770
  [:baz]
1531
771
  ])
1532
- expect(@df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
772
+ expect(df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
1533
773
  [11,12],
1534
774
  [1,2],
1535
775
  [11,12],
1536
776
  [1,2]
1537
- ], index: sub_index, order: @order_mi))
777
+ ], index: sub_index, order: order_mi))
1538
778
  end
1539
779
  end
1540
780
 
@@ -1596,107 +836,6 @@ describe DaruLite::DataFrame do
1596
836
  end
1597
837
  end
1598
838
 
1599
- context "#add_row" do
1600
- subject(:data_frame) {
1601
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1602
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1603
- index: [:one, :two, :three, :four, :five])
1604
- }
1605
- context 'named' do
1606
- before {
1607
- data_frame.add_row [100,200,300], :six
1608
- }
1609
-
1610
- it { is_expected.to eq(DaruLite::DataFrame.new({
1611
- a: [1,2,3,4,5,100],
1612
- b: [11,12,13,14,15,200],
1613
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1614
- index: [:one, :two, :three, :four, :five, :six]))
1615
- }
1616
- end
1617
-
1618
- context 'unnamed' do
1619
- before {
1620
- data_frame.add_row [100,200,300]
1621
- }
1622
-
1623
- it { is_expected.to eq(DaruLite::DataFrame.new({
1624
- a: [1,2,3,4,5,100],
1625
- b: [11,12,13,14,15,200],
1626
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1627
- index: [:one, :two, :three, :four, :five, 5]))
1628
- }
1629
- end
1630
-
1631
- context 'with mulitiindex DF' do
1632
- subject(:data_frame) {
1633
- DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1634
- c: [11,22,33]}, order: [:a, :b, :c],
1635
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
1636
- }
1637
-
1638
- before { data_frame.add_row [100,200,300], [:two, :five] }
1639
-
1640
- it { is_expected.to eq(DaruLite::DataFrame.new({
1641
- b: [11,12,13,200], a: [1,2,3,100],
1642
- c: [11,22,33,300]}, order: [:a, :b, :c],
1643
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
1644
- }
1645
- end
1646
-
1647
- it "allows adding rows after making empty DF by specfying only order" do
1648
- df = DaruLite::DataFrame.new({}, order: [:a, :b, :c])
1649
- df.add_row [1,2,3]
1650
- df.add_row [5,6,7]
1651
-
1652
- expect(df[:a]).to eq(DaruLite::Vector.new([1,5]))
1653
- expect(df[:b]).to eq(DaruLite::Vector.new([2,6]))
1654
- expect(df[:c]).to eq(DaruLite::Vector.new([3,7]))
1655
- expect(df.index).to eq(DaruLite::Index.new([0,1]))
1656
- end
1657
- end
1658
-
1659
- context "#first" do
1660
- it 'works' do
1661
- expect(@data_frame.first(2)).to eq(
1662
- DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1663
- order: [:a, :b, :c],
1664
- index: [:one, :two]))
1665
- end
1666
-
1667
- it 'works with too large values' do
1668
- expect(@data_frame.first(200)).to eq(@data_frame)
1669
- end
1670
-
1671
- it 'has synonym' do
1672
- expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1673
- end
1674
-
1675
- it 'works on DateTime indexes' do
1676
- idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
1677
- df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
1678
- first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
1679
- expect(df.head(1)).to eq(first)
1680
- end
1681
- end
1682
-
1683
- context "#last" do
1684
- it 'works' do
1685
- expect(@data_frame.last(2)).to eq(
1686
- DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1687
- order: [:a, :b, :c],
1688
- index: [:four, :five]))
1689
- end
1690
-
1691
- it 'works with too large values' do
1692
- expect(@data_frame.last(200)).to eq(@data_frame)
1693
- end
1694
-
1695
- it 'has synonym' do
1696
- expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1697
- end
1698
- end
1699
-
1700
839
  context "#==" do
1701
840
  it "compares by vectors, index and values of a DataFrame (ignores name)" do
1702
841
  a = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
@@ -1710,1100 +849,81 @@ describe DaruLite::DataFrame do
1710
849
  end
1711
850
 
1712
851
  context '#rename' do
1713
- subject { @data_frame.rename 'other' }
852
+ subject { df.rename 'other' }
1714
853
 
1715
854
  it { is_expected.to be_a DaruLite::DataFrame }
1716
855
  its(:name) { is_expected.to eq 'other' }
1717
856
  end
1718
857
 
1719
- context "#dup" do
858
+ context "#delete_vector" do
1720
859
  context DaruLite::Index do
1721
- it "dups every data structure inside DataFrame" do
1722
- clo = @data_frame.dup
1723
-
1724
- expect(clo.object_id) .not_to eq(@data_frame.object_id)
1725
- expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
1726
- expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
860
+ it "deletes the specified vector" do
861
+ df.delete_vector :a
1727
862
 
1728
- @data_frame.each_vector_with_index do |vector, index|
1729
- expect(vector.object_id).not_to eq(clo[index].object_id)
1730
- expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
1731
- end
863
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
864
+ c: [11,22,33,44,55]}, order: [:b, :c],
865
+ index: [:one, :two, :three, :four, :five]))
1732
866
  end
1733
867
  end
868
+ end
1734
869
 
1735
- context DaruLite::MultiIndex do
1736
- it "duplicates with multi index" do
1737
- clo = @df_mi.dup
870
+ context "#delete_vectors" do
871
+ context DaruLite::Index do
872
+ it "deletes the specified vectors" do
873
+ df.delete_vectors :a, :b
1738
874
 
1739
- expect(clo) .to eq(@df_mi)
1740
- expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
1741
- expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
875
+ expect(df).to eq(DaruLite::DataFrame.new({
876
+ c: [11,22,33,44,55]}, order: [:c],
877
+ index: [:one, :two, :three, :four, :five]))
1742
878
  end
1743
879
  end
1744
880
  end
1745
881
 
1746
- context '#reject_values' do
1747
- let(:df) do
1748
- DaruLite::DataFrame.new({
1749
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1750
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1751
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1752
- }, index: 11..18)
1753
- end
1754
- before { df.to_category :b }
1755
-
1756
- context 'remove nils only' do
1757
- subject { df.reject_values nil }
1758
- it { is_expected.to be_a DaruLite::DataFrame }
1759
- its(:'b.type') { is_expected.to eq :category }
1760
- its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1761
- its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1762
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1763
- its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1764
- end
1765
-
1766
- context 'remove Float::NAN only' do
1767
- subject { df.reject_values Float::NAN }
1768
- it { is_expected.to be_a DaruLite::DataFrame }
1769
- its(:'b.type') { is_expected.to eq :category }
1770
- its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1771
- its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1772
- its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1773
- its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1774
- end
882
+ context "#delete_row" do
883
+ it "deletes the specified row" do
884
+ df.delete_row :three
1775
885
 
1776
- context 'remove both nil and Float::NAN' do
1777
- subject { df.reject_values nil, Float::NAN }
1778
- it { is_expected.to be_a DaruLite::DataFrame }
1779
- its(:'b.type') { is_expected.to eq :category }
1780
- its(:'a.to_a') { is_expected.to eq [1, 7] }
1781
- its(:'b.to_a') { is_expected.to eq [:a, 8] }
1782
- its(:'c.to_a') { is_expected.to eq ['a', 7] }
1783
- its(:'index.to_a') { is_expected.to eq [11, 18] }
886
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
887
+ c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
1784
888
  end
889
+ end
1785
890
 
1786
- context 'any other values' do
1787
- subject { df.reject_values 1, 5 }
1788
- it { is_expected.to be_a DaruLite::DataFrame }
1789
- its(:'b.type') { is_expected.to eq :category }
1790
- its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1791
- its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1792
- its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1793
- its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
891
+ context "#rename_vectors!" do
892
+ before do
893
+ @df = DaruLite::DataFrame.new({
894
+ a: [1,2,3,4,5],
895
+ b: [11,22,33,44,55],
896
+ c: %w(a b c d e)
897
+ })
1794
898
  end
1795
899
 
1796
- context 'when resultant dataframe has one row' do
1797
- subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1798
- it { is_expected.to be_a DaruLite::DataFrame }
1799
- its(:'b.type') { is_expected.to eq :category }
1800
- its(:'a.to_a') { is_expected.to eq [7] }
1801
- its(:'b.to_a') { is_expected.to eq [8] }
1802
- its(:'c.to_a') { is_expected.to eq [7] }
1803
- its(:'index.to_a') { is_expected.to eq [18] }
900
+ it "returns self as modified dataframe" do
901
+ expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
1804
902
  end
1805
903
 
1806
- context 'when resultant dataframe is empty' do
1807
- subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1808
- it { is_expected.to be_a DaruLite::DataFrame }
1809
- its(:'b.type') { is_expected.to eq :category }
1810
- its(:'a.to_a') { is_expected.to eq [] }
1811
- its(:'b.to_a') { is_expected.to eq [] }
1812
- its(:'c.to_a') { is_expected.to eq [] }
1813
- its(:'index.to_a') { is_expected.to eq [] }
904
+ it "re-uses rename_vectors method" do
905
+ name_map = { :a => :alpha, :c => :gamma }
906
+ expect(@df).to receive(:rename_vectors).with(name_map)
907
+ @df.rename_vectors! name_map
1814
908
  end
1815
909
  end
1816
910
 
1817
- context '#replace_values' do
1818
- subject do
1819
- DaruLite::DataFrame.new({
1820
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1821
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1822
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
911
+ context "#rename_vectors" do
912
+ before do
913
+ @df = DaruLite::DataFrame.new({
914
+ a: [1,2,3,4,5],
915
+ b: [11,22,33,44,55],
916
+ c: %w(a b c d e)
1823
917
  })
1824
918
  end
1825
- before { subject.to_category :b }
1826
919
 
1827
- context 'replace nils only' do
1828
- before { subject.replace_values nil, 10 }
1829
- it { is_expected.to be_a DaruLite::DataFrame }
1830
- its(:'b.type') { is_expected.to eq :category }
1831
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1832
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1833
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
920
+ it "returns DaruLite::Index" do
921
+ expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
1834
922
  end
1835
923
 
1836
- context 'replace Float::NAN only' do
1837
- before { subject.replace_values Float::NAN, 10 }
1838
- it { is_expected.to be_a DaruLite::DataFrame }
1839
- its(:'b.type') { is_expected.to eq :category }
1840
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1841
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1842
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1843
- end
1844
-
1845
- context 'replace both nil and Float::NAN' do
1846
- before { subject.replace_values [nil, Float::NAN], 10 }
1847
- it { is_expected.to be_a DaruLite::DataFrame }
1848
- its(:'b.type') { is_expected.to eq :category }
1849
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1850
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1851
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1852
- end
1853
-
1854
- context 'replace other values' do
1855
- before { subject.replace_values [1, 5], 10 }
1856
- it { is_expected.to be_a DaruLite::DataFrame }
1857
- its(:'b.type') { is_expected.to eq :category }
1858
- its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1859
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1860
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1861
- end
1862
- end
1863
-
1864
- describe 'uniq' do
1865
- let(:df) do
1866
- DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1867
- end
1868
-
1869
- context 'with no args' do
1870
- it do
1871
- result = df.uniq
1872
- expect(result.shape.first).to eq 30
1873
- end
1874
- end
1875
-
1876
- context 'given a vector' do
1877
- it do
1878
- result = df.uniq("color")
1879
- expect(result.shape.first).to eq 2
1880
- end
1881
- end
1882
-
1883
- context 'given an array of vectors' do
1884
- it do
1885
- result = df.uniq("color", "director_name")
1886
- expect(result.shape.first).to eq 29
1887
- end
1888
- end
1889
- end
1890
-
1891
- context '#rolling_fillna!' do
1892
- subject do
1893
- DaruLite::DataFrame.new({
1894
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1895
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1896
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1897
- })
1898
- end
1899
-
1900
- context 'rolling_fillna! forwards' do
1901
- before { subject.rolling_fillna!(:forward) }
1902
- it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
1903
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1904
- its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1905
- its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1906
- end
1907
-
1908
- context 'rolling_fillna! backwards' do
1909
- before { subject.rolling_fillna!(:backward) }
1910
- it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
1911
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1912
- its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
1913
- its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1914
- end
1915
- end
1916
-
1917
- context "#clone" do
1918
- it "returns a view of the whole dataframe" do
1919
- cloned = @data_frame.clone
1920
- expect(@data_frame.object_id).to_not eq(cloned.object_id)
1921
- expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
1922
- expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
1923
- expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
1924
- end
1925
-
1926
- it "returns a view of selected vectors" do
1927
- cloned = @data_frame.clone(:a, :b)
1928
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1929
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1930
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1931
- end
1932
-
1933
- it "clones properly when supplied array" do
1934
- cloned = @data_frame.clone([:a, :b])
1935
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1936
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1937
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1938
- end
1939
-
1940
- it "original dataframe remains unaffected when operations are applied
1941
- on cloned data frame" do
1942
- original = @data_frame.dup
1943
- cloned = @data_frame.clone
1944
- cloned.delete_vector :a
1945
-
1946
- expect(@data_frame).to eq(original)
1947
- end
1948
-
1949
- end
1950
-
1951
- context "#clone_only_valid" do
1952
- let(:df_with_missing) {
1953
- DaruLite::DataFrame.new({
1954
- a: [1 , 2, 3, nil, 4, nil, 5],
1955
- b: [nil, 2, 3, nil, 4, nil, 5],
1956
- c: [1, 2, 3, 43 , 4, nil, 5]
1957
- })
1958
- }
1959
-
1960
- let(:df_without_missing) {
1961
- DaruLite::DataFrame.new({
1962
- a: [2,3,4,5],
1963
- c: [2,3,4,5]
1964
- })
1965
- }
1966
- it 'does the most reasonable thing' do
1967
- expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*DaruLite::MISSING_VALUES))
1968
- expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1969
- end
1970
- end
1971
-
1972
- context "#clone_structure" do
1973
- it "clones only the index and vector structures of the data frame" do
1974
- cs = @data_frame.clone_structure
1975
-
1976
- expect(cs.vectors).to eq(@data_frame.vectors)
1977
- expect(cs.index).to eq(@data_frame.index)
1978
- expect(cs[:a]).to eq(DaruLite::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
1979
- end
1980
- end
1981
-
1982
- context "#each_index" do
1983
- it "iterates over index" do
1984
- idxs = []
1985
- ret = @data_frame.each_index do |index|
1986
- idxs << index
1987
- end
1988
-
1989
- expect(idxs).to eq([:one, :two, :three, :four, :five])
1990
-
1991
- expect(ret).to eq(@data_frame)
1992
- end
1993
- end
1994
-
1995
- context "#each_vector_with_index" do
1996
- it "iterates over vectors with index" do
1997
- idxs = []
1998
- ret = @data_frame.each_vector_with_index do |vector, index|
1999
- idxs << index
2000
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2001
- expect(vector.class).to eq(DaruLite::Vector)
2002
- end
2003
-
2004
- expect(idxs).to eq([:a, :b, :c])
2005
-
2006
- expect(ret).to eq(@data_frame)
2007
- end
2008
- end
2009
-
2010
- context "#each_row_with_index" do
2011
- it "iterates over rows with indexes" do
2012
- idxs = []
2013
- ret = @data_frame.each_row_with_index do |row, idx|
2014
- idxs << idx
2015
- expect(row.index).to eq([:a, :b, :c].to_index)
2016
- expect(row.class).to eq(DaruLite::Vector)
2017
- end
2018
-
2019
- expect(idxs).to eq([:one, :two, :three, :four, :five])
2020
- expect(ret) .to eq(@data_frame)
2021
- end
2022
- end
2023
-
2024
- context "#each" do
2025
- it "iterates over rows" do
2026
- ret = @data_frame.each(:row) do |row|
2027
- expect(row.index).to eq([:a, :b, :c].to_index)
2028
- expect(row.class).to eq(DaruLite::Vector)
2029
- end
2030
-
2031
- expect(ret).to eq(@data_frame)
2032
- end
2033
-
2034
- it "iterates over all vectors" do
2035
- ret = @data_frame.each do |vector|
2036
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2037
- expect(vector.class).to eq(DaruLite::Vector)
2038
- end
2039
-
2040
- expect(ret).to eq(@data_frame)
2041
- end
2042
-
2043
- it "returns Enumerable if no block specified" do
2044
- ret = @data_frame.each
2045
- expect(ret.is_a?(Enumerator)).to eq(true)
2046
- end
2047
-
2048
- it "raises on unknown axis" do
2049
- expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
2050
- end
2051
- end
2052
-
2053
- context "#recode" do
2054
- before do
2055
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2056
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2057
- index: [:one, :two, :three, :four, :five])
2058
-
2059
- @ans_rows = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2060
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2061
- index: [:one, :two, :three, :four, :five])
2062
-
2063
- @data_frame_date_time = @data_frame.dup
2064
- @data_frame_date_time.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
2065
-
2066
- @ans_vector_date_time = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2067
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2068
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2069
-
2070
- @ans_rows_date_time = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2071
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2072
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2073
- end
2074
-
2075
- it "maps over the vectors of a DataFrame and returns a DataFrame" do
2076
- ret = @data_frame.recode do |vector|
2077
- vector.map! { |e| e += 10}
2078
- end
2079
-
2080
- expect(ret).to eq(@ans_vector)
2081
- end
2082
-
2083
- it "maps over the rows of a DataFrame and returns a DataFrame" do
2084
- ret = @data_frame.recode(:row) do |row|
2085
- expect(row.class).to eq(DaruLite::Vector)
2086
- row.map! { |e| e*e }
2087
- end
2088
-
2089
- expect(ret).to eq(@ans_rows)
2090
- end
2091
-
2092
- it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2093
- ret = @data_frame_date_time.recode do |vector|
2094
- vector.map! { |e| e += 10}
2095
- end
2096
-
2097
- expect(ret).to eq(@ans_vector_date_time)
2098
- end
2099
-
2100
- it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2101
- ret = @data_frame_date_time.recode(:row) do |row|
2102
- expect(row.class).to eq(DaruLite::Vector)
2103
- row.map! { |e| e*e }
2104
- end
2105
-
2106
- expect(ret).to eq(@ans_rows_date_time)
2107
- end
2108
-
2109
- end
2110
-
2111
- context "#collect" do
2112
- before do
2113
- @df = DaruLite::DataFrame.new({
2114
- a: [1,2,3,4,5],
2115
- b: [11,22,33,44,55],
2116
- c: [1,2,3,4,5]
2117
- })
2118
- end
2119
-
2120
- it "collects calculation over rows and returns a Vector from the results" do
2121
- expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
2122
- DaruLite::Vector.new([2,8,18,32,50])
2123
- )
2124
- end
2125
-
2126
- it "collects calculation over vectors and returns a Vector from the results" do
2127
- expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
2128
- DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
2129
- )
2130
- end
2131
- end
2132
-
2133
- context "#map" do
2134
- it "iterates over rows and returns an Array" do
2135
- ret = @data_frame.map(:row) do |row|
2136
- expect(row.class).to eq(DaruLite::Vector)
2137
- row[:a] * row[:c]
2138
- end
2139
-
2140
- expect(ret).to eq([11, 44, 99, 176, 275])
2141
- expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
2142
- end
2143
-
2144
- it "iterates over vectors and returns an Array" do
2145
- ret = @data_frame.map do |vector|
2146
- vector.mean
2147
- end
2148
- expect(ret).to eq([3.0, 13.0, 33.0])
2149
- end
2150
- end
2151
-
2152
- context "#map!" do
2153
- before do
2154
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2155
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2156
- index: [:one, :two, :three, :four, :five])
2157
-
2158
- @ans_row = DaruLite::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
2159
- c: [12,23,34,45,56]}, order: [:a, :b, :c],
2160
- index: [:one, :two, :three, :four, :five])
2161
- end
2162
-
2163
- it "destructively maps over the vectors and changes the DF" do
2164
- @data_frame.map! do |vector|
2165
- vector + 10
2166
- end
2167
- expect(@data_frame).to eq(@ans_vector)
2168
- end
2169
-
2170
- it "destructively maps over the rows and changes the DF" do
2171
- @data_frame.map!(:row) do |row|
2172
- row + 1
2173
- end
2174
-
2175
- expect(@data_frame).to eq(@ans_row)
2176
- end
2177
- end
2178
-
2179
- context "#map_vectors_with_index" do
2180
- it "iterates over vectors with index and returns an Array" do
2181
- idx = []
2182
- ret = @data_frame.map_vectors_with_index do |vector, index|
2183
- idx << index
2184
- vector.recode { |e| e += 10}
2185
- end
2186
-
2187
- expect(ret).to eq([
2188
- DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
2189
- DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
2190
- DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
2191
- expect(idx).to eq([:a, :b, :c])
2192
- end
2193
- end
2194
-
2195
- # FIXME: collect_VECTORS_with_index, but map_VECTOR_with_index -- ??? -- zverok
2196
- # (Not saying about unfortunate difference between them...)
2197
- context "#collect_vector_with_index" do
2198
- it "iterates over vectors with index and returns an Array" do
2199
- idx = []
2200
- ret = @data_frame.collect_vector_with_index do |vector, index|
2201
- idx << index
2202
- vector.sum
2203
- end
2204
-
2205
- expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2206
- expect(idx).to eq([:a, :b, :c])
2207
- end
2208
- end
2209
-
2210
- context "#map_rows_with_index" do
2211
- it "iterates over rows with index and returns an Array" do
2212
- idx = []
2213
- ret = @data_frame.map_rows_with_index do |row, index|
2214
- idx << index
2215
- expect(row.class).to eq(DaruLite::Vector)
2216
- row[:a] * row[:c]
2217
- end
2218
-
2219
- expect(ret).to eq([11, 44, 99, 176, 275])
2220
- expect(idx).to eq([:one, :two, :three, :four, :five])
2221
- end
2222
- end
2223
-
2224
- context '#collect_row_with_index' do
2225
- it "iterates over rows with index and returns a Vector" do
2226
- idx = []
2227
- ret = @data_frame.collect_row_with_index do |row, index|
2228
- idx << index
2229
- expect(row.class).to eq(DaruLite::Vector)
2230
- row[:a] * row[:c]
2231
- end
2232
-
2233
- expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2234
- expect(ret).to eq(expected)
2235
- expect(idx).to eq([:one, :two, :three, :four, :five])
2236
- end
2237
- end
2238
-
2239
- context "#delete_vector" do
2240
- context DaruLite::Index do
2241
- it "deletes the specified vector" do
2242
- @data_frame.delete_vector :a
2243
-
2244
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
2245
- c: [11,22,33,44,55]}, order: [:b, :c],
2246
- index: [:one, :two, :three, :four, :five]))
2247
- end
2248
- end
2249
- end
2250
-
2251
- context "#delete_vectors" do
2252
- context DaruLite::Index do
2253
- it "deletes the specified vectors" do
2254
- @data_frame.delete_vectors :a, :b
2255
-
2256
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
2257
- c: [11,22,33,44,55]}, order: [:c],
2258
- index: [:one, :two, :three, :four, :five]))
2259
- end
2260
- end
2261
- end
2262
-
2263
- context "#delete_row" do
2264
- it "deletes the specified row" do
2265
- @data_frame.delete_row :three
2266
-
2267
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
2268
- c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
2269
- end
2270
- end
2271
-
2272
- context "#keep_row_if" do
2273
- pending "changing row from under the iterator trips this"
2274
- it "keeps row if block evaluates to true" do
2275
- df = DaruLite::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
2276
- c: [10,20,30,40,50]}, order: [:a, :b, :c],
2277
- index: [:one, :two, :three, :four, :five])
2278
-
2279
- df.keep_row_if do |row|
2280
- row[:a] % 10 == 0
2281
- end
2282
- # TODO: write expectation
2283
- end
2284
- end
2285
-
2286
- context "#keep_vector_if" do
2287
- it "keeps vector if block evaluates to true" do
2288
- @data_frame.keep_vector_if do |vector|
2289
- vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
2290
- end
2291
-
2292
- expect(@data_frame).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
2293
- index: [:one, :two, :three, :four, :five]))
2294
- end
2295
- end
2296
-
2297
- context "#filter_field" do
2298
- before do
2299
- @df = DaruLite::DataFrame.new({
2300
- :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
2301
- :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
2302
- :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
2303
- :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
2304
- :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
2305
- order: [:id, :name, :age, :city, :a1])
2306
- end
2307
-
2308
- it "creates new vector with the data of a given field for which block returns true" do
2309
- filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
2310
- expect(filtered).to eq(DaruLite::Vector.new([2,4]))
2311
- end
2312
- end
2313
-
2314
- context "#filter_rows" do
2315
- context DaruLite::Index do
2316
- context "when specified no index" do
2317
- it "filters rows" do
2318
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2319
-
2320
- a = df.filter_rows do |row|
2321
- row[:a] % 2 == 0
2322
- end
2323
-
2324
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
2325
- end
2326
- end
2327
-
2328
- context "when specified numerical index" do
2329
- it "filters rows" do
2330
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
2331
-
2332
- a = df.filter_rows do |row|
2333
- row[:a] % 2 == 0
2334
- end
2335
-
2336
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
2337
- end
2338
- end
2339
-
2340
- it "preserves names of vectors" do
2341
- df = DaruLite::DataFrame.new a: 1..3, b: 4..6
2342
- df1 = df.filter_rows { |r| r[:a] != 2 }
2343
-
2344
- expect(df1[:a].name).to eq(df[:a].name)
2345
- end
2346
- end
2347
- end
2348
-
2349
- context "#filter_vectors" do
2350
- context DaruLite::Index do
2351
- it "filters vectors" do
2352
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2353
-
2354
- a = df.filter_vectors do |vector|
2355
- vector[0] == 1
2356
- end
2357
-
2358
- expect(a).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
2359
- end
2360
- end
2361
- end
2362
-
2363
- context "#filter" do
2364
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2365
- it "dispatches" do
2366
- expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2367
- eq df.filter_rows{|r| r[:a] % 2 == 0 }
2368
-
2369
- expect(df.filter(:vector){|v| v[0] == 1}).to \
2370
- eq df.filter_vectors{|v| v[0] == 1}
2371
-
2372
- expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2373
- end
2374
- end
2375
-
2376
- context "#to_a" do
2377
- context DaruLite::Index do
2378
- it "converts DataFrame into array of hashes" do
2379
- arry = @data_frame.to_a
2380
-
2381
- expect(arry).to eq(
2382
- [
2383
- [
2384
- {a: 1, b: 11, c: 11},
2385
- {a: 2, b: 12, c: 22},
2386
- {a: 3, b: 13, c: 33},
2387
- {a: 4, b: 14, c: 44},
2388
- {a: 5, b: 15, c: 55}
2389
- ],
2390
- [
2391
- :one, :two, :three, :four, :five
2392
- ]
2393
- ])
2394
- end
2395
- end
2396
-
2397
- context DaruLite::MultiIndex do
2398
- pending
2399
- end
2400
- end
2401
-
2402
- context "#to_h" do
2403
- it "converts to a hash" do
2404
- expect(@data_frame.to_h).to eq(
2405
- {
2406
- a: DaruLite::Vector.new([1,2,3,4,5],
2407
- index: [:one, :two, :three, :four, :five]),
2408
- b: DaruLite::Vector.new([11,12,13,14,15],
2409
- index: [:one, :two, :three, :four, :five]),
2410
- c: DaruLite::Vector.new([11,22,33,44,55],
2411
- index: [:one, :two, :three, :four, :five])
2412
- }
2413
- )
2414
- end
2415
- end
2416
-
2417
- context "#sort" do
2418
- context DaruLite::Index do
2419
- before :each do
2420
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2421
- end
2422
-
2423
- it "sorts according to given vector order (bang)" do
2424
- a_sorter = lambda { |a| a }
2425
- ans = @df.sort([:a], by: { a: a_sorter })
2426
-
2427
- expect(ans).to eq(
2428
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
2429
- index: [2,1,0,4,5,3])
2430
- )
2431
- expect(ans).to_not eq(@df)
2432
- end
2433
-
2434
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2435
- ans = @df.sort([:a, :b])
2436
- expect(ans).to eq(
2437
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2438
- index: [2,1,0,5,4,3])
2439
- )
2440
- expect(ans).to_not eq(@df)
2441
- end
2442
- end
2443
-
2444
- context DaruLite::MultiIndex do
2445
- pending
2446
- end
2447
-
2448
- context DaruLite::CategoricalIndex do
2449
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2450
- let(:df) do
2451
- DaruLite::DataFrame.new({
2452
- a: [2, -1, 3, 4, 5],
2453
- b: ['x', 'y', 'x', 'a', 'y'],
2454
- c: [nil, nil, -2, 2, 1]
2455
- }, index: idx)
2456
- end
2457
-
2458
- context "ascending order" do
2459
- context "single vector" do
2460
- subject { df.sort [:a] }
2461
-
2462
- its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2463
- its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2464
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2465
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2466
- end
2467
-
2468
- context "multiple vectors" do
2469
- subject { df.sort [:c, :b] }
2470
-
2471
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2472
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2473
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2474
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2475
- end
2476
-
2477
- context "block" do
2478
- context "automatic handle nils" do
2479
- subject do
2480
- df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2481
- end
2482
-
2483
- its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2484
- its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2485
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2486
- its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2487
- end
2488
-
2489
- context "manually handle nils" do
2490
- subject do
2491
- df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2492
- end
2493
-
2494
- its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2495
- its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2496
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2497
- its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2498
- end
2499
- end
2500
- end
2501
-
2502
- context "descending order" do
2503
- context "single vector" do
2504
- subject { df.sort [:a], ascending: false }
2505
-
2506
- its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2507
- its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2508
- its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2509
- its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2510
- end
2511
-
2512
- context "multiple vectors" do
2513
- subject { df.sort [:c, :b], ascending: false }
2514
-
2515
- its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2516
- its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2517
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2518
- its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2519
- end
2520
-
2521
- context "block" do
2522
- context "automatic handle nils" do
2523
- subject do
2524
- df.sort [:c],
2525
- by: {c: lambda { |a| a.abs } },
2526
- handle_nils: true,
2527
- ascending: false
2528
- end
2529
-
2530
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2531
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2532
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2533
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2534
- end
2535
-
2536
- context "manually handle nils" do
2537
- subject do
2538
- df.sort [:c],
2539
- by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2540
- ascending: false
2541
- end
2542
-
2543
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2544
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2545
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2546
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2547
- end
2548
- end
2549
- end
2550
- end
2551
- end
2552
-
2553
- context "#sort!" do
2554
- context DaruLite::Index do
2555
- before :each do
2556
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
2557
- c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2558
- end
2559
-
2560
- it "sorts according to given vector order (bang)" do
2561
- a_sorter = lambda { |a| a }
2562
-
2563
- expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
2564
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
2565
- c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
2566
- )
2567
- end
2568
-
2569
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2570
- expect(@df.sort!([:a, :b])).to eq(
2571
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2572
- index: [2,1,0,5,4,3])
2573
- )
2574
- end
2575
-
2576
- it "sorts both vectors in descending order" do
2577
- expect(@df.sort!([:a,:b], ascending: [false, false])).to eq(
2578
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,9,1,-2,-1,5], c: ['aaaa','aaaaa','aaaaaa', 'a','aa', 'aaa'] },
2579
- index: [3,4,5,0,1,2])
2580
- )
2581
- end
2582
-
2583
- it "sorts one vector in desc and other is asc" do
2584
- expect(@df.sort!([:a, :b], ascending: [false, true])).to eq(
2585
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,-2,1,9,-1,5], c: ['aaaa','a','aaaaaa','aaaaa','aa','aaa']},
2586
- index: [3,0,5,4,1,2])
2587
- )
2588
- end
2589
-
2590
- it "sorts many vectors" do
2591
- d = DaruLite::DataFrame.new({a: [1,1,1,222,44,5,5,544], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2592
-
2593
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2594
- DaruLite::DataFrame.new({a: [544,222,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2595
- index: [7,3,4,6,5,0,1,2])
2596
- )
2597
- end
2598
-
2599
- it "places nils at the beginning when sorting ascedingly" do
2600
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2601
-
2602
- expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
2603
- DaruLite::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
2604
- index: [7,3,0,1,2,6,5,4])
2605
- )
2606
- end
2607
-
2608
- it "places nils at the beginning when sorting decendingly" do
2609
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2610
-
2611
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2612
- DaruLite::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2613
- index: [7,3,4,6,5,0,1,2])
2614
- )
2615
- end
2616
-
2617
- it "sorts vectors of non-numeric types with nils in ascending order" do
2618
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2619
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2620
-
2621
- expect(non_numeric.sort!([:c], ascending: [true])).to eq(
2622
- DaruLite::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
2623
- c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
2624
- index: [2, 5, 0, 1, 3, 4])
2625
- )
2626
- end
2627
-
2628
- it "sorts vectors of non-numeric types with nils in descending order" do
2629
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2630
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2631
-
2632
- expect(non_numeric.sort!([:c], ascending: [false])).to eq(
2633
- DaruLite::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
2634
- c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
2635
- index: [2, 5, 4, 3, 0, 1])
2636
- )
2637
- end
2638
-
2639
- it "sorts vectors with block provided and handle nils automatically" do
2640
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2641
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2642
-
2643
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
2644
- DaruLite::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
2645
- c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
2646
- index: [0, 3, 1, 2, 4, 5])
2647
- )
2648
- end
2649
-
2650
- it "sorts vectors with block provided and nils handled manually" do
2651
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2652
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2653
-
2654
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
2655
- DaruLite::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
2656
- c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
2657
- index: [1, 2, 4, 5, 0, 3])
2658
- )
2659
- end
2660
- end
2661
-
2662
- context DaruLite::MultiIndex do
2663
- pending
2664
- it "sorts the DataFrame when specified full tuple" do
2665
- @df_mi.sort([[:a,:one,:bar]])
2666
- end
2667
- end
2668
- end
2669
-
2670
- context "#index=" do
2671
- before :each do
2672
- @df = DaruLite::DataFrame.new({
2673
- a: [1,2,3,4,5],
2674
- b: [11,22,33,44,55],
2675
- c: %w(a b c d e)
2676
- })
2677
- end
2678
-
2679
- it "simply reassigns the index" do
2680
- @df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
2681
- expect(@df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
2682
- end
2683
-
2684
- it "raises error for improper length index" do
2685
- expect {
2686
- @df.index = DaruLite::Index.new([1,2])
2687
- }.to raise_error(ArgumentError)
2688
- end
2689
-
2690
- it "is able to accept array" do
2691
- @df.index = (1..5).to_a
2692
- expect(@df.index).to eq DaruLite::Index.new (1..5).to_a
2693
- end
2694
- end
2695
-
2696
- context '#order=' do
2697
- let(:df) do
2698
- DaruLite::DataFrame.new({
2699
- a: [1, 2, 3],
2700
- b: [4, 5, 6]
2701
- }, order: [:a, :b])
2702
- end
2703
-
2704
- context 'correct order' do
2705
- before { df.order = [:b, :a] }
2706
- subject { df }
2707
-
2708
- its(:'vectors.to_a') { is_expected.to eq [:b, :a] }
2709
- its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
2710
- its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
2711
- end
2712
-
2713
- context 'insufficient vectors' do
2714
- it { expect { df.order = [:a] }.to raise_error }
2715
- end
2716
-
2717
- context 'wrong vectors' do
2718
- it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
2719
- end
2720
-
2721
- context 'different vector types' do
2722
- subject { df.order = new_order }
2723
-
2724
- let(:df) do
2725
- DaruLite::DataFrame.new({
2726
- 'a' => [1,2,3],
2727
- b: [4,5,6],
2728
- 3 => [5,7,9]
2729
- }, order: ['a', :b, 3])
2730
- end
2731
- let(:new_order) { [3, 'a', :b] }
2732
-
2733
- it "sets correct order" do
2734
- expect { subject }.to change { df.vectors.to_a }.to(new_order)
2735
- end
2736
- end
2737
- end
2738
-
2739
- context "#vectors=" do
2740
- before :each do
2741
- @df = DaruLite::DataFrame.new({
2742
- a: [1,2,3,4,5],
2743
- b: [11,22,33,44,55],
2744
- c: %w(a b c d e)
2745
- })
2746
- end
2747
-
2748
- it "simply reassigns vectors" do
2749
- @df.vectors = DaruLite::Index.new(['b',0,'m'])
2750
-
2751
- expect(@df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
2752
- expect(@df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
2753
- expect(@df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
2754
- expect(@df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
2755
- end
2756
-
2757
- it "raises error for improper length index" do
2758
- expect {
2759
- @df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
2760
- }.to raise_error(ArgumentError)
2761
- end
2762
-
2763
- it "change name of vectors in @data" do
2764
- new_index_array = [:k, :l, :m]
2765
- @df.vectors = DaruLite::Index.new(new_index_array)
2766
-
2767
- expect(@df.data.map { |vector| vector.name }).to eq(new_index_array)
2768
- end
2769
- end
2770
-
2771
- context "#rename_vectors!" do
2772
- before do
2773
- @df = DaruLite::DataFrame.new({
2774
- a: [1,2,3,4,5],
2775
- b: [11,22,33,44,55],
2776
- c: %w(a b c d e)
2777
- })
2778
- end
2779
-
2780
- it "returns self as modified dataframe" do
2781
- expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
2782
- end
2783
-
2784
- it "re-uses rename_vectors method" do
2785
- name_map = { :a => :alpha, :c => :gamma }
2786
- expect(@df).to receive(:rename_vectors).with(name_map)
2787
- @df.rename_vectors! name_map
2788
- end
2789
- end
2790
-
2791
- context "#rename_vectors" do
2792
- before do
2793
- @df = DaruLite::DataFrame.new({
2794
- a: [1,2,3,4,5],
2795
- b: [11,22,33,44,55],
2796
- c: %w(a b c d e)
2797
- })
2798
- end
2799
-
2800
- it "returns DaruLite::Index" do
2801
- expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
2802
- end
2803
-
2804
- it "renames vectors using a hash map" do
2805
- @df.rename_vectors :a => :alpha, :c => :gamma
2806
- expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
924
+ it "renames vectors using a hash map" do
925
+ @df.rename_vectors :a => :alpha, :c => :gamma
926
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
2807
927
  end
2808
928
 
2809
929
  it "overwrites vectors if the new name already exists" do
@@ -2849,604 +969,49 @@ describe DaruLite::DataFrame do
2849
969
  end
2850
970
  end
2851
971
 
2852
- context "#reindex" do
2853
- it "re indexes and aligns accordingly" do
2854
- df = DaruLite::DataFrame.new({
2855
- a: [1,2,3,4,5],
2856
- b: [11,22,33,44,55],
2857
- c: %w(a b c d e)
2858
- })
2859
-
2860
- ans = df.reindex(DaruLite::Index.new([1,3,0,8,2]))
2861
- expect(ans).to eq(DaruLite::DataFrame.new({
2862
- a: [2,4,1,nil,3],
2863
- b: [22,44,11,nil,33],
2864
- c: ['b','d','a',nil,'c']
2865
- }, index: DaruLite::Index.new([1,3,0,8,2])))
2866
- expect(ans).to_not eq(df)
2867
- end
2868
- end
2869
-
2870
- context "#reindex_vectors" do
2871
- it "re indexes vectors and aligns accordingly" do
2872
- df = DaruLite::DataFrame.new({
2873
- a: [1,2,3,4,5],
2874
- b: [11,22,33,44,55],
2875
- c: %w(a b c d e)
2876
- })
2877
-
2878
- ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
2879
- expect(ans).to eq(DaruLite::DataFrame.new({
2880
- :b => [11,22,33,44,55],
2881
- 'a' => [nil, nil, nil, nil, nil],
2882
- :a => [1,2,3,4,5]
2883
- }, order: [:b, 'a', :a]))
2884
- end
2885
-
2886
- it 'raises ArgumentError if argument was not an index' do
2887
- df = DaruLite::DataFrame.new([])
2888
- expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2889
- end
2890
- end
2891
-
2892
- context "#to_matrix" do
2893
- before do
2894
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
2895
- c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
2896
- order: [:a, :b, :c,:d,:e],
2897
- index: [:one, :two, :three, :four, :five])
2898
- end
2899
-
2900
- it "concats numeric non-nil vectors to Matrix" do
2901
- expect(@df.to_matrix).to eq(Matrix[
2902
- [1,11,11,5],
2903
- [2,12,22,4],
2904
- [3,13,33,nil],
2905
- [4,14,44,2],
2906
- [5,15,55,1]
2907
- ])
2908
- end
2909
- end
2910
-
2911
- context "#transpose" do
2912
- context DaruLite::Index do
2913
- it "transposes a DataFrame including row and column indexing" do
2914
- expect(@data_frame.transpose).to eq(DaruLite::DataFrame.new({
2915
- one: [1,11,11],
2916
- two: [2,12,22],
2917
- three: [3,13,33],
2918
- four: [4,14,44],
2919
- five: [5,15,55]
2920
- }, index: [:a, :b, :c],
2921
- order: [:one, :two, :three, :four, :five])
2922
- )
2923
- end
2924
- end
2925
-
2926
- context DaruLite::MultiIndex do
2927
- it "transposes a DataFrame including row and column indexing" do
2928
- expect(@df_mi.transpose).to eq(DaruLite::DataFrame.new([
2929
- @vector_arry1,
2930
- @vector_arry2,
2931
- @vector_arry1,
2932
- @vector_arry2].transpose, index: @order_mi, order: @multi_index))
2933
- end
2934
- end
2935
- end
2936
-
2937
- context "#pivot_table" do
2938
- before do
2939
- @df = DaruLite::DataFrame.new({
2940
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
2941
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
2942
- c: ['small','large','large','small','small','large','small','large','small'],
2943
- d: [1,2,2,3,3,4,5,6,7],
2944
- e: [2,4,4,6,6,8,10,12,14]
2945
- })
2946
- end
2947
-
2948
- it "creates row index as per (single) index argument and default aggregates to mean" do
2949
- expect(@df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
2950
- d: [5.5,2.2],
2951
- e: [11.0,4.4]
2952
- }, index: ['bar', 'foo']))
2953
- end
2954
-
2955
- it "creates row index as per (double) index argument and default aggregates to mean" do
2956
- agg_mi = DaruLite::MultiIndex.from_tuples(
2957
- [
2958
- ['bar', 'large'],
2959
- ['bar', 'small'],
2960
- ['foo', 'large'],
2961
- ['foo', 'small']
2962
- ]
2963
- )
2964
- expect(@df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
2965
- d: [5.0 , 6.0, 2.0, 2.33],
2966
- e: [10.0, 12.0, 4.0, 4.67]
2967
- }, index: agg_mi))
2968
- end
2969
-
2970
- it "creates row and vector index as per (single) index and (single) vectors args" do
2971
- agg_vectors = DaruLite::MultiIndex.from_tuples([
2972
- [:d, 'one'],
2973
- [:d, 'two'],
2974
- [:e, 'one'],
2975
- [:e, 'two']
2976
- ])
2977
- agg_index = DaruLite::MultiIndex.from_tuples(
2978
- [
2979
- ['bar'],
2980
- ['foo']
2981
- ]
2982
- )
2983
-
2984
- expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
2985
- DaruLite::DataFrame.new(
2986
- [
2987
- [4.5, 1.67],
2988
- [6.5, 3.0],
2989
- [9.0, 3.33],
2990
- [13, 6]
2991
- ], order: agg_vectors, index: agg_index)
2992
- )
2993
- end
2994
-
2995
- it "creates row and vector index as per (single) index and (double) vector args" do
2996
- agg_vectors = DaruLite::MultiIndex.from_tuples(
2997
- [
2998
- [:d, 'one', 'large'],
2999
- [:d, 'one', 'small'],
3000
- [:d, 'two', 'large'],
3001
- [:d, 'two', 'small'],
3002
- [:e, 'one', 'large'],
3003
- [:e, 'one', 'small'],
3004
- [:e, 'two', 'large'],
3005
- [:e, 'two', 'small']
3006
- ]
3007
- )
3008
-
3009
- agg_index = DaruLite::MultiIndex.from_tuples(
3010
- [
3011
- ['bar'],
3012
- ['foo']
3013
- ]
3014
- )
3015
-
3016
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
3017
- [
3018
- [4.0,2.0],
3019
- [5.0,1.0],
3020
- [6.0,nil],
3021
- [7.0,3.0],
3022
- [8.0,4.0],
3023
- [10.0,2.0],
3024
- [12.0,nil],
3025
- [14.0,6.0]
3026
- ], order: agg_vectors, index: agg_index
3027
- ))
3028
- end
3029
-
3030
- it "creates row and vector index with (double) index and (double) vector args" do
3031
- agg_index = DaruLite::MultiIndex.from_tuples([
3032
- ['bar', 4],
3033
- ['bar', 5],
3034
- ['bar', 6],
3035
- ['bar', 7],
3036
- ['foo', 1],
3037
- ['foo', 2],
3038
- ['foo', 3]
3039
- ])
3040
-
3041
- agg_vectors = DaruLite::MultiIndex.from_tuples([
3042
- [:e, 'one', 'large'],
3043
- [:e, 'one', 'small'],
3044
- [:e, 'two', 'large'],
3045
- [:e, 'two', 'small']
3046
- ])
3047
-
3048
- expect(@df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
3049
- DaruLite::DataFrame.new(
3050
- [
3051
- [8 ,nil,nil,nil,nil, 4,nil],
3052
- [nil, 10,nil,nil, 2,nil,nil],
3053
- [nil,nil, 12,nil,nil,nil,nil],
3054
- [nil,nil,nil, 14,nil,nil, 6],
3055
- ], index: agg_index, order: agg_vectors)
3056
- )
3057
- end
3058
-
3059
- it "only aggregates over the vector specified in the values argument" do
3060
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3061
- [
3062
- [:e, 'one', 'large'],
3063
- [:e, 'one', 'small'],
3064
- [:e, 'two', 'large'],
3065
- [:e, 'two', 'small']
3066
- ]
3067
- )
3068
- agg_index = DaruLite::MultiIndex.from_tuples(
3069
- [
3070
- ['bar'],
3071
- ['foo']
3072
- ]
3073
- )
3074
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
3075
- DaruLite::DataFrame.new(
3076
- [
3077
- [8, 4],
3078
- [10, 2],
3079
- [12,nil],
3080
- [14, 6]
3081
- ], order: agg_vectors, index: agg_index
3082
- )
3083
- )
3084
-
3085
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3086
- [
3087
- [:d, 'one'],
3088
- [:d, 'two'],
3089
- [:e, 'one'],
3090
- [:e, 'two']
3091
- ]
3092
- )
3093
- expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
3094
- DaruLite::DataFrame.new(
3095
- [
3096
- [4.5, 5.0/3],
3097
- [6.5, 3.0],
3098
- [9.0, 10.0/3],
3099
- [13.0, 6.0]
3100
- ], order: agg_vectors, index: agg_index
3101
- )
3102
- )
3103
- end
3104
-
3105
- it "overrides default aggregate function to aggregate over sum" do
3106
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3107
- [
3108
- [:e, 'one', 'large'],
3109
- [:e, 'one', 'small'],
3110
- [:e, 'two', 'large'],
3111
- [:e, 'two', 'small']
3112
- ]
3113
- )
3114
- agg_index = DaruLite::MultiIndex.from_tuples(
3115
- [
3116
- ['bar'],
3117
- ['foo']
3118
- ]
3119
- )
3120
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
3121
- DaruLite::DataFrame.new(
3122
- [
3123
- [8, 8],
3124
- [10, 2],
3125
- [12,nil],
3126
- [14, 12]
3127
- ], order: agg_vectors, index: agg_index
3128
- )
3129
- )
3130
- end
3131
-
3132
- it "raises error if no non-numeric vectors are present" do
3133
- df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
3134
- expect {
3135
- df.pivot_table(index: [:a])
3136
- }.to raise_error
3137
- end
3138
-
3139
- it "raises error if atleast a row index is not specified" do
3140
- expect {
3141
- @df.pivot_table
3142
- }.to raise_error
3143
- end
3144
-
3145
- it "aggregates when nils are present in value vector" do
3146
- df = DaruLite::DataFrame.new({
3147
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3148
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3149
- c: ['small','large','large','small','small','large','small','large','small'],
3150
- d: [1,2,2,3,3,4,5,6,7],
3151
- e: [2,nil,4,6,6,8,10,12,nil]
3152
- })
3153
-
3154
- expect(df.pivot_table index: [:a]).to eq(
3155
- DaruLite::DataFrame.new({
3156
- d: [5.0, 2.2, 7],
3157
- e: [10.0, 4.5, nil]
3158
- }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
3159
- end
3160
-
3161
- it "works when nils are present in value vector" do
3162
- df = DaruLite::DataFrame.new({
3163
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3164
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3165
- c: ['small','large','large','small','small','large','small','large','small'],
3166
- d: [1,2,2,3,3,4,5,6,7],
3167
- e: [2,nil,4,6,6,8,10,12,nil]
3168
- })
3169
-
3170
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3171
- [
3172
- [:e, 'one'],
3173
- [:e, 'two']
3174
- ]
3175
- )
3176
-
3177
- agg_index = DaruLite::MultiIndex.from_tuples(
3178
- [
3179
- ['bar'],
3180
- ['foo'],
3181
- ['ice']
3182
- ]
3183
- )
3184
-
3185
- expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
3186
- DaruLite::DataFrame.new(
3187
- [
3188
- [9, 3, nil],
3189
- [12, 6, nil]
3190
- ], order: agg_vectors, index: agg_index
3191
- )
3192
- )
3193
- end
3194
-
3195
- it 'performs date pivoting' do
3196
- categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
3197
- df = DaruLite::DataFrame.rows([
3198
- [2014, 2, 1600.0, 20.0],
3199
- [2014, 3, 1680.0, 21.0],
3200
- [2016, 2, 1600.0, 20.0],
3201
- [2016, 4, 1520.0, 19.0],
3202
- ], order: [:year, :month, :visitors, :days])
3203
- df[:averages] = df[:visitors] / df[:days]
3204
- df[:month] = df[:month].map{|i| categories[i - 1]}
3205
- actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
3206
-
3207
- # NB: As you can see, there are some "illogical" parts:
3208
- # months are sorted lexicographically, then made into multi-index
3209
- # with one-element-per-tuple, then order of columns is dependent
3210
- # on which month is lexicographically first (its apr, so, apr-2016
3211
- # is first row to gather, so 2016 is first column).
3212
- #
3213
- # All of it is descendance of our group_by implementation (which
3214
- # always sorts results & always make array keys). I hope that fixing
3215
- # group_by, even to the extend described at https://github.com/v0dro/daru/issues/152,
3216
- # will be fix this case also.
3217
- expected =
3218
- DaruLite::DataFrame.new(
3219
- [
3220
- [80.0, 80.0, nil],
3221
- [nil, 80.0, 80.0],
3222
- ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
3223
- order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
3224
- )
3225
- # Comparing their parts previous to full comparison allows to
3226
- # find complicated differences.
3227
- expect(actual.vectors).to eq expected.vectors
3228
- expect(actual.index).to eq expected.index
3229
- expect(actual).to eq expected
3230
- end
3231
- end
3232
-
3233
- context "#shape" do
3234
- it "returns an array containing number of rows and columns" do
3235
- expect(@data_frame.shape).to eq([5,3])
3236
- end
3237
- end
3238
-
3239
- context "#nest" do
3240
- it "nests in a hash" do
3241
- df = DaruLite::DataFrame.new({
3242
- :a => DaruLite::Vector.new(%w(a a a b b b)),
3243
- :b => DaruLite::Vector.new(%w(c c d d e e)),
3244
- :c => DaruLite::Vector.new(%w(f g h i j k))
3245
- })
3246
- nest = df.nest :a, :b
3247
- expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
3248
- expect(nest['a']['d']).to eq([{ :c => 'h' }])
3249
- expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3250
- end
3251
- end
3252
-
3253
- context "#summary" do
3254
- subject { df.summary }
3255
-
3256
- context "DataFrame" do
3257
- let(:df) { DaruLite::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') }
3258
- it { is_expected.to eq %Q{
3259
- |= frame
3260
- | Number of rows: 3
3261
- | Element:[a]
3262
- | == a
3263
- | n :3
3264
- | non-missing:3
3265
- | median: 2
3266
- | mean: 2.6667
3267
- | std.dev.: 2.0817
3268
- | std.err.: 1.2019
3269
- | skew: 0.2874
3270
- | kurtosis: -2.3333
3271
- | Element:[b]
3272
- | == b
3273
- | n :3
3274
- | non-missing:3
3275
- | factors: 1,2,string
3276
- | mode: 1,2,string
3277
- | Distribution
3278
- | 1 1 100.00%
3279
- | 2 1 100.00%
3280
- | string 1 100.00%
3281
- }.unindent }
3282
- end
3283
- end
3284
-
3285
- context '#to_df' do
3286
- it 'returns the dataframe' do
3287
- @data_frame.to_df == @data_frame
3288
- end
3289
- end
3290
-
3291
- context "#merge" do
3292
- it "merges one dataframe with another" do
3293
- a = DaruLite::Vector.new [1, 2, 3]
3294
- b = DaruLite::Vector.new [3, 4, 5]
3295
- c = DaruLite::Vector.new [4, 5, 6]
3296
- d = DaruLite::Vector.new [7, 8, 9]
3297
- e = DaruLite::Vector.new [10, 20, 30]
3298
- ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
3299
- ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
3300
- exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
3301
-
3302
- expect(ds1.merge(ds2)).to eq(exp)
3303
- expect(ds2.merge(ds1)).to eq(
3304
- DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
3305
-
3306
- ds3 = DaruLite::DataFrame.new({ :a => e })
3307
- exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
3308
- order: [:a_1, :b, :a_2])
3309
-
3310
- expect(ds1.merge(ds3)).to eq(exp)
3311
- end
3312
-
3313
- context "preserves type of vector names" do
3314
- let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
3315
- let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
3316
- subject { df1.merge df2 }
3317
-
3318
- it { is_expected.to be_a DaruLite::DataFrame }
3319
- it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3320
- it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3321
- end
3322
-
3323
- context "preserves indices for dataframes with same index" do
3324
- let(:index) { ['one','two','three'] }
3325
- let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
3326
- let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
3327
- subject { df1.merge df2 }
3328
-
3329
- its(:index) { is_expected.to eq DaruLite::Index.new(index) }
3330
- end
3331
- end
3332
-
3333
- context "#vector_by_calculation" do
3334
- it "DSL for returning vector of each calculation" do
3335
- a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
3336
- a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
3337
- a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
3338
- ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
3339
- total = ds.vector_by_calculation { a + b + c }
3340
- expected = DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777])
3341
- expect(total).to eq(expected)
3342
- end
3343
- end
3344
-
3345
- context "group_by" do
3346
- context "on a single row DataFrame" do
3347
- let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
3348
- it "returns a groupby object" do
3349
- expect(df.group_by([:city])).to be_a(DaruLite::Core::GroupBy)
3350
- end
3351
- it "has the correct index" do
3352
- expect(df.group_by([:city]).groups).to eq({["Kyiv"]=>[0]})
3353
- end
3354
- end
3355
- end
3356
-
3357
- context "#vector_sum" do
3358
- before do
3359
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
3360
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
3361
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3362
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
3363
- @df = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3364
- end
3365
-
3366
- it "calculates complete vector sum" do
3367
- expect(@df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3368
- end
3369
-
3370
- it "ignores nils if skipnil is true" do
3371
- expect(@df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
3372
- end
3373
-
3374
- it "calculates partial vector sum" do
3375
- a = @df.vector_sum([:a1, :a2])
3376
- b = @df.vector_sum([:b1, :b2])
3377
-
3378
- expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
3379
- expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3380
- end
3381
- end
3382
-
3383
- context "#missing_values_rows" do
3384
- it "returns number of missing values in each row" do
3385
- a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
3386
- a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
3387
- b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
3388
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3389
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3390
- df = DaruLite::DataFrame.new({
3391
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3392
-
3393
- expect(df.missing_values_rows).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
3394
- end
3395
- end
3396
-
3397
- context "#vector_count_characters" do
3398
- it "" do
3399
- a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
3400
- a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
3401
- b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
3402
- b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
3403
- c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
3404
- ds = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3405
-
3406
- expect(ds.vector_count_characters).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
3407
- end
3408
- end
3409
-
3410
- context '#include_values?' do
3411
- let(:df) do
3412
- DaruLite::DataFrame.new({
3413
- a: [1, 2, 3, 4, Float::NAN, 6, 1],
3414
- b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3415
- c: ['a', 6, 3, 4, 3, 5, 3],
3416
- d: [1, 2, 3, 5, 1, 2, 5]
3417
- })
3418
- end
3419
- before { df.to_category :b }
3420
-
3421
- context 'true' do
3422
- it { expect(df.include_values? nil).to eq true }
3423
- it { expect(df.include_values? Float::NAN).to eq true }
3424
- it { expect(df.include_values? nil, Float::NAN).to eq true }
3425
- it { expect(df.include_values? 1, 30).to eq true }
972
+ context "#transpose" do
973
+ context DaruLite::Index do
974
+ it "transposes a DataFrame including row and column indexing" do
975
+ expect(df.transpose).to eq(DaruLite::DataFrame.new({
976
+ one: [1,11,11],
977
+ two: [2,12,22],
978
+ three: [3,13,33],
979
+ four: [4,14,44],
980
+ five: [5,15,55]
981
+ }, index: [:a, :b, :c],
982
+ order: [:one, :two, :three, :four, :five])
983
+ )
984
+ end
3426
985
  end
3427
986
 
3428
- context 'false' do
3429
- it { expect(df[:a, :c].include_values? nil).to eq false }
3430
- it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3431
- it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3432
- it { expect(df.include_values? 10, 20).to eq false }
987
+ context DaruLite::MultiIndex do
988
+ it "transposes a DataFrame including row and column indexing" do
989
+ expect(df_mi.transpose).to eq(DaruLite::DataFrame.new([
990
+ vector_arry1,
991
+ vector_arry2,
992
+ vector_arry1,
993
+ vector_arry2].transpose, index: order_mi, order: multi_index))
994
+ end
3433
995
  end
3434
996
  end
3435
997
 
3436
- context "#vector_mean" do
3437
- before do
3438
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
3439
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
3440
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
3441
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3442
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3443
- @df = DaruLite::DataFrame.new({
3444
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
998
+ context "#shape" do
999
+ it "returns an array containing number of rows and columns" do
1000
+ expect(df.shape).to eq([5,3])
3445
1001
  end
1002
+ end
3446
1003
 
3447
- it "calculates complete vector mean" do
3448
- expect(@df.vector_mean).to eq(
3449
- DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil])
1004
+ context "#nest" do
1005
+ it "nests in a hash" do
1006
+ df = DaruLite::DataFrame.new({
1007
+ :a => DaruLite::Vector.new(%w(a a a b b b)),
1008
+ :b => DaruLite::Vector.new(%w(c c d d e e)),
1009
+ :c => DaruLite::Vector.new(%w(f g h i j k))
1010
+ })
1011
+ nest = df.nest :a, :b
1012
+ expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
1013
+ expect(nest['a']['d']).to eq([{ :c => 'h' }])
1014
+ expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3450
1015
  end
3451
1016
  end
3452
1017
 
@@ -3490,64 +1055,6 @@ describe DaruLite::DataFrame do
3490
1055
  end
3491
1056
  end
3492
1057
 
3493
- context "#verify" do
3494
- def create_test(*args, &proc)
3495
- description = args.shift
3496
- fields = args
3497
- [description, fields, proc]
3498
- end
3499
-
3500
- before do
3501
- name = DaruLite::Vector.new %w(r1 r2 r3 r4)
3502
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3503
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3504
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3505
- v4 = DaruLite::Vector.new %w(a b a b)
3506
- @df = DaruLite::DataFrame.new({
3507
- :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
3508
- }, order: [:v1, :v2, :v3, :v4, :id])
3509
- end
3510
-
3511
- it "correctly verifies data as per the block" do
3512
- # Correct
3513
- t1 = create_test('If v4=a, v1 odd') do |r|
3514
- r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
3515
- end
3516
- t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
3517
- # Fail!
3518
- t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
3519
- exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
3520
- exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
3521
-
3522
- dataf = @df.verify(t3, t1, t2)
3523
- expect(dataf).to eq(exp1)
3524
- end
3525
-
3526
- it "uses additional fields to extend error messages" do
3527
- t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3528
-
3529
- dataf = @df.verify(:id, t)
3530
- expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
3531
- end
3532
- end
3533
-
3534
- context "#compute" do
3535
- it "performs a computation when supplied in a string" do
3536
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3537
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3538
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3539
- vnumeric = DaruLite::Vector.new [0, 0, 1, 4]
3540
- vsum = DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0]
3541
- vmult = DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1]
3542
-
3543
- df = DaruLite::DataFrame.new({:v1 => v1, :v2 => v2, :v3 => v3})
3544
-
3545
- expect(df.compute("v1/v2")).to eq(vnumeric)
3546
- expect(df.compute("v1+v2+v3")).to eq(vsum)
3547
- expect(df.compute("v1*v2")).to eq(vmult)
3548
- end
3549
- end
3550
-
3551
1058
  context ".crosstab_by_assignation" do
3552
1059
  it "" do
3553
1060
  v1 = DaruLite::Vector.new %w(a a a b b b c c c)
@@ -3571,394 +1078,6 @@ describe DaruLite::DataFrame do
3571
1078
  end
3572
1079
  end
3573
1080
 
3574
- context "#one_to_many" do
3575
- it "" do
3576
- rows = [
3577
- ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
3578
- ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
3579
- ['3', 'alfred', nil, nil, nil, nil, nil, nil]
3580
- ]
3581
-
3582
- df = DaruLite::DataFrame.rows(rows,
3583
- order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
3584
- 'car_value2', 'car_color3', 'car_value3'])
3585
-
3586
- ids = DaruLite::Vector.new %w(1 1 2 2 2)
3587
- colors = DaruLite::Vector.new %w(red blue green orange white)
3588
- values = DaruLite::Vector.new [10, 20, 15, 30, 20]
3589
- col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]
3590
-
3591
- df_expected = DaruLite::DataFrame.new({
3592
- 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
3593
- }, order: ['id', '_col_id', 'color', 'value'])
3594
-
3595
- expect(df.one_to_many(['id'], 'car_%v%n')).to eq(df_expected)
3596
- end
3597
- end
3598
-
3599
- context "#any?" do
3600
- before do
3601
- @df = DaruLite::DataFrame.new({
3602
- a: [1,2,3,4,5],
3603
- b: [10,20,30,40,50],
3604
- c: [11,22,33,44,55]})
3605
- end
3606
-
3607
- it "returns true if any one of the vectors satisfy condition" do
3608
- expect(@df.any? { |v| v[0] == 1 }).to eq(true)
3609
- end
3610
-
3611
- it "returns false if none of the vectors satisfy the condition" do
3612
- expect(@df.any? { |v| v.mean > 100 }).to eq(false)
3613
- end
3614
-
3615
- it "returns true if any one of the rows satisfy condition" do
3616
- expect(@df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
3617
- end
3618
-
3619
- it "returns false if none of the rows satisfy the condition" do
3620
- expect(@df.any?(:row) { |r| r.mean > 100 }).to eq(false)
3621
- end
3622
-
3623
- it 'fails on unknown axis' do
3624
- expect { @df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3625
- end
3626
- end
3627
-
3628
- context "#all?" do
3629
- before do
3630
- @df = DaruLite::DataFrame.new({
3631
- a: [1,2,3,4,5],
3632
- b: [10,20,30,40,50],
3633
- c: [11,22,33,44,55]})
3634
- end
3635
-
3636
- it "returns true if all of the vectors satisfy condition" do
3637
- expect(@df.all? { |v| v.mean < 40 }).to eq(true)
3638
- end
3639
-
3640
- it "returns false if any one of the vectors does not satisfy condition" do
3641
- expect(@df.all? { |v| v.mean == 30 }).to eq(false)
3642
- end
3643
-
3644
- it "returns true if all of the rows satisfy condition" do
3645
- expect(@df.all?(:row) { |r| r.mean < 70 }).to eq(true)
3646
- end
3647
-
3648
- it "returns false if any one of the rows does not satisfy condition" do
3649
- expect(@df.all?(:row) { |r| r.mean == 30 }).to eq(false)
3650
- end
3651
-
3652
- it 'fails on unknown axis' do
3653
- expect { @df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3654
- end
3655
- end
3656
-
3657
- context "#only_numerics" do
3658
- before do
3659
- @v1 = DaruLite::Vector.new([1,2,3,4,5])
3660
- @v2 = DaruLite::Vector.new(%w(one two three four five))
3661
- @v3 = DaruLite::Vector.new([11,22,33,44,55])
3662
- @df = DaruLite::DataFrame.new({
3663
- a: @v1, b: @v2, c: @v3 }, clone: false)
3664
- end
3665
-
3666
- it "returns a view of only the numeric vectors" do
3667
- dfon = @df.only_numerics(clone: false)
3668
-
3669
- expect(dfon).to eq(
3670
- DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false))
3671
- expect(dfon[:a].object_id).to eq(@v1.object_id)
3672
- end
3673
-
3674
- it "returns a clone of numeric vectors" do
3675
- dfon = @df.only_numerics
3676
-
3677
- expect(dfon).to eq(
3678
- DaruLite::DataFrame.new({ a: @v1, c: @v3}, clone: false)
3679
- )
3680
- expect(dfon[:a].object_id).to_not eq(@v1.object_id)
3681
- end
3682
-
3683
- context DaruLite::MultiIndex do
3684
- before do
3685
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3686
- [
3687
- [:d, :one, :large],
3688
- [:d, :one, :small],
3689
- [:d, :two, :large],
3690
- [:d, :two, :small],
3691
- [:e, :one, :large],
3692
- [:e, :one, :small],
3693
- [:e, :two, :large],
3694
- [:e, :two, :small]
3695
- ]
3696
- )
3697
-
3698
- agg_index = DaruLite::MultiIndex.from_tuples(
3699
- [
3700
- [:bar],
3701
- [:foo]
3702
- ]
3703
- )
3704
- @df = DaruLite::DataFrame.new(
3705
- [
3706
- [4.112,2.234],
3707
- %w(a b),
3708
- [6.342,nil],
3709
- [7.2344,3.23214],
3710
- [8.234,4.533],
3711
- [10.342,2.3432],
3712
- [12.0,nil],
3713
- %w(a b)
3714
- ], order: agg_vectors, index: agg_index
3715
- )
3716
- end
3717
-
3718
- it "returns numeric vectors" do
3719
- vectors = DaruLite::MultiIndex.from_tuples(
3720
- [
3721
- [:d, :one, :large],
3722
- [:d, :two, :large],
3723
- [:d, :two, :small],
3724
- [:e, :one, :large],
3725
- [:e, :one, :small],
3726
- [:e, :two, :large]
3727
- ]
3728
- )
3729
-
3730
- index = DaruLite::MultiIndex.from_tuples(
3731
- [
3732
- [:bar],
3733
- [:foo]
3734
- ]
3735
- )
3736
- answer = DaruLite::DataFrame.new(
3737
- [
3738
- [4.112,2.234],
3739
- [6.342,nil],
3740
- [7.2344,3.23214],
3741
- [8.234,4.533],
3742
- [10.342,2.3432],
3743
- [12.0,nil],
3744
- ], order: vectors, index: index
3745
- )
3746
-
3747
- expect(@df.only_numerics).to eq(answer)
3748
- end
3749
- end
3750
- end
3751
-
3752
- context '#reset_index' do
3753
- context 'when Index' do
3754
- subject do
3755
- DaruLite::DataFrame.new(
3756
- {'vals' => [1,2,3,4,5]},
3757
- index: DaruLite::Index.new(%w[a b c d e], name: 'indices')
3758
- ).reset_index
3759
- end
3760
-
3761
- it { is_expected.to eq DaruLite::DataFrame.new(
3762
- 'indices' => %w[a b c d e],
3763
- 'vals' => [1,2,3,4,5]
3764
- )}
3765
- end
3766
-
3767
- context 'when MultiIndex' do
3768
- subject do
3769
- mi = DaruLite::MultiIndex.from_tuples([
3770
- [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
3771
- ])
3772
- mi.name = %w[nums alphas]
3773
- DaruLite::DataFrame.new(
3774
- {'vals' => [1,2,3,4]},
3775
- index: mi
3776
- ).reset_index
3777
- end
3778
-
3779
- it { is_expected.to eq DaruLite::DataFrame.new(
3780
- 'nums' => [0,0,1,1],
3781
- 'alphas' => %w[a b a b],
3782
- 'vals' => [1,2,3,4]
3783
- )}
3784
- end
3785
- end
3786
-
3787
- context "#set_index" do
3788
- before(:each) do
3789
- @df = DaruLite::DataFrame.new({
3790
- a: [1,2,3,4,5],
3791
- b: ['a','b','c','d','e'],
3792
- c: [11,22,33,44,55]
3793
- })
3794
- end
3795
-
3796
- it "sets a particular column as the index and deletes that column" do
3797
- @df.set_index(:b)
3798
- expect(@df).to eq(
3799
- DaruLite::DataFrame.new({
3800
- a: [1,2,3,4,5],
3801
- c: [11,22,33,44,55]
3802
- }, index: ['a','b','c','d','e'])
3803
- )
3804
- end
3805
-
3806
- it "sets a particular column as index but keeps that column" do
3807
- expect(@df.set_index(:c, keep: true)).to eq(
3808
- DaruLite::DataFrame.new({
3809
- a: [1,2,3,4,5],
3810
- b: ['a','b','c','d','e'],
3811
- c: [11,22,33,44,55]
3812
- }, index: [11,22,33,44,55]))
3813
- expect(@df[:c]).to eq(@df[:c])
3814
- end
3815
-
3816
- it "sets categorical index if categorical is true" do
3817
- data = {
3818
- a: [1, 2, 3, 4, 5],
3819
- b: [:a, 1, :a, 1, 'c'],
3820
- c: %w[a b c d e]
3821
- }
3822
- df = DaruLite::DataFrame.new(data)
3823
- df.set_index(:b, categorical: true)
3824
- expected = DaruLite::DataFrame.new(
3825
- data.slice(:a, :c),
3826
- index: DaruLite::CategoricalIndex.new(data[:b])
3827
- )
3828
- expect(df).to eq(expected)
3829
- end
3830
-
3831
- it "raises error if all elements in the column aren't unique" do
3832
- jholu = DaruLite::DataFrame.new({
3833
- a: ['a','b','a'],
3834
- b: [1,2,4]
3835
- })
3836
-
3837
- expect {
3838
- jholu.set_index(:a)
3839
- }.to raise_error(ArgumentError)
3840
- end
3841
-
3842
- it "sets multiindex if array is given" do
3843
- df = DaruLite::DataFrame.new({
3844
- a: %w[a a b b],
3845
- b: [1, 2, 1, 2],
3846
- c: %w[a b c d]
3847
- })
3848
- df.set_index(%i[a b])
3849
- expected =
3850
- DaruLite::DataFrame.new(
3851
- { c: %w[a b c d] },
3852
- index: DaruLite::MultiIndex.from_tuples(
3853
- [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
3854
- )
3855
- ).tap do |df|
3856
- df.index.name = %i[a b]
3857
- df
3858
- end
3859
- expect(df).to eq(expected)
3860
- end
3861
- end
3862
-
3863
- context "#concat" do
3864
- before do
3865
- @df1 = DaruLite::DataFrame.new({
3866
- a: [1, 2, 3],
3867
- b: [1, 2, 3]
3868
- })
3869
-
3870
- @df2 = DaruLite::DataFrame.new({
3871
- a: [4, 5, 6],
3872
- c: [4, 5, 6]
3873
- })
3874
- end
3875
-
3876
- it 'does not modify the original dataframes' do
3877
- df1_a = @df1[:a].to_a.dup
3878
- df2_a = @df2[:a].to_a.dup
3879
-
3880
- df_concat = @df1.concat @df2
3881
- expect(@df1[:a].to_a).to eq df1_a
3882
- expect(@df2[:a].to_a).to eq df2_a
3883
- end
3884
-
3885
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3886
- df1_a = @df1[:a].to_a.dup
3887
- df2_a = @df2[:a].to_a.dup
3888
-
3889
- df_concat = @df1.concat @df2
3890
- expect(df_concat[:a].to_a).to eq df1_a + df2_a
3891
- end
3892
-
3893
- it 'fills in missing vectors with nils' do
3894
- df1_b = @df1[:b].to_a.dup
3895
- df2_c = @df2[:c].to_a.dup
3896
-
3897
- df_concat = @df1.concat @df2
3898
- expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
3899
- expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
3900
- end
3901
-
3902
- end
3903
-
3904
- context "#union" do
3905
- before do
3906
- @df1 = DaruLite::DataFrame.new({
3907
- a: [1, 2, 3],
3908
- b: [1, 2, 3]},
3909
- index: [1,3,5] )
3910
-
3911
- @df2 = DaruLite::DataFrame.new({
3912
- a: [4, 5, 6],
3913
- c: [4, 5, 6]},
3914
- index: [7,9,11])
3915
-
3916
- @df3 = DaruLite::DataFrame.new({
3917
- a: [4, 5, 6],
3918
- c: [4, 5, 6]},
3919
- index: [5,7,9])
3920
- end
3921
-
3922
- it 'does not modify the original dataframes' do
3923
- df1_a = @df1[:a].to_a.dup
3924
- df2_a = @df2[:a].to_a.dup
3925
-
3926
- _ = @df1.union @df2
3927
- expect(@df1[:a].to_a).to eq df1_a
3928
- expect(@df2[:a].to_a).to eq df2_a
3929
- end
3930
-
3931
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3932
- df1_a = @df1[:a].to_a.dup
3933
- df2_a = @df2[:a].to_a.dup
3934
-
3935
- df_union = @df1.union @df2
3936
- expect(df_union[:a].to_a).to eq df1_a + df2_a
3937
- end
3938
-
3939
- it 'fills in missing vectors with nils' do
3940
- df1_b = @df1[:b].to_a.dup
3941
- df2_c = @df2[:c].to_a.dup
3942
-
3943
- df_union = @df1.union @df2
3944
- expect(df_union[:b].to_a).to eq df1_b + [nil] * @df2.size
3945
- expect(df_union[:c].to_a).to eq [nil] * @df1.size + df2_c
3946
- end
3947
-
3948
- it 'overwrites part of the first dataframe if there are double indices' do
3949
- vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
3950
- expect(@df1.union(@df3).row[5]).to eq vec
3951
- end
3952
-
3953
- it 'concats the indices' do
3954
- v1 = @df1.index.to_a
3955
- v2 = @df2.index.to_a
3956
-
3957
- df_union = @df1.union @df2
3958
- expect(df_union.index.to_a).to eq v1 + v2
3959
- end
3960
- end
3961
-
3962
1081
  context '#inspect' do
3963
1082
  subject { df.inspect }
3964
1083
 
@@ -4139,189 +1258,6 @@ describe DaruLite::DataFrame do
4139
1258
  end
4140
1259
  end
4141
1260
 
4142
- context '#to_s' do
4143
- it 'produces a class, size description' do
4144
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame(5x3)>"
4145
- end
4146
-
4147
- it 'produces a class, name, size description' do
4148
- @data_frame.name = "Test"
4149
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4150
- end
4151
-
4152
- it 'produces a class, name, size description when the name is a symbol' do
4153
- @data_frame.name = :Test
4154
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4155
- end
4156
- end
4157
-
4158
- context '#to_json' do
4159
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, index: [:one, :two, :three], name: 'test')}
4160
- subject { JSON.parse(json) }
4161
-
4162
- context 'with index' do
4163
- let(:json) { df.to_json(false) }
4164
- # FIXME: is it most reasonable we can do?.. -- zverok
4165
- # For me, more resonable thing would be something like
4166
- #
4167
- # [
4168
- # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
4169
- # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
4170
- # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
4171
- # ]
4172
- #
4173
- # Or maybe
4174
- #
4175
- # [
4176
- # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
4177
- # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
4178
- # ["three", {"a"=>3, "b"=>5, "c"=>8}]
4179
- # ]
4180
- #
4181
- # Or even
4182
- #
4183
- # {
4184
- # "one" => {"a"=>1, "b"=>3, "c"=>6},
4185
- # "two" => {"a"=>2, "b"=>4, "c"=>7},
4186
- # "three" => {"a"=>3, "b"=>5, "c"=>8}
4187
- # }
4188
- #
4189
- it { is_expected.to eq(
4190
- [
4191
- [
4192
- {"a"=>1, "b"=>3, "c"=>6},
4193
- {"a"=>2, "b"=>4, "c"=>7},
4194
- {"a"=>3, "b"=>5, "c"=>8}
4195
- ],
4196
- ["one", "two", "three"]
4197
- ]
4198
- )}
4199
- end
4200
-
4201
- context 'without index' do
4202
- let(:json) { df.to_json(true) }
4203
- it { is_expected.to eq(
4204
- [
4205
- {"a"=>1, "b"=>3, "c"=>6},
4206
- {"a"=>2, "b"=>4, "c"=>7},
4207
- {"a"=>3, "b"=>5, "c"=>8}
4208
- ]
4209
- )}
4210
- end
4211
- end
4212
-
4213
- context '#access_row_tuples_by_indexs' do
4214
- let(:df) {
4215
- DaruLite::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
4216
- let(:df_idx) {
4217
- DaruLite::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
4218
- }
4219
- let (:mi_idx) do
4220
- DaruLite::MultiIndex.from_tuples [
4221
- [:a,:one,:bar],
4222
- [:a,:one,:baz],
4223
- [:b,:two,:bar],
4224
- [:a,:two,:baz],
4225
- ]
4226
- end
4227
- let (:df_mi) do
4228
- DaruLite::DataFrame.new({
4229
- a: 1..4,
4230
- b: 'a'..'d'
4231
- }, index: mi_idx )
4232
- end
4233
- context 'when no index is given' do
4234
- it 'returns empty Array' do
4235
- expect(df.access_row_tuples_by_indexs()).to eq([])
4236
- end
4237
- end
4238
- context 'when index(s) are given' do
4239
- it 'returns Array of row tuples' do
4240
- expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
4241
- expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
4242
- end
4243
- end
4244
- context 'when custom index(s) are given' do
4245
- it 'returns Array of row tuples' do
4246
- expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
4247
- [[52, 1], [7, 3]]
4248
- )
4249
- end
4250
- end
4251
- context 'when multi index is given' do
4252
- it 'returns Array of row tuples' do
4253
- expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
4254
- [[1, "a"], [2, "b"], [4, "d"]]
4255
- )
4256
- expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
4257
- [[2, "b"]]
4258
- )
4259
- end
4260
- end
4261
- end
4262
-
4263
- context '#aggregate' do
4264
- let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4265
- let(:df) { DaruLite::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
4266
- let(:df_cat_idx) {
4267
- DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
4268
-
4269
- it 'lambda function on particular column' do
4270
- expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
4271
- DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
4272
- )
4273
- end
4274
- it 'aggregate sum on particular column' do
4275
- expect(df_cat_idx.aggregate(num: :sum)).to eq(
4276
- DaruLite::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
4277
- )
4278
- end
4279
- end
4280
-
4281
- context '#group_by_and_aggregate' do
4282
- let(:spending_df) {
4283
- DaruLite::DataFrame.rows([
4284
- [2010, 'dev', 50, 1],
4285
- [2010, 'dev', 150, 1],
4286
- [2010, 'dev', 200, 1],
4287
- [2011, 'dev', 50, 1],
4288
- [2012, 'dev', 150, 1],
4289
-
4290
- [2011, 'office', 300, 1],
4291
-
4292
- [2010, 'market', 50, 1],
4293
- [2011, 'market', 500, 1],
4294
- [2012, 'market', 500, 1],
4295
- [2012, 'market', 300, 1],
4296
-
4297
- [2012, 'R&D', 10, 1],],
4298
- order: [:year, :category, :spending, :nb_spending])
4299
- }
4300
-
4301
- it 'works as group_by + aggregate' do
4302
- expect(spending_df.group_by_and_aggregate(:year, spending: :sum)).to eq(
4303
- spending_df.group_by(:year).aggregate(spending: :sum))
4304
- expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
4305
- spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
4306
- end
4307
- end
4308
-
4309
- context '#create_sql' do
4310
- let(:df) { DaruLite::DataFrame.new({
4311
- a: [1,2,3],
4312
- b: ['test', 'me', 'please'],
4313
- c: ['2015-06-01', '2015-06-02', '2015-06-03']
4314
- },
4315
- name: 'test'
4316
- )}
4317
- subject { df.create_sql('foo') }
4318
- it { is_expected.to eq %Q{
4319
- |CREATE TABLE foo (a INTEGER,
4320
- | b VARCHAR (255),
4321
- | c DATE) CHARACTER SET=UTF8;
4322
- }.unindent}
4323
- end
4324
-
4325
1261
  context "#by_single_key" do
4326
1262
  let(:df) { DaruLite::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6] ) }
4327
1263
 
@@ -4329,48 +1265,4 @@ describe DaruLite::DataFrame do
4329
1265
  expect { df[:c] }.to raise_error(IndexError, /Specified vector c does not exist/)
4330
1266
  end
4331
1267
  end
4332
-
4333
- context "#rotate_vectors" do
4334
- subject { df.rotate_vectors(-1) }
4335
-
4336
- context "several vectors in the dataframe" do
4337
- context 'all vector names are the same type' do
4338
- let(:df) do
4339
- DaruLite::DataFrame.new({
4340
- a: [1,2,3],
4341
- b: [4,5,6],
4342
- total: [5,7,9]
4343
- })
4344
- end
4345
- let(:new_order) { [:total, :a, :b] }
4346
-
4347
- it "return the dataframe with the position of the last vector change to first" do
4348
- expect(subject.vectors.to_a).to eq(new_order)
4349
- end
4350
- end
4351
-
4352
- context 'vector names are of different types' do
4353
- let(:df) do
4354
- DaruLite::DataFrame.new({
4355
- 'a' => [1,2,3],
4356
- b: [4,5,6],
4357
- 3 => [5,7,9]
4358
- })
4359
- end
4360
- let(:new_order) { [3, 'a', :b] }
4361
-
4362
- it "return the dataframe with the position of the last vector change to first" do
4363
- expect(subject.vectors.to_a).to eq(new_order)
4364
- end
4365
- end
4366
- end
4367
-
4368
- context "only one vector in the dataframe" do
4369
- let(:df) { DaruLite::DataFrame.new({ a: [1,2,3] }) }
4370
-
4371
- it "return the dataframe without any change" do
4372
- expect(subject).to eq(df)
4373
- end
4374
- end
4375
- end
4376
1268
  end if mri?