daru_lite 0.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -1,9 +1,37 @@
1
+ require 'data_frame/aggregatable_example'
2
+ require 'data_frame/buildable_example'
3
+ require 'data_frame/calculatable_example'
4
+ require 'data_frame/convertible_example'
5
+ require 'data_frame/duplicatable_example'
6
+ require 'data_frame/fetchable_example'
7
+ require 'data_frame/filterable_example'
8
+ require 'data_frame/indexable_example'
9
+ require 'data_frame/iterable_example'
10
+ require 'data_frame/joinable_example'
11
+ require 'data_frame/missable_example'
12
+ require 'data_frame/pivotable_example'
13
+ require 'data_frame/queryable_example'
14
+ require 'data_frame/setable_example'
15
+ require 'data_frame/sortable_example'
16
+
1
17
  describe DaruLite::DataFrame do
2
- before :each do
3
- @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
4
- c: [11,22,33,44,55]},
18
+ let(:df) do
19
+ DaruLite::DataFrame.new(
20
+ { b: [11,12,13,14,15], a: [1,2,3,4,5], c: [11,22,33,44,55] },
5
21
  order: [:a, :b, :c],
6
- index: [:one, :two, :three, :four, :five])
22
+ index: [:one, :two, :three, :four, :five]
23
+ )
24
+ end
25
+ let(:df_mi) do
26
+ DaruLite::DataFrame.new(
27
+ [vector_arry1, vector_arry2, vector_arry1, vector_arry2],
28
+ order: order_mi,
29
+ index: multi_index
30
+ )
31
+ end
32
+ let(:vector_arry1) { [11,12,13,14,11,12,13,14,11,12,13,14] }
33
+ let(:vector_arry2) { [1,2,3,4,1,2,3,4,1,2,3,4] }
34
+ let(:multi_index) do
7
35
  tuples = [
8
36
  [:a,:one,:bar],
9
37
  [:a,:one,:baz],
@@ -18,134 +46,36 @@ describe DaruLite::DataFrame do
18
46
  [:c,:two,:foo],
19
47
  [:c,:two,:bar]
20
48
  ]
21
- @multi_index = DaruLite::MultiIndex.from_tuples(tuples)
22
-
23
- @vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]
24
- @vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]
25
-
26
- @order_mi = DaruLite::MultiIndex.from_tuples([
27
- [:a,:one,:bar],
28
- [:a,:two,:baz],
29
- [:b,:two,:foo],
30
- [:b,:one,:foo]])
31
-
32
- @df_mi = DaruLite::DataFrame.new([
33
- @vector_arry1,
34
- @vector_arry2,
35
- @vector_arry1,
36
- @vector_arry2], order: @order_mi, index: @multi_index)
49
+ DaruLite::MultiIndex.from_tuples(tuples)
37
50
  end
38
-
39
- context ".rows" do
40
- before do
41
- @rows = [
42
- [1,2,3,4,5],
43
- [1,2,3,4,5],
44
- [1,2,3,4,5],
45
- [1,2,3,4,5]
51
+ let(:order_mi) do
52
+ DaruLite::MultiIndex.from_tuples(
53
+ [
54
+ [:a,:one,:bar],
55
+ [:a,:two,:baz],
56
+ [:b,:two,:foo],
57
+ [:b,:one,:foo]
46
58
  ]
47
- end
48
-
49
- context DaruLite::Index do
50
- it "creates a DataFrame from Array rows" do
51
- df = DaruLite::DataFrame.rows @rows, order: [:a,:b,:c,:d,:e]
52
-
53
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
54
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
55
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
56
- end
57
-
58
- it "creates empty dataframe" do
59
- df = DaruLite::DataFrame.rows [], order: [:a, :b, :c]
60
-
61
- expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
62
- expect(df.index).to be_empty
63
- end
64
-
65
- it "creates a DataFrame from Vector rows" do
66
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
67
-
68
- df = DaruLite::DataFrame.rows rows, order: [:a,:b,:c,:d,:e]
69
-
70
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
71
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
72
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
73
- end
74
-
75
- it 'derives index & order from arrays' do
76
- df = DaruLite::DataFrame.rows @rows
77
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
78
- expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
79
- end
80
-
81
- it 'derives index & order from vectors' do
82
- rows = @rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
- df = DaruLite::DataFrame.rows rows
84
- expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
85
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
86
- end
87
-
88
- it 'behaves, when rows are repeated' do
89
- rows = @rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
90
- df = DaruLite::DataFrame.rows rows
91
- expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
92
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
93
- end
94
-
95
- it 'behaves, when vectors are unnamed' do
96
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
97
- df = DaruLite::DataFrame.rows rows
98
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
99
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
100
- end
101
- end
102
-
103
- context DaruLite::MultiIndex do
104
- it "creates a DataFrame from rows" do
105
- df = DaruLite::DataFrame.rows(
106
- @rows*3, index: @multi_index, order: [:a,:b,:c,:d,:e])
107
-
108
- expect(df.index) .to eq(@multi_index)
109
- expect(df.vectors) .to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
110
- expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: @multi_index))
111
- end
112
-
113
- it "crates a DataFrame from rows (MultiIndex order)" do
114
- rows = [
115
- [11, 1, 11, 1],
116
- [12, 2, 12, 2],
117
- [13, 3, 13, 3],
118
- [14, 4, 14, 4]
119
- ]
120
- index = DaruLite::MultiIndex.from_tuples([
121
- [:one,:bar],
122
- [:one,:baz],
123
- [:two,:foo],
124
- [:two,:bar]
125
- ])
126
-
127
- df = DaruLite::DataFrame.rows(rows, index: index, order: @order_mi)
128
- expect(df.index) .to eq(index)
129
- expect(df.vectors).to eq(@order_mi)
130
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
131
- index: index))
132
- end
133
-
134
- it "creates a DataFrame from Vector rows" do
135
- rows = @rows*3
136
- rows.map! { |r| DaruLite::Vector.new(r, index: @multi_index) }
137
-
138
- df = DaruLite::DataFrame.rows rows, order: @multi_index
139
-
140
- expect(df.index).to eq(DaruLite::Index.new(Array.new(rows.size) { |i| i }))
141
- expect(df.vectors).to eq(@multi_index)
142
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
143
- end
144
- end
145
- end
59
+ )
60
+ end
61
+
62
+ it_behaves_like 'an aggregatable DataFrame'
63
+ it_behaves_like 'a buildable DataFrame'
64
+ it_behaves_like 'a calculatable DataFrame'
65
+ it_behaves_like 'a convertible DataFrame'
66
+ it_behaves_like 'a duplicatable DataFrame'
67
+ it_behaves_like 'a fetchable DataFrame'
68
+ it_behaves_like 'a filterable DataFrame'
69
+ it_behaves_like 'an indexable DataFrame'
70
+ it_behaves_like 'an iterable DataFrame'
71
+ it_behaves_like 'a joinable DataFrame'
72
+ it_behaves_like 'a missable DataFrame'
73
+ it_behaves_like 'a pivotable DataFrame'
74
+ it_behaves_like 'a queryable DataFrame'
75
+ it_behaves_like 'a setable DataFrame'
76
+ it_behaves_like 'a sortable DataFrame'
146
77
 
147
78
  context "#initialize" do
148
-
149
79
  it "initializes an empty DataFrame with no arguments" do
150
80
  df = DaruLite::DataFrame.new
151
81
  expect(df.nrows).to eq(0)
@@ -370,24 +300,24 @@ describe DaruLite::DataFrame do
370
300
 
371
301
  context DaruLite::MultiIndex do
372
302
  it "creates empty DataFrame" do
373
- df = DaruLite::DataFrame.new({}, order: @order_mi)
303
+ df = DaruLite::DataFrame.new({}, order: order_mi)
374
304
 
375
- expect(df.vectors).to eq(@order_mi)
305
+ expect(df.vectors).to eq(order_mi)
376
306
  expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([]))
377
307
  end
378
308
 
379
309
  it "creates from Hash" do
380
310
  df = DaruLite::DataFrame.new({
381
- [:a,:one,:bar] => @vector_arry1,
382
- [:a,:two,:baz] => @vector_arry2,
383
- [:b,:one,:foo] => @vector_arry1,
384
- [:b,:two,:foo] => @vector_arry2
385
- }, order: @order_mi, index: @multi_index)
386
-
387
- expect(df.index) .to eq(@multi_index)
388
- expect(df.vectors) .to eq(@order_mi)
389
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(@vector_arry1,
390
- index: @multi_index))
311
+ [:a,:one,:bar] => vector_arry1,
312
+ [:a,:two,:baz] => vector_arry2,
313
+ [:b,:one,:foo] => vector_arry1,
314
+ [:b,:two,:foo] => vector_arry2
315
+ }, order: order_mi, index: multi_index)
316
+
317
+ expect(df.index) .to eq(multi_index)
318
+ expect(df.vectors) .to eq(order_mi)
319
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(vector_arry1,
320
+ index: multi_index))
391
321
  end
392
322
 
393
323
  it "creates from Array of Hashes" do
@@ -395,25 +325,25 @@ describe DaruLite::DataFrame do
395
325
  end
396
326
 
397
327
  it "creates from Array of Arrays" do
398
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
399
- @vector_arry2], index: @multi_index, order: @order_mi)
328
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2, vector_arry1,
329
+ vector_arry2], index: multi_index, order: order_mi)
400
330
 
401
- expect(df.index) .to eq(@multi_index)
402
- expect(df.vectors).to eq(@order_mi)
403
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(@vector_arry1,
404
- index: @multi_index))
331
+ expect(df.index) .to eq(multi_index)
332
+ expect(df.vectors).to eq(order_mi)
333
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(vector_arry1,
334
+ index: multi_index))
405
335
  end
406
336
 
407
337
  it "raises error for order MultiIndex of different size than supplied Array" do
408
338
  expect {
409
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2], order: @order_mi,
410
- index: @multi_index)
339
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2], order: order_mi,
340
+ index: multi_index)
411
341
  }.to raise_error
412
342
  end
413
343
 
414
344
  it "aligns MultiIndexes properly" do
415
345
  pending
416
- mi_a = @order_mi
346
+ mi_a = order_mi
417
347
  mi_b = DaruLite::MultiIndex.from_tuples([
418
348
  [:b,:one,:foo],
419
349
  [:a,:one,:bar],
@@ -450,215 +380,6 @@ describe DaruLite::DataFrame do
450
380
  end
451
381
  end
452
382
 
453
- context "#[]" do
454
- context DaruLite::Index do
455
- before :each do
456
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
457
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
458
- index: [:one, :two, :three, :four, :five])
459
- end
460
-
461
- it "returns a Vector" do
462
- expect(@df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
463
- end
464
-
465
- it "returns a Vector by default" do
466
- expect(@df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
467
- index: [:one, :two, :three, :four, :five]))
468
- end
469
-
470
- it "returns a DataFrame" do
471
- temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
472
- order: [:a, :b], index: [:one, :two, :three, :four, :five])
473
-
474
- expect(@df[:a, :b]).to eq(temp)
475
- end
476
-
477
- it "accesses vector with Integer index" do
478
- expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
479
- end
480
-
481
- it "returns a subset of DataFrame when specified range" do
482
- subset = @df[:b..:c]
483
- expect(subset).to eq(DaruLite::DataFrame.new({
484
- b: [11,12,13,14,15],
485
- c: [11,22,33,44,55]
486
- }, index: [:one, :two, :three, :four, :five]))
487
- end
488
-
489
- it 'accepts axis parameter as a last argument' do
490
- expect(@df[:a, :vector]).to eq @df[:a]
491
- expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
492
- end
493
- end
494
-
495
- context DaruLite::MultiIndex do
496
- it "accesses vector with an integer index" do
497
- expect(@df_mi[0]).to eq(
498
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
499
- end
500
-
501
- it "returns a vector when specifying full tuple" do
502
- expect(@df_mi[:a, :one, :bar]).to eq(
503
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
504
- end
505
-
506
- it "returns DataFrame when specified first layer of MultiIndex" do
507
- sub_order = DaruLite::MultiIndex.from_tuples([
508
- [:one, :bar],
509
- [:two, :baz]
510
- ])
511
- expect(@df_mi[:a]).to eq(DaruLite::DataFrame.new([
512
- @vector_arry1,
513
- @vector_arry2
514
- ], index: @multi_index, order: sub_order))
515
- end
516
-
517
- it "returns a Vector if the last level of MultiIndex is tracked" do
518
- expect(@df_mi[:a, :one, :bar]).to eq(
519
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
520
- end
521
- end
522
- end
523
-
524
- context "#[]=" do
525
- context DaruLite::Index do
526
- before :each do
527
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
528
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
529
- index: [:one, :two, :three, :four, :five])
530
- end
531
-
532
- it "assigns directly with the []= operator" do
533
- @data_frame[:a] = [100,200,300,400,500]
534
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
535
- b: [11,12,13,14,15],
536
- a: [100,200,300,400,500],
537
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
538
- index: [:one, :two, :three, :four, :five]))
539
- end
540
-
541
- it "assigns new vector with default length if given just a value" do
542
- @df[:d] = 1.0
543
- expect(@df[:d]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
544
- index: [:one, :two, :three, :four, :five], name: :d))
545
- end
546
-
547
- it "updates vector with default length if given just a value" do
548
- @df[:c] = 1.0
549
- expect(@df[:c]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
550
- index: [:one, :two, :three, :four, :five], name: :c))
551
- end
552
-
553
- it "appends an Array as a DaruLite::Vector" do
554
- @df[:d] = [69,99,108,85,49]
555
-
556
- expect(@df.d.class).to eq(DaruLite::Vector)
557
- end
558
-
559
- it "appends an arbitrary enumerable as a DaruLite::Vector" do
560
- @df[:d] = Set.new([69,99,108,85,49])
561
-
562
- expect(@df[:d]).to eq(DaruLite::Vector.new([69, 99, 108, 85, 49],
563
- index: [:one, :two, :three, :four, :five], name: :c))
564
- end
565
-
566
- it "replaces an already present vector" do
567
- @df[:a] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
568
-
569
- expect(@df.a).to eq([69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five]))
570
- end
571
-
572
- it "appends a new vector to the DataFrame" do
573
- @df[:woo] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
574
-
575
- expect(@df.vectors).to eq([:a, :b, :c, :woo].to_index)
576
- end
577
-
578
- it "creates an index for the new vector if not specified" do
579
- @df[:woo] = [69,99,108,85,49]
580
-
581
- expect(@df.woo.index).to eq([:one, :two, :three, :four, :five].to_index)
582
- end
583
-
584
- it "matches index of vector to be inserted with the DataFrame index" do
585
- @df[:shankar] = [69,99,108,85,49].dv(:shankar, [:two, :one, :three, :five, :four])
586
-
587
- expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
588
- [:one, :two, :three, :four, :five]))
589
- end
590
-
591
- it "matches index of vector to be inserted, inserting nils where no match found" do
592
- @df[:shankar] = [1,2,3].dv(:shankar, [:one, :james, :hetfield])
593
-
594
- expect(@df.shankar).to eq([1,nil,nil,nil,nil].dv(:shankar, [:one, :two, :three, :four, :five]))
595
- end
596
-
597
- it "raises error for Array assignment of wrong length" do
598
- expect{
599
- @df[:shiva] = [1,2,3]
600
- }.to raise_error
601
- end
602
-
603
- it "assigns correct name given empty dataframe" do
604
- df_empty = DaruLite::DataFrame.new({})
605
- df_empty[:a] = 1..5
606
- df_empty[:b] = 1..5
607
-
608
- expect(df_empty[:a].name).to equal(:a)
609
- expect(df_empty[:b].name).to equal(:b)
610
- end
611
-
612
- it "appends multiple vectors at a time" do
613
- # TODO
614
- end
615
- end
616
-
617
- context DaruLite::MultiIndex do
618
- it "raises error when incomplete index specified but index is absent" do
619
- expect {
620
- @df_mi[:d] = [100,200,300,400,100,200,300,400,100,200,300,400]
621
- }.to raise_error
622
- end
623
-
624
- it "assigns all sub-indexes when a top level index is specified" do
625
- @df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
626
-
627
- expect(@df_mi).to eq(DaruLite::DataFrame.new([
628
- [100,200,300,400,100,200,300,400,100,200,300,400],
629
- [100,200,300,400,100,200,300,400,100,200,300,400],
630
- @vector_arry1,
631
- @vector_arry2], index: @multi_index, order: @order_mi))
632
- end
633
-
634
- it "creates a new vector when full index specfied" do
635
- order = DaruLite::MultiIndex.from_tuples([
636
- [:a,:one,:bar],
637
- [:a,:two,:baz],
638
- [:b,:two,:foo],
639
- [:b,:one,:foo],
640
- [:c,:one,:bar]])
641
- answer = DaruLite::DataFrame.new([
642
- @vector_arry1,
643
- @vector_arry2,
644
- @vector_arry1,
645
- @vector_arry2,
646
- [100,200,300,400,100,200,300,400,100,200,300,400]
647
- ], index: @multi_index, order: order)
648
- @df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]
649
-
650
- expect(@df_mi).to eq(answer)
651
- end
652
-
653
- it "assigns correct name given empty dataframe" do
654
- df_empty = DaruLite::DataFrame.new([], index: @multi_index, order: @order_mi)
655
- df_empty[:c, :one, :bar] = 1..12
656
-
657
- expect(df_empty[:c, :one, :bar].name).to eq "conebar"
658
- end
659
- end
660
- end
661
-
662
383
  context '#method_missing' do
663
384
  let(:df) { DaruLite::DataFrame.new({
664
385
  :a => [1, 2, 3, 4, 5],
@@ -720,197 +441,6 @@ describe DaruLite::DataFrame do
720
441
  end
721
442
  end
722
443
 
723
- context '#add_vector' do
724
- subject(:data_frame) {
725
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
726
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
727
- index: [:one, :two, :three, :four, :five])
728
- }
729
- before {
730
- data_frame.add_vector :a, [100,200,300,400,500]
731
- }
732
-
733
- it { is_expected.to eq(DaruLite::DataFrame.new({
734
- b: [11,12,13,14,15],
735
- a: [100,200,300,400,500],
736
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
737
- index: [:one, :two, :three, :four, :five]))
738
- }
739
- end
740
-
741
- context "#insert_vector" do
742
- subject(:data_frame) {
743
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
744
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
745
- index: [:one, :two, :three, :four, :five])
746
- }
747
-
748
- it "insert a new vector at the desired slot" do
749
- df = DaruLite::DataFrame.new({
750
- a: [1,2,3,4,5],
751
- d: [710, 720, 730, 740, 750],
752
- b: [11, 12, 13, 14, 15],
753
- c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
754
- index: [:one, :two, :three, :four, :five]
755
- )
756
- data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
757
- expect(subject).to eq df
758
- end
759
-
760
- it "raises error for data array being too big" do
761
- expect {
762
- source = (1..8).to_a
763
- data_frame.insert_vector 1, :d, source
764
- }.to raise_error(IndexError)
765
- end
766
-
767
- it "raises error for invalid index value" do
768
- expect {
769
- source = (1..5).to_a
770
- data_frame.insert_vector 4, :d, source
771
- }.to raise_error(ArgumentError)
772
- end
773
-
774
- it "raises error for invalid source type" do
775
- expect {
776
- source = 14
777
- data_frame.insert_vector 3, :d, source
778
- }.to raise_error(ArgumentError)
779
- end
780
- end
781
-
782
- context "#row[]=" do
783
- context DaruLite::Index do
784
- before :each do
785
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
786
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
787
- index: [:one, :two, :three, :four, :five])
788
- end
789
-
790
- it "assigns specified row when Array" do
791
- @df.row[:one] = [49, 99, 59]
792
-
793
- expect(@df.row[:one]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
794
- expect(@df.row[:one].index).to eq([:a, :b, :c].to_index)
795
- expect(@df.row[:one].name) .to eq(:one)
796
- end
797
-
798
- it "assigns specified row when DV" do
799
- @df.row[:one] = [49, 99, 59].dv(nil, [:a, :b, :c])
800
-
801
- expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
802
- end
803
-
804
- it "assigns correct elements when Vector of different index" do
805
- @df.row[:one] = DaruLite::Vector.new([44,62,11], index: [:b,:f,:a])
806
-
807
- expect(@df.row[:one]).to eq(DaruLite::Vector.new([11,44,nil], index: [:a,:b,:c]))
808
- end
809
-
810
- it "creates a new row from an Array" do
811
- @df.row[:patekar] = [9,2,11]
812
-
813
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
814
- end
815
-
816
- it "creates a new row from a DV" do
817
- @df.row[:patekar] = [9,2,11].dv(nil, [:a, :b, :c])
818
-
819
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
820
- end
821
-
822
- it "creates a new row from numeric row index and named DV" do
823
- @df.row[2] = [9,2,11].dv(nil, [:a, :b, :c])
824
-
825
- expect(@df.row[2]).to eq([9,2,11].dv(nil, [:a, :b, :c]))
826
- end
827
-
828
- it "correctly aligns assigned DV by index" do
829
- @df.row[:two] = [9,2,11].dv(nil, [:b, :a, :c])
830
-
831
- expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
832
- end
833
-
834
- it "correctlu aligns assinged DV by index for new rows" do
835
- @df.row[:latest] = DaruLite::Vector.new([2,3,1], index: [:b,:c,:a])
836
-
837
- expect(@df.row[:latest]).to eq(DaruLite::Vector.new([1,2,3], index: [:a,:b,:c]))
838
- end
839
-
840
- it "inserts nils for indexes that dont exist in the DataFrame" do
841
- @df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])
842
-
843
- expect(@df.row[:two]).to eq([nil,nil,nil].dv(nil, [:a, :b, :c]))
844
- end
845
-
846
- it "correctly inserts row of a different length by matching indexes" do
847
- @df.row[:four] = [5,4,3,2,1,3].dv(nil, [:you, :have, :a, :big, :appetite, :spock])
848
-
849
- expect(@df.row[:four]).to eq([3,nil,nil].dv(:four, [:a, :b, :c]))
850
- end
851
-
852
- it "raises error for row insertion by Array of wrong length" do
853
- expect{
854
- @df.row[:one] = [1,2,3,4,5,6,7]
855
- }.to raise_error
856
- end
857
- end
858
-
859
- context DaruLite::MultiIndex do
860
- pending
861
- # TO DO
862
- end
863
-
864
- context DaruLite::CategoricalIndex do
865
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
866
- let(:df) do
867
- DaruLite::DataFrame.new({
868
- a: 'a'..'e',
869
- b: 1..5
870
- }, index: idx)
871
- end
872
-
873
- context "modify exiting row" do
874
- context "single category" do
875
- subject { df }
876
- before { df.row[:a] = ['x', 'y'] }
877
-
878
- it { is_expected.to be_a DaruLite::DataFrame }
879
- its(:index) { is_expected.to eq idx }
880
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
881
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
882
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
883
- end
884
-
885
- context "multiple categories" do
886
- subject { df }
887
- before { df.row[:a, 1] = ['x', 'y'] }
888
-
889
- it { is_expected.to be_a DaruLite::DataFrame }
890
- its(:index) { is_expected.to eq idx }
891
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
892
- its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
893
- its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
894
- end
895
-
896
- context "positional index" do
897
- subject { df }
898
- before { df.row[0, 2] = ['x', 'y'] }
899
-
900
- it { is_expected.to be_a DaruLite::DataFrame }
901
- its(:index) { is_expected.to eq idx }
902
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
903
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
904
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
905
- end
906
- end
907
-
908
- context "add new row" do
909
- # TODO
910
- end
911
- end
912
- end
913
-
914
444
  context "#row.at" do
915
445
  context DaruLite::Index do
916
446
  let(:idx) { DaruLite::Index.new [1, 0, :c] }
@@ -1139,311 +669,21 @@ describe DaruLite::DataFrame do
1139
669
  end
1140
670
  end
1141
671
 
1142
- context "#row.set_at" do
1143
- let(:df) do
1144
- DaruLite::DataFrame.new({
1145
- a: 1..3,
1146
- b: 'a'..'c'
1147
- })
1148
- end
672
+ context "#row[]" do
673
+ context DaruLite::Index do
674
+ before :each do
675
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
676
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
677
+ index: [:one, :two, :three, :four, :five])
678
+ end
1149
679
 
1150
- context "single position" do
1151
- subject { df }
1152
- before { df.row.set_at [1], ['x', 'y'] }
680
+ it "creates an index for assignment if not already specified" do
681
+ @df.row[:one] = [49, 99, 59]
1153
682
 
1154
- its(:size) { is_expected.to eq 3 }
1155
- its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1156
- its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1157
- end
1158
-
1159
- context "multiple position" do
1160
- subject { df }
1161
- before { df.row.set_at [0, 2], ['x', 'y'] }
1162
-
1163
- its(:size) { is_expected.to eq 3 }
1164
- its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1165
- its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1166
- end
1167
-
1168
- context "invalid position" do
1169
- it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1170
- end
1171
-
1172
- context "invalid positions" do
1173
- it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1174
- end
1175
-
1176
- context "incorrect size" do
1177
- it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1178
- end
1179
- end
1180
-
1181
- context "#at" do
1182
- context DaruLite::Index do
1183
- let(:idx) { DaruLite::Index.new [:a, :b, :c] }
1184
- let(:df) do
1185
- DaruLite::DataFrame.new({
1186
- 1 => 1..3,
1187
- a: 'a'..'c',
1188
- b: 11..13
1189
- }, index: idx)
1190
- end
1191
-
1192
- context "single position" do
1193
- subject { df.at 1 }
1194
-
1195
- it { is_expected.to be_a DaruLite::Vector }
1196
- its(:size) { is_expected.to eq 3 }
1197
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1198
- its(:index) { is_expected.to eq idx }
1199
- end
1200
-
1201
- context "multiple positions" do
1202
- subject { df.at 0, 2 }
1203
-
1204
- it { is_expected.to be_a DaruLite::DataFrame }
1205
- its(:shape) { is_expected.to eq [3, 2] }
1206
- its(:index) { is_expected.to eq idx }
1207
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1208
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1209
- end
1210
-
1211
- context "single invalid position" do
1212
- it { expect { df. at 3 }.to raise_error IndexError }
1213
- end
1214
-
1215
- context "multiple invalid positions" do
1216
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1217
- end
1218
-
1219
- context "range" do
1220
- subject { df.at 0..1 }
1221
-
1222
- it { is_expected.to be_a DaruLite::DataFrame }
1223
- its(:shape) { is_expected.to eq [3, 2] }
1224
- its(:index) { is_expected.to eq idx }
1225
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1226
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1227
- end
1228
-
1229
- context "range with negative end" do
1230
- subject { df.at 0..-2 }
1231
-
1232
- it { is_expected.to be_a DaruLite::DataFrame }
1233
- its(:shape) { is_expected.to eq [3, 2] }
1234
- its(:index) { is_expected.to eq idx }
1235
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1236
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1237
- end
1238
-
1239
- context "range with single element" do
1240
- subject { df.at 1..1 }
1241
-
1242
- it { is_expected.to be_a DaruLite::DataFrame }
1243
- its(:shape) { is_expected.to eq [3, 1] }
1244
- its(:index) { is_expected.to eq idx }
1245
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1246
- end
1247
- end
1248
-
1249
- context DaruLite::MultiIndex do
1250
- let (:idx) do
1251
- DaruLite::MultiIndex.from_tuples [
1252
- [:a,:one,:bar],
1253
- [:a,:one,:baz],
1254
- [:b,:two,:bar],
1255
- ]
1256
- end
1257
- let(:df) do
1258
- DaruLite::DataFrame.new({
1259
- 1 => 1..3,
1260
- a: 'a'..'c',
1261
- b: 11..13
1262
- }, index: idx)
1263
- end
1264
-
1265
- context "single position" do
1266
- subject { df.at 1 }
1267
-
1268
- it { is_expected.to be_a DaruLite::Vector }
1269
- its(:size) { is_expected.to eq 3 }
1270
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1271
- its(:index) { is_expected.to eq idx }
1272
- end
1273
-
1274
- context "multiple positions" do
1275
- subject { df.at 0, 2 }
1276
-
1277
- it { is_expected.to be_a DaruLite::DataFrame }
1278
- its(:shape) { is_expected.to eq [3, 2] }
1279
- its(:index) { is_expected.to eq idx }
1280
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1281
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1282
- end
1283
-
1284
- context "single invalid position" do
1285
- it { expect { df. at 3 }.to raise_error IndexError }
1286
- end
1287
-
1288
- context "multiple invalid positions" do
1289
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1290
- end
1291
-
1292
- context "range" do
1293
- subject { df.at 0..1 }
1294
-
1295
- it { is_expected.to be_a DaruLite::DataFrame }
1296
- its(:shape) { is_expected.to eq [3, 2] }
1297
- its(:index) { is_expected.to eq idx }
1298
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1299
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1300
- end
1301
-
1302
- context "range with negative end" do
1303
- subject { df.at 0..-2 }
1304
-
1305
- it { is_expected.to be_a DaruLite::DataFrame }
1306
- its(:shape) { is_expected.to eq [3, 2] }
1307
- its(:index) { is_expected.to eq idx }
1308
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1309
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1310
- end
1311
-
1312
- context "range with single element" do
1313
- subject { df.at 1..1 }
1314
-
1315
- it { is_expected.to be_a DaruLite::DataFrame }
1316
- its(:shape) { is_expected.to eq [3, 1] }
1317
- its(:index) { is_expected.to eq idx }
1318
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1319
- end
1320
- end
1321
-
1322
- context DaruLite::CategoricalIndex do
1323
- let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
1324
- let(:df) do
1325
- DaruLite::DataFrame.new({
1326
- 1 => 1..3,
1327
- a: 'a'..'c',
1328
- b: 11..13
1329
- }, index: idx)
1330
- end
1331
-
1332
- context "single position" do
1333
- subject { df.at 1 }
1334
-
1335
- it { is_expected.to be_a DaruLite::Vector }
1336
- its(:size) { is_expected.to eq 3 }
1337
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1338
- its(:index) { is_expected.to eq idx }
1339
- end
1340
-
1341
- context "multiple positions" do
1342
- subject { df.at 0, 2 }
1343
-
1344
- it { is_expected.to be_a DaruLite::DataFrame }
1345
- its(:shape) { is_expected.to eq [3, 2] }
1346
- its(:index) { is_expected.to eq idx }
1347
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1348
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1349
- end
1350
-
1351
- context "single invalid position" do
1352
- it { expect { df. at 3 }.to raise_error IndexError }
1353
- end
1354
-
1355
- context "multiple invalid positions" do
1356
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1357
- end
1358
-
1359
- context "range" do
1360
- subject { df.at 0..1 }
1361
-
1362
- it { is_expected.to be_a DaruLite::DataFrame }
1363
- its(:shape) { is_expected.to eq [3, 2] }
1364
- its(:index) { is_expected.to eq idx }
1365
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1366
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1367
- end
1368
-
1369
- context "range with negative index" do
1370
- subject { df.at 0..-2 }
1371
-
1372
- it { is_expected.to be_a DaruLite::DataFrame }
1373
- its(:shape) { is_expected.to eq [3, 2] }
1374
- its(:index) { is_expected.to eq idx }
1375
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1376
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1377
- end
1378
-
1379
- context "range with single element" do
1380
- subject { df.at 1..1 }
1381
-
1382
- it { is_expected.to be_a DaruLite::DataFrame }
1383
- its(:shape) { is_expected.to eq [3, 1] }
1384
- its(:index) { is_expected.to eq idx }
1385
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1386
- end
1387
- end
1388
- end
1389
-
1390
- context "#set_at" do
1391
- let(:df) do
1392
- DaruLite::DataFrame.new({
1393
- 1 => 1..3,
1394
- a: 'a'..'c',
1395
- b: 11..13
1396
- })
1397
- end
1398
-
1399
- context "single position" do
1400
- subject { df }
1401
- before { df.set_at [1], ['x', 'y', 'z'] }
1402
-
1403
- its(:shape) { is_expected.to eq [3, 3] }
1404
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1405
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1406
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1407
- end
1408
-
1409
- context "multiple position" do
1410
- subject { df }
1411
- before { df.set_at [1, 2], ['x', 'y', 'z'] }
1412
-
1413
- its(:shape) { is_expected.to eq [3, 3] }
1414
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1415
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1416
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1417
- end
1418
-
1419
- context "invalid position" do
1420
- it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1421
- end
1422
-
1423
- context "invalid positions" do
1424
- it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1425
- end
1426
-
1427
- context "incorrect size" do
1428
- it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
1429
- end
1430
- end
1431
-
1432
- context "#row[]" do
1433
- context DaruLite::Index do
1434
- before :each do
1435
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1436
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1437
- index: [:one, :two, :three, :four, :five])
1438
- end
1439
-
1440
- it "creates an index for assignment if not already specified" do
1441
- @df.row[:one] = [49, 99, 59]
1442
-
1443
- expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
1444
- expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
1445
- expect(@df[:one, :row].name) .to eq(:one)
1446
- end
683
+ expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
684
+ expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
685
+ expect(@df[:one, :row].name) .to eq(:one)
686
+ end
1447
687
 
1448
688
  it "returns a DataFrame when specifying numeric Range" do
1449
689
  expect(@df.row[0..2]).to eq(
@@ -1488,7 +728,7 @@ describe DaruLite::DataFrame do
1488
728
 
1489
729
  context DaruLite::MultiIndex do
1490
730
  it "returns a Vector when specifying integer index" do
1491
- expect(@df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: @order_mi))
731
+ expect(df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: order_mi))
1492
732
  end
1493
733
 
1494
734
  it "returns a DataFrame whecn specifying numeric range" do
@@ -1497,16 +737,16 @@ describe DaruLite::DataFrame do
1497
737
  [:a,:one,:baz]
1498
738
  ])
1499
739
 
1500
- expect(@df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
740
+ expect(df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
1501
741
  [11,12],
1502
742
  [1,2],
1503
743
  [11,12],
1504
744
  [1,2]
1505
- ], order: @order_mi, index: sub_index, name: :numeric_range))
745
+ ], order: order_mi, index: sub_index, name: :numeric_range))
1506
746
  end
1507
747
 
1508
748
  it "returns a Vector when specifying complete tuple" do
1509
- expect(@df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: @order_mi))
749
+ expect(df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: order_mi))
1510
750
  end
1511
751
 
1512
752
  it "returns DataFrame when specifying first layer of MultiIndex" do
@@ -1516,12 +756,12 @@ describe DaruLite::DataFrame do
1516
756
  [:two,:foo],
1517
757
  [:two,:bar]
1518
758
  ])
1519
- expect(@df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
759
+ expect(df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
1520
760
  [11,12,13,14],
1521
761
  [1,2,3,4],
1522
762
  [11,12,13,14],
1523
763
  [1,2,3,4]
1524
- ], index: sub_index, order: @order_mi))
764
+ ], index: sub_index, order: order_mi))
1525
765
  end
1526
766
 
1527
767
  it "returns DataFrame when specifying first and second layer of MultiIndex" do
@@ -1529,12 +769,12 @@ describe DaruLite::DataFrame do
1529
769
  [:bar],
1530
770
  [:baz]
1531
771
  ])
1532
- expect(@df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
772
+ expect(df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
1533
773
  [11,12],
1534
774
  [1,2],
1535
775
  [11,12],
1536
776
  [1,2]
1537
- ], index: sub_index, order: @order_mi))
777
+ ], index: sub_index, order: order_mi))
1538
778
  end
1539
779
  end
1540
780
 
@@ -1596,107 +836,6 @@ describe DaruLite::DataFrame do
1596
836
  end
1597
837
  end
1598
838
 
1599
- context "#add_row" do
1600
- subject(:data_frame) {
1601
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1602
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1603
- index: [:one, :two, :three, :four, :five])
1604
- }
1605
- context 'named' do
1606
- before {
1607
- data_frame.add_row [100,200,300], :six
1608
- }
1609
-
1610
- it { is_expected.to eq(DaruLite::DataFrame.new({
1611
- a: [1,2,3,4,5,100],
1612
- b: [11,12,13,14,15,200],
1613
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1614
- index: [:one, :two, :three, :four, :five, :six]))
1615
- }
1616
- end
1617
-
1618
- context 'unnamed' do
1619
- before {
1620
- data_frame.add_row [100,200,300]
1621
- }
1622
-
1623
- it { is_expected.to eq(DaruLite::DataFrame.new({
1624
- a: [1,2,3,4,5,100],
1625
- b: [11,12,13,14,15,200],
1626
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1627
- index: [:one, :two, :three, :four, :five, 5]))
1628
- }
1629
- end
1630
-
1631
- context 'with mulitiindex DF' do
1632
- subject(:data_frame) {
1633
- DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1634
- c: [11,22,33]}, order: [:a, :b, :c],
1635
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
1636
- }
1637
-
1638
- before { data_frame.add_row [100,200,300], [:two, :five] }
1639
-
1640
- it { is_expected.to eq(DaruLite::DataFrame.new({
1641
- b: [11,12,13,200], a: [1,2,3,100],
1642
- c: [11,22,33,300]}, order: [:a, :b, :c],
1643
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
1644
- }
1645
- end
1646
-
1647
- it "allows adding rows after making empty DF by specfying only order" do
1648
- df = DaruLite::DataFrame.new({}, order: [:a, :b, :c])
1649
- df.add_row [1,2,3]
1650
- df.add_row [5,6,7]
1651
-
1652
- expect(df[:a]).to eq(DaruLite::Vector.new([1,5]))
1653
- expect(df[:b]).to eq(DaruLite::Vector.new([2,6]))
1654
- expect(df[:c]).to eq(DaruLite::Vector.new([3,7]))
1655
- expect(df.index).to eq(DaruLite::Index.new([0,1]))
1656
- end
1657
- end
1658
-
1659
- context "#first" do
1660
- it 'works' do
1661
- expect(@data_frame.first(2)).to eq(
1662
- DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1663
- order: [:a, :b, :c],
1664
- index: [:one, :two]))
1665
- end
1666
-
1667
- it 'works with too large values' do
1668
- expect(@data_frame.first(200)).to eq(@data_frame)
1669
- end
1670
-
1671
- it 'has synonym' do
1672
- expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1673
- end
1674
-
1675
- it 'works on DateTime indexes' do
1676
- idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
1677
- df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
1678
- first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
1679
- expect(df.head(1)).to eq(first)
1680
- end
1681
- end
1682
-
1683
- context "#last" do
1684
- it 'works' do
1685
- expect(@data_frame.last(2)).to eq(
1686
- DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1687
- order: [:a, :b, :c],
1688
- index: [:four, :five]))
1689
- end
1690
-
1691
- it 'works with too large values' do
1692
- expect(@data_frame.last(200)).to eq(@data_frame)
1693
- end
1694
-
1695
- it 'has synonym' do
1696
- expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1697
- end
1698
- end
1699
-
1700
839
  context "#==" do
1701
840
  it "compares by vectors, index and values of a DataFrame (ignores name)" do
1702
841
  a = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
@@ -1710,1083 +849,81 @@ describe DaruLite::DataFrame do
1710
849
  end
1711
850
 
1712
851
  context '#rename' do
1713
- subject { @data_frame.rename 'other' }
852
+ subject { df.rename 'other' }
1714
853
 
1715
854
  it { is_expected.to be_a DaruLite::DataFrame }
1716
855
  its(:name) { is_expected.to eq 'other' }
1717
856
  end
1718
857
 
1719
- context "#dup" do
858
+ context "#delete_vector" do
1720
859
  context DaruLite::Index do
1721
- it "dups every data structure inside DataFrame" do
1722
- clo = @data_frame.dup
1723
-
1724
- expect(clo.object_id) .not_to eq(@data_frame.object_id)
1725
- expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
1726
- expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
860
+ it "deletes the specified vector" do
861
+ df.delete_vector :a
1727
862
 
1728
- @data_frame.each_vector_with_index do |vector, index|
1729
- expect(vector.object_id).not_to eq(clo[index].object_id)
1730
- expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
1731
- end
863
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
864
+ c: [11,22,33,44,55]}, order: [:b, :c],
865
+ index: [:one, :two, :three, :four, :five]))
1732
866
  end
1733
867
  end
868
+ end
1734
869
 
1735
- context DaruLite::MultiIndex do
1736
- it "duplicates with multi index" do
1737
- clo = @df_mi.dup
870
+ context "#delete_vectors" do
871
+ context DaruLite::Index do
872
+ it "deletes the specified vectors" do
873
+ df.delete_vectors :a, :b
1738
874
 
1739
- expect(clo) .to eq(@df_mi)
1740
- expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
1741
- expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
875
+ expect(df).to eq(DaruLite::DataFrame.new({
876
+ c: [11,22,33,44,55]}, order: [:c],
877
+ index: [:one, :two, :three, :four, :five]))
1742
878
  end
1743
879
  end
1744
880
  end
1745
881
 
1746
- context '#reject_values' do
1747
- let(:df) do
1748
- DaruLite::DataFrame.new({
1749
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1750
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1751
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1752
- }, index: 11..18)
1753
- end
1754
- before { df.to_category :b }
1755
-
1756
- context 'remove nils only' do
1757
- subject { df.reject_values nil }
1758
- it { is_expected.to be_a DaruLite::DataFrame }
1759
- its(:'b.type') { is_expected.to eq :category }
1760
- its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1761
- its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1762
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1763
- its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1764
- end
1765
-
1766
- context 'remove Float::NAN only' do
1767
- subject { df.reject_values Float::NAN }
1768
- it { is_expected.to be_a DaruLite::DataFrame }
1769
- its(:'b.type') { is_expected.to eq :category }
1770
- its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1771
- its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1772
- its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1773
- its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1774
- end
882
+ context "#delete_row" do
883
+ it "deletes the specified row" do
884
+ df.delete_row :three
1775
885
 
1776
- context 'remove both nil and Float::NAN' do
1777
- subject { df.reject_values nil, Float::NAN }
1778
- it { is_expected.to be_a DaruLite::DataFrame }
1779
- its(:'b.type') { is_expected.to eq :category }
1780
- its(:'a.to_a') { is_expected.to eq [1, 7] }
1781
- its(:'b.to_a') { is_expected.to eq [:a, 8] }
1782
- its(:'c.to_a') { is_expected.to eq ['a', 7] }
1783
- its(:'index.to_a') { is_expected.to eq [11, 18] }
886
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
887
+ c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
1784
888
  end
889
+ end
1785
890
 
1786
- context 'any other values' do
1787
- subject { df.reject_values 1, 5 }
1788
- it { is_expected.to be_a DaruLite::DataFrame }
1789
- its(:'b.type') { is_expected.to eq :category }
1790
- its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1791
- its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1792
- its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1793
- its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
891
+ context "#rename_vectors!" do
892
+ before do
893
+ @df = DaruLite::DataFrame.new({
894
+ a: [1,2,3,4,5],
895
+ b: [11,22,33,44,55],
896
+ c: %w(a b c d e)
897
+ })
1794
898
  end
1795
899
 
1796
- context 'when resultant dataframe has one row' do
1797
- subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1798
- it { is_expected.to be_a DaruLite::DataFrame }
1799
- its(:'b.type') { is_expected.to eq :category }
1800
- its(:'a.to_a') { is_expected.to eq [7] }
1801
- its(:'b.to_a') { is_expected.to eq [8] }
1802
- its(:'c.to_a') { is_expected.to eq [7] }
1803
- its(:'index.to_a') { is_expected.to eq [18] }
900
+ it "returns self as modified dataframe" do
901
+ expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
1804
902
  end
1805
903
 
1806
- context 'when resultant dataframe is empty' do
1807
- subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1808
- it { is_expected.to be_a DaruLite::DataFrame }
1809
- its(:'b.type') { is_expected.to eq :category }
1810
- its(:'a.to_a') { is_expected.to eq [] }
1811
- its(:'b.to_a') { is_expected.to eq [] }
1812
- its(:'c.to_a') { is_expected.to eq [] }
1813
- its(:'index.to_a') { is_expected.to eq [] }
904
+ it "re-uses rename_vectors method" do
905
+ name_map = { :a => :alpha, :c => :gamma }
906
+ expect(@df).to receive(:rename_vectors).with(name_map)
907
+ @df.rename_vectors! name_map
1814
908
  end
1815
909
  end
1816
910
 
1817
- context '#replace_values' do
1818
- subject do
1819
- DaruLite::DataFrame.new({
1820
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1821
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1822
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
911
+ context "#rename_vectors" do
912
+ before do
913
+ @df = DaruLite::DataFrame.new({
914
+ a: [1,2,3,4,5],
915
+ b: [11,22,33,44,55],
916
+ c: %w(a b c d e)
1823
917
  })
1824
918
  end
1825
- before { subject.to_category :b }
1826
919
 
1827
- context 'replace nils only' do
1828
- before { subject.replace_values nil, 10 }
1829
- it { is_expected.to be_a DaruLite::DataFrame }
1830
- its(:'b.type') { is_expected.to eq :category }
1831
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1832
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1833
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
920
+ it "returns DaruLite::Index" do
921
+ expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
1834
922
  end
1835
923
 
1836
- context 'replace Float::NAN only' do
1837
- before { subject.replace_values Float::NAN, 10 }
1838
- it { is_expected.to be_a DaruLite::DataFrame }
1839
- its(:'b.type') { is_expected.to eq :category }
1840
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1841
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1842
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1843
- end
1844
-
1845
- context 'replace both nil and Float::NAN' do
1846
- before { subject.replace_values [nil, Float::NAN], 10 }
1847
- it { is_expected.to be_a DaruLite::DataFrame }
1848
- its(:'b.type') { is_expected.to eq :category }
1849
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1850
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1851
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1852
- end
1853
-
1854
- context 'replace other values' do
1855
- before { subject.replace_values [1, 5], 10 }
1856
- it { is_expected.to be_a DaruLite::DataFrame }
1857
- its(:'b.type') { is_expected.to eq :category }
1858
- its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1859
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1860
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1861
- end
1862
- end
1863
-
1864
- describe 'uniq' do
1865
- let(:df) do
1866
- DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1867
- end
1868
-
1869
- context 'with no args' do
1870
- it do
1871
- result = df.uniq
1872
- expect(result.shape.first).to eq 30
1873
- end
1874
- end
1875
-
1876
- context 'given a vector' do
1877
- it do
1878
- result = df.uniq("color")
1879
- expect(result.shape.first).to eq 2
1880
- end
1881
- end
1882
-
1883
- context 'given an array of vectors' do
1884
- it do
1885
- result = df.uniq("color", "director_name")
1886
- expect(result.shape.first).to eq 29
1887
- end
1888
- end
1889
- end
1890
-
1891
- context '#rolling_fillna!' do
1892
- subject do
1893
- DaruLite::DataFrame.new({
1894
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1895
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1896
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1897
- })
1898
- end
1899
-
1900
- context 'rolling_fillna! forwards' do
1901
- before { subject.rolling_fillna!(:forward) }
1902
- it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
1903
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1904
- its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1905
- its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1906
- end
1907
-
1908
- context 'rolling_fillna! backwards' do
1909
- before { subject.rolling_fillna!(:backward) }
1910
- it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
1911
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1912
- its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
1913
- its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1914
- end
1915
- end
1916
-
1917
- context "#clone" do
1918
- it "returns a view of the whole dataframe" do
1919
- cloned = @data_frame.clone
1920
- expect(@data_frame.object_id).to_not eq(cloned.object_id)
1921
- expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
1922
- expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
1923
- expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
1924
- end
1925
-
1926
- it "returns a view of selected vectors" do
1927
- cloned = @data_frame.clone(:a, :b)
1928
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1929
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1930
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1931
- end
1932
-
1933
- it "clones properly when supplied array" do
1934
- cloned = @data_frame.clone([:a, :b])
1935
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1936
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1937
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1938
- end
1939
-
1940
- it "original dataframe remains unaffected when operations are applied
1941
- on cloned data frame" do
1942
- original = @data_frame.dup
1943
- cloned = @data_frame.clone
1944
- cloned.delete_vector :a
1945
-
1946
- expect(@data_frame).to eq(original)
1947
- end
1948
-
1949
- end
1950
-
1951
- context "#clone_only_valid" do
1952
- let(:df_with_missing) {
1953
- DaruLite::DataFrame.new({
1954
- a: [1 , 2, 3, nil, 4, nil, 5],
1955
- b: [nil, 2, 3, nil, 4, nil, 5],
1956
- c: [1, 2, 3, 43 , 4, nil, 5]
1957
- })
1958
- }
1959
-
1960
- let(:df_without_missing) {
1961
- DaruLite::DataFrame.new({
1962
- a: [2,3,4,5],
1963
- c: [2,3,4,5]
1964
- })
1965
- }
1966
- it 'does the most reasonable thing' do
1967
- expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*DaruLite::MISSING_VALUES))
1968
- expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1969
- end
1970
- end
1971
-
1972
- context "#clone_structure" do
1973
- it "clones only the index and vector structures of the data frame" do
1974
- cs = @data_frame.clone_structure
1975
-
1976
- expect(cs.vectors).to eq(@data_frame.vectors)
1977
- expect(cs.index).to eq(@data_frame.index)
1978
- expect(cs[:a]).to eq(DaruLite::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
1979
- end
1980
- end
1981
-
1982
- context "#each_index" do
1983
- it "iterates over index" do
1984
- idxs = []
1985
- ret = @data_frame.each_index do |index|
1986
- idxs << index
1987
- end
1988
-
1989
- expect(idxs).to eq([:one, :two, :three, :four, :five])
1990
-
1991
- expect(ret).to eq(@data_frame)
1992
- end
1993
- end
1994
-
1995
- context "#each_vector_with_index" do
1996
- it "iterates over vectors with index" do
1997
- idxs = []
1998
- ret = @data_frame.each_vector_with_index do |vector, index|
1999
- idxs << index
2000
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2001
- expect(vector.class).to eq(DaruLite::Vector)
2002
- end
2003
-
2004
- expect(idxs).to eq([:a, :b, :c])
2005
-
2006
- expect(ret).to eq(@data_frame)
2007
- end
2008
- end
2009
-
2010
- context "#each_row_with_index" do
2011
- it "iterates over rows with indexes" do
2012
- idxs = []
2013
- ret = @data_frame.each_row_with_index do |row, idx|
2014
- idxs << idx
2015
- expect(row.index).to eq([:a, :b, :c].to_index)
2016
- expect(row.class).to eq(DaruLite::Vector)
2017
- end
2018
-
2019
- expect(idxs).to eq([:one, :two, :three, :four, :five])
2020
- expect(ret) .to eq(@data_frame)
2021
- end
2022
- end
2023
-
2024
- context "#each" do
2025
- it "iterates over rows" do
2026
- ret = @data_frame.each(:row) do |row|
2027
- expect(row.index).to eq([:a, :b, :c].to_index)
2028
- expect(row.class).to eq(DaruLite::Vector)
2029
- end
2030
-
2031
- expect(ret).to eq(@data_frame)
2032
- end
2033
-
2034
- it "iterates over all vectors" do
2035
- ret = @data_frame.each do |vector|
2036
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2037
- expect(vector.class).to eq(DaruLite::Vector)
2038
- end
2039
-
2040
- expect(ret).to eq(@data_frame)
2041
- end
2042
-
2043
- it "returns Enumerable if no block specified" do
2044
- ret = @data_frame.each
2045
- expect(ret.is_a?(Enumerator)).to eq(true)
2046
- end
2047
-
2048
- it "raises on unknown axis" do
2049
- expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
2050
- end
2051
- end
2052
-
2053
- context "#recode" do
2054
- before do
2055
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2056
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2057
- index: [:one, :two, :three, :four, :five])
2058
-
2059
- @ans_rows = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2060
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2061
- index: [:one, :two, :three, :four, :five])
2062
-
2063
- @data_frame_date_time = @data_frame.dup
2064
- @data_frame_date_time.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
2065
-
2066
- @ans_vector_date_time = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2067
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2068
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2069
-
2070
- @ans_rows_date_time = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2071
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2072
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2073
- end
2074
-
2075
- it "maps over the vectors of a DataFrame and returns a DataFrame" do
2076
- ret = @data_frame.recode do |vector|
2077
- vector.map! { |e| e += 10}
2078
- end
2079
-
2080
- expect(ret).to eq(@ans_vector)
2081
- end
2082
-
2083
- it "maps over the rows of a DataFrame and returns a DataFrame" do
2084
- ret = @data_frame.recode(:row) do |row|
2085
- expect(row.class).to eq(DaruLite::Vector)
2086
- row.map! { |e| e*e }
2087
- end
2088
-
2089
- expect(ret).to eq(@ans_rows)
2090
- end
2091
-
2092
- it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2093
- ret = @data_frame_date_time.recode do |vector|
2094
- vector.map! { |e| e += 10}
2095
- end
2096
-
2097
- expect(ret).to eq(@ans_vector_date_time)
2098
- end
2099
-
2100
- it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2101
- ret = @data_frame_date_time.recode(:row) do |row|
2102
- expect(row.class).to eq(DaruLite::Vector)
2103
- row.map! { |e| e*e }
2104
- end
2105
-
2106
- expect(ret).to eq(@ans_rows_date_time)
2107
- end
2108
-
2109
- end
2110
-
2111
- context "#collect" do
2112
- before do
2113
- @df = DaruLite::DataFrame.new({
2114
- a: [1,2,3,4,5],
2115
- b: [11,22,33,44,55],
2116
- c: [1,2,3,4,5]
2117
- })
2118
- end
2119
-
2120
- it "collects calculation over rows and returns a Vector from the results" do
2121
- expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
2122
- DaruLite::Vector.new([2,8,18,32,50])
2123
- )
2124
- end
2125
-
2126
- it "collects calculation over vectors and returns a Vector from the results" do
2127
- expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
2128
- DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
2129
- )
2130
- end
2131
- end
2132
-
2133
- context "#map" do
2134
- it "iterates over rows and returns an Array" do
2135
- ret = @data_frame.map(:row) do |row|
2136
- expect(row.class).to eq(DaruLite::Vector)
2137
- row[:a] * row[:c]
2138
- end
2139
-
2140
- expect(ret).to eq([11, 44, 99, 176, 275])
2141
- expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
2142
- end
2143
-
2144
- it "iterates over vectors and returns an Array" do
2145
- ret = @data_frame.map do |vector|
2146
- vector.mean
2147
- end
2148
- expect(ret).to eq([3.0, 13.0, 33.0])
2149
- end
2150
- end
2151
-
2152
- context "#map!" do
2153
- before do
2154
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2155
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2156
- index: [:one, :two, :three, :four, :five])
2157
-
2158
- @ans_row = DaruLite::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
2159
- c: [12,23,34,45,56]}, order: [:a, :b, :c],
2160
- index: [:one, :two, :three, :four, :five])
2161
- end
2162
-
2163
- it "destructively maps over the vectors and changes the DF" do
2164
- @data_frame.map! do |vector|
2165
- vector + 10
2166
- end
2167
- expect(@data_frame).to eq(@ans_vector)
2168
- end
2169
-
2170
- it "destructively maps over the rows and changes the DF" do
2171
- @data_frame.map!(:row) do |row|
2172
- row + 1
2173
- end
2174
-
2175
- expect(@data_frame).to eq(@ans_row)
2176
- end
2177
- end
2178
-
2179
- context "#map_vectors_with_index" do
2180
- it "iterates over vectors with index and returns an Array" do
2181
- idx = []
2182
- ret = @data_frame.map_vectors_with_index do |vector, index|
2183
- idx << index
2184
- vector.recode { |e| e += 10}
2185
- end
2186
-
2187
- expect(ret).to eq([
2188
- DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
2189
- DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
2190
- DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
2191
- expect(idx).to eq([:a, :b, :c])
2192
- end
2193
- end
2194
-
2195
- # FIXME: collect_VECTORS_with_index, but map_VECTOR_with_index -- ??? -- zverok
2196
- # (Not saying about unfortunate difference between them...)
2197
- context "#collect_vector_with_index" do
2198
- it "iterates over vectors with index and returns an Array" do
2199
- idx = []
2200
- ret = @data_frame.collect_vector_with_index do |vector, index|
2201
- idx << index
2202
- vector.sum
2203
- end
2204
-
2205
- expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2206
- expect(idx).to eq([:a, :b, :c])
2207
- end
2208
- end
2209
-
2210
- context "#map_rows_with_index" do
2211
- it "iterates over rows with index and returns an Array" do
2212
- idx = []
2213
- ret = @data_frame.map_rows_with_index do |row, index|
2214
- idx << index
2215
- expect(row.class).to eq(DaruLite::Vector)
2216
- row[:a] * row[:c]
2217
- end
2218
-
2219
- expect(ret).to eq([11, 44, 99, 176, 275])
2220
- expect(idx).to eq([:one, :two, :three, :four, :five])
2221
- end
2222
- end
2223
-
2224
- context '#collect_row_with_index' do
2225
- it "iterates over rows with index and returns a Vector" do
2226
- idx = []
2227
- ret = @data_frame.collect_row_with_index do |row, index|
2228
- idx << index
2229
- expect(row.class).to eq(DaruLite::Vector)
2230
- row[:a] * row[:c]
2231
- end
2232
-
2233
- expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2234
- expect(ret).to eq(expected)
2235
- expect(idx).to eq([:one, :two, :three, :four, :five])
2236
- end
2237
- end
2238
-
2239
- context "#delete_vector" do
2240
- context DaruLite::Index do
2241
- it "deletes the specified vector" do
2242
- @data_frame.delete_vector :a
2243
-
2244
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
2245
- c: [11,22,33,44,55]}, order: [:b, :c],
2246
- index: [:one, :two, :three, :four, :five]))
2247
- end
2248
- end
2249
- end
2250
-
2251
- context "#delete_vectors" do
2252
- context DaruLite::Index do
2253
- it "deletes the specified vectors" do
2254
- @data_frame.delete_vectors :a, :b
2255
-
2256
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
2257
- c: [11,22,33,44,55]}, order: [:c],
2258
- index: [:one, :two, :three, :four, :five]))
2259
- end
2260
- end
2261
- end
2262
-
2263
- context "#delete_row" do
2264
- it "deletes the specified row" do
2265
- @data_frame.delete_row :three
2266
-
2267
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
2268
- c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
2269
- end
2270
- end
2271
-
2272
- context "#keep_row_if" do
2273
- pending "changing row from under the iterator trips this"
2274
- it "keeps row if block evaluates to true" do
2275
- df = DaruLite::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
2276
- c: [10,20,30,40,50]}, order: [:a, :b, :c],
2277
- index: [:one, :two, :three, :four, :five])
2278
-
2279
- df.keep_row_if do |row|
2280
- row[:a] % 10 == 0
2281
- end
2282
- # TODO: write expectation
2283
- end
2284
- end
2285
-
2286
- context "#keep_vector_if" do
2287
- it "keeps vector if block evaluates to true" do
2288
- @data_frame.keep_vector_if do |vector|
2289
- vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
2290
- end
2291
-
2292
- expect(@data_frame).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
2293
- index: [:one, :two, :three, :four, :five]))
2294
- end
2295
- end
2296
-
2297
- context "#filter_field" do
2298
- before do
2299
- @df = DaruLite::DataFrame.new({
2300
- :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
2301
- :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
2302
- :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
2303
- :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
2304
- :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
2305
- order: [:id, :name, :age, :city, :a1])
2306
- end
2307
-
2308
- it "creates new vector with the data of a given field for which block returns true" do
2309
- filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
2310
- expect(filtered).to eq(DaruLite::Vector.new([2,4]))
2311
- end
2312
- end
2313
-
2314
- context "#filter_rows" do
2315
- context DaruLite::Index do
2316
- context "when specified no index" do
2317
- it "filters rows" do
2318
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2319
-
2320
- a = df.filter_rows do |row|
2321
- row[:a] % 2 == 0
2322
- end
2323
-
2324
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
2325
- end
2326
- end
2327
-
2328
- context "when specified numerical index" do
2329
- it "filters rows" do
2330
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
2331
-
2332
- a = df.filter_rows do |row|
2333
- row[:a] % 2 == 0
2334
- end
2335
-
2336
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
2337
- end
2338
- end
2339
-
2340
- it "preserves names of vectors" do
2341
- df = DaruLite::DataFrame.new a: 1..3, b: 4..6
2342
- df1 = df.filter_rows { |r| r[:a] != 2 }
2343
-
2344
- expect(df1[:a].name).to eq(df[:a].name)
2345
- end
2346
- end
2347
- end
2348
-
2349
- context "#filter_vectors" do
2350
- context DaruLite::Index do
2351
- it "filters vectors" do
2352
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2353
-
2354
- a = df.filter_vectors do |vector|
2355
- vector[0] == 1
2356
- end
2357
-
2358
- expect(a).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
2359
- end
2360
- end
2361
- end
2362
-
2363
- context "#filter" do
2364
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2365
- it "dispatches" do
2366
- expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2367
- eq df.filter_rows{|r| r[:a] % 2 == 0 }
2368
-
2369
- expect(df.filter(:vector){|v| v[0] == 1}).to \
2370
- eq df.filter_vectors{|v| v[0] == 1}
2371
-
2372
- expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2373
- end
2374
- end
2375
-
2376
- context "#to_a" do
2377
- context DaruLite::Index do
2378
- it "converts DataFrame into array of hashes" do
2379
- arry = @data_frame.to_a
2380
-
2381
- expect(arry).to eq(
2382
- [
2383
- [
2384
- {a: 1, b: 11, c: 11},
2385
- {a: 2, b: 12, c: 22},
2386
- {a: 3, b: 13, c: 33},
2387
- {a: 4, b: 14, c: 44},
2388
- {a: 5, b: 15, c: 55}
2389
- ],
2390
- [
2391
- :one, :two, :three, :four, :five
2392
- ]
2393
- ])
2394
- end
2395
- end
2396
-
2397
- context DaruLite::MultiIndex do
2398
- pending
2399
- end
2400
- end
2401
-
2402
- context "#to_h" do
2403
- it "converts to a hash" do
2404
- expect(@data_frame.to_h).to eq(
2405
- {
2406
- a: DaruLite::Vector.new([1,2,3,4,5],
2407
- index: [:one, :two, :three, :four, :five]),
2408
- b: DaruLite::Vector.new([11,12,13,14,15],
2409
- index: [:one, :two, :three, :four, :five]),
2410
- c: DaruLite::Vector.new([11,22,33,44,55],
2411
- index: [:one, :two, :three, :four, :five])
2412
- }
2413
- )
2414
- end
2415
- end
2416
-
2417
- context "#sort" do
2418
- context DaruLite::Index do
2419
- before :each do
2420
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2421
- end
2422
-
2423
- it "sorts according to given vector order (bang)" do
2424
- a_sorter = lambda { |a| a }
2425
- ans = @df.sort([:a], by: { a: a_sorter })
2426
-
2427
- expect(ans).to eq(
2428
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
2429
- index: [2,1,0,4,5,3])
2430
- )
2431
- expect(ans).to_not eq(@df)
2432
- end
2433
-
2434
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2435
- ans = @df.sort([:a, :b])
2436
- expect(ans).to eq(
2437
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2438
- index: [2,1,0,5,4,3])
2439
- )
2440
- expect(ans).to_not eq(@df)
2441
- end
2442
- end
2443
-
2444
- context DaruLite::MultiIndex do
2445
- pending
2446
- end
2447
-
2448
- context DaruLite::CategoricalIndex do
2449
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2450
- let(:df) do
2451
- DaruLite::DataFrame.new({
2452
- a: [2, -1, 3, 4, 5],
2453
- b: ['x', 'y', 'x', 'a', 'y'],
2454
- c: [nil, nil, -2, 2, 1]
2455
- }, index: idx)
2456
- end
2457
-
2458
- context "ascending order" do
2459
- context "single vector" do
2460
- subject { df.sort [:a] }
2461
-
2462
- its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2463
- its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2464
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2465
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2466
- end
2467
-
2468
- context "multiple vectors" do
2469
- subject { df.sort [:c, :b] }
2470
-
2471
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2472
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2473
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2474
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2475
- end
2476
-
2477
- context "block" do
2478
- context "automatic handle nils" do
2479
- subject do
2480
- df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2481
- end
2482
-
2483
- its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2484
- its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2485
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2486
- its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2487
- end
2488
-
2489
- context "manually handle nils" do
2490
- subject do
2491
- df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2492
- end
2493
-
2494
- its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2495
- its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2496
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2497
- its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2498
- end
2499
- end
2500
- end
2501
-
2502
- context "descending order" do
2503
- context "single vector" do
2504
- subject { df.sort [:a], ascending: false }
2505
-
2506
- its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2507
- its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2508
- its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2509
- its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2510
- end
2511
-
2512
- context "multiple vectors" do
2513
- subject { df.sort [:c, :b], ascending: false }
2514
-
2515
- its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2516
- its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2517
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2518
- its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2519
- end
2520
-
2521
- context "block" do
2522
- context "automatic handle nils" do
2523
- subject do
2524
- df.sort [:c],
2525
- by: {c: lambda { |a| a.abs } },
2526
- handle_nils: true,
2527
- ascending: false
2528
- end
2529
-
2530
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2531
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2532
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2533
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2534
- end
2535
-
2536
- context "manually handle nils" do
2537
- subject do
2538
- df.sort [:c],
2539
- by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2540
- ascending: false
2541
- end
2542
-
2543
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2544
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2545
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2546
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2547
- end
2548
- end
2549
- end
2550
- end
2551
- end
2552
-
2553
- context "#sort!" do
2554
- context DaruLite::Index do
2555
- before :each do
2556
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
2557
- c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2558
- end
2559
-
2560
- it "sorts according to given vector order (bang)" do
2561
- a_sorter = lambda { |a| a }
2562
-
2563
- expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
2564
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
2565
- c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
2566
- )
2567
- end
2568
-
2569
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2570
- expect(@df.sort!([:a, :b])).to eq(
2571
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2572
- index: [2,1,0,5,4,3])
2573
- )
2574
- end
2575
-
2576
- it "sorts both vectors in descending order" do
2577
- expect(@df.sort!([:a,:b], ascending: [false, false])).to eq(
2578
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,9,1,-2,-1,5], c: ['aaaa','aaaaa','aaaaaa', 'a','aa', 'aaa'] },
2579
- index: [3,4,5,0,1,2])
2580
- )
2581
- end
2582
-
2583
- it "sorts one vector in desc and other is asc" do
2584
- expect(@df.sort!([:a, :b], ascending: [false, true])).to eq(
2585
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,-2,1,9,-1,5], c: ['aaaa','a','aaaaaa','aaaaa','aa','aaa']},
2586
- index: [3,0,5,4,1,2])
2587
- )
2588
- end
2589
-
2590
- it "sorts many vectors" do
2591
- d = DaruLite::DataFrame.new({a: [1,1,1,222,44,5,5,544], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2592
-
2593
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2594
- DaruLite::DataFrame.new({a: [544,222,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2595
- index: [7,3,4,6,5,0,1,2])
2596
- )
2597
- end
2598
-
2599
- it "places nils at the beginning when sorting ascedingly" do
2600
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2601
-
2602
- expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
2603
- DaruLite::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
2604
- index: [7,3,0,1,2,6,5,4])
2605
- )
2606
- end
2607
-
2608
- it "places nils at the beginning when sorting decendingly" do
2609
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2610
-
2611
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2612
- DaruLite::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2613
- index: [7,3,4,6,5,0,1,2])
2614
- )
2615
- end
2616
-
2617
- it "sorts vectors of non-numeric types with nils in ascending order" do
2618
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2619
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2620
-
2621
- expect(non_numeric.sort!([:c], ascending: [true])).to eq(
2622
- DaruLite::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
2623
- c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
2624
- index: [2, 5, 0, 1, 3, 4])
2625
- )
2626
- end
2627
-
2628
- it "sorts vectors of non-numeric types with nils in descending order" do
2629
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2630
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2631
-
2632
- expect(non_numeric.sort!([:c], ascending: [false])).to eq(
2633
- DaruLite::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
2634
- c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
2635
- index: [2, 5, 4, 3, 0, 1])
2636
- )
2637
- end
2638
-
2639
- it "sorts vectors with block provided and handle nils automatically" do
2640
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2641
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2642
-
2643
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
2644
- DaruLite::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
2645
- c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
2646
- index: [0, 3, 1, 2, 4, 5])
2647
- )
2648
- end
2649
-
2650
- it "sorts vectors with block provided and nils handled manually" do
2651
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2652
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2653
-
2654
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
2655
- DaruLite::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
2656
- c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
2657
- index: [1, 2, 4, 5, 0, 3])
2658
- )
2659
- end
2660
- end
2661
-
2662
- context DaruLite::MultiIndex do
2663
- pending
2664
- it "sorts the DataFrame when specified full tuple" do
2665
- @df_mi.sort([[:a,:one,:bar]])
2666
- end
2667
- end
2668
- end
2669
-
2670
- context "#index=" do
2671
- before :each do
2672
- @df = DaruLite::DataFrame.new({
2673
- a: [1,2,3,4,5],
2674
- b: [11,22,33,44,55],
2675
- c: %w(a b c d e)
2676
- })
2677
- end
2678
-
2679
- it "simply reassigns the index" do
2680
- @df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
2681
- expect(@df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
2682
- end
2683
-
2684
- it "raises error for improper length index" do
2685
- expect {
2686
- @df.index = DaruLite::Index.new([1,2])
2687
- }.to raise_error(ArgumentError)
2688
- end
2689
-
2690
- it "is able to accept array" do
2691
- @df.index = (1..5).to_a
2692
- expect(@df.index).to eq DaruLite::Index.new (1..5).to_a
2693
- end
2694
- end
2695
-
2696
- context '#order=' do
2697
- let(:df) do
2698
- DaruLite::DataFrame.new({
2699
- a: [1, 2, 3],
2700
- b: [4, 5, 6]
2701
- }, order: [:a, :b])
2702
- end
2703
-
2704
- context 'correct order' do
2705
- before { df.order = [:b, :a] }
2706
- subject { df }
2707
-
2708
- its(:'vectors.to_a') { is_expected.to eq [:b, :a] }
2709
- its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
2710
- its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
2711
- end
2712
-
2713
- context 'insufficient vectors' do
2714
- it { expect { df.order = [:a] }.to raise_error }
2715
- end
2716
-
2717
- context 'wrong vectors' do
2718
- it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
2719
- end
2720
- end
2721
-
2722
- context "#vectors=" do
2723
- before :each do
2724
- @df = DaruLite::DataFrame.new({
2725
- a: [1,2,3,4,5],
2726
- b: [11,22,33,44,55],
2727
- c: %w(a b c d e)
2728
- })
2729
- end
2730
-
2731
- it "simply reassigns vectors" do
2732
- @df.vectors = DaruLite::Index.new(['b',0,'m'])
2733
-
2734
- expect(@df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
2735
- expect(@df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
2736
- expect(@df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
2737
- expect(@df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
2738
- end
2739
-
2740
- it "raises error for improper length index" do
2741
- expect {
2742
- @df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
2743
- }.to raise_error(ArgumentError)
2744
- end
2745
-
2746
- it "change name of vectors in @data" do
2747
- new_index_array = [:k, :l, :m]
2748
- @df.vectors = DaruLite::Index.new(new_index_array)
2749
-
2750
- expect(@df.data.map { |vector| vector.name }).to eq(new_index_array)
2751
- end
2752
- end
2753
-
2754
- context "#rename_vectors!" do
2755
- before do
2756
- @df = DaruLite::DataFrame.new({
2757
- a: [1,2,3,4,5],
2758
- b: [11,22,33,44,55],
2759
- c: %w(a b c d e)
2760
- })
2761
- end
2762
-
2763
- it "returns self as modified dataframe" do
2764
- expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
2765
- end
2766
-
2767
- it "re-uses rename_vectors method" do
2768
- name_map = { :a => :alpha, :c => :gamma }
2769
- expect(@df).to receive(:rename_vectors).with(name_map)
2770
- @df.rename_vectors! name_map
2771
- end
2772
- end
2773
-
2774
- context "#rename_vectors" do
2775
- before do
2776
- @df = DaruLite::DataFrame.new({
2777
- a: [1,2,3,4,5],
2778
- b: [11,22,33,44,55],
2779
- c: %w(a b c d e)
2780
- })
2781
- end
2782
-
2783
- it "returns DaruLite::Index" do
2784
- expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
2785
- end
2786
-
2787
- it "renames vectors using a hash map" do
2788
- @df.rename_vectors :a => :alpha, :c => :gamma
2789
- expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
924
+ it "renames vectors using a hash map" do
925
+ @df.rename_vectors :a => :alpha, :c => :gamma
926
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
2790
927
  end
2791
928
 
2792
929
  it "overwrites vectors if the new name already exists" do
@@ -2832,69 +969,10 @@ describe DaruLite::DataFrame do
2832
969
  end
2833
970
  end
2834
971
 
2835
- context "#reindex" do
2836
- it "re indexes and aligns accordingly" do
2837
- df = DaruLite::DataFrame.new({
2838
- a: [1,2,3,4,5],
2839
- b: [11,22,33,44,55],
2840
- c: %w(a b c d e)
2841
- })
2842
-
2843
- ans = df.reindex(DaruLite::Index.new([1,3,0,8,2]))
2844
- expect(ans).to eq(DaruLite::DataFrame.new({
2845
- a: [2,4,1,nil,3],
2846
- b: [22,44,11,nil,33],
2847
- c: ['b','d','a',nil,'c']
2848
- }, index: DaruLite::Index.new([1,3,0,8,2])))
2849
- expect(ans).to_not eq(df)
2850
- end
2851
- end
2852
-
2853
- context "#reindex_vectors" do
2854
- it "re indexes vectors and aligns accordingly" do
2855
- df = DaruLite::DataFrame.new({
2856
- a: [1,2,3,4,5],
2857
- b: [11,22,33,44,55],
2858
- c: %w(a b c d e)
2859
- })
2860
-
2861
- ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
2862
- expect(ans).to eq(DaruLite::DataFrame.new({
2863
- :b => [11,22,33,44,55],
2864
- 'a' => [nil, nil, nil, nil, nil],
2865
- :a => [1,2,3,4,5]
2866
- }, order: [:b, 'a', :a]))
2867
- end
2868
-
2869
- it 'raises ArgumentError if argument was not an index' do
2870
- df = DaruLite::DataFrame.new([])
2871
- expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2872
- end
2873
- end
2874
-
2875
- context "#to_matrix" do
2876
- before do
2877
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
2878
- c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
2879
- order: [:a, :b, :c,:d,:e],
2880
- index: [:one, :two, :three, :four, :five])
2881
- end
2882
-
2883
- it "concats numeric non-nil vectors to Matrix" do
2884
- expect(@df.to_matrix).to eq(Matrix[
2885
- [1,11,11,5],
2886
- [2,12,22,4],
2887
- [3,13,33,nil],
2888
- [4,14,44,2],
2889
- [5,15,55,1]
2890
- ])
2891
- end
2892
- end
2893
-
2894
972
  context "#transpose" do
2895
973
  context DaruLite::Index do
2896
974
  it "transposes a DataFrame including row and column indexing" do
2897
- expect(@data_frame.transpose).to eq(DaruLite::DataFrame.new({
975
+ expect(df.transpose).to eq(DaruLite::DataFrame.new({
2898
976
  one: [1,11,11],
2899
977
  two: [2,12,22],
2900
978
  three: [3,13,33],
@@ -2903,533 +981,37 @@ describe DaruLite::DataFrame do
2903
981
  }, index: [:a, :b, :c],
2904
982
  order: [:one, :two, :three, :four, :five])
2905
983
  )
2906
- end
2907
- end
2908
-
2909
- context DaruLite::MultiIndex do
2910
- it "transposes a DataFrame including row and column indexing" do
2911
- expect(@df_mi.transpose).to eq(DaruLite::DataFrame.new([
2912
- @vector_arry1,
2913
- @vector_arry2,
2914
- @vector_arry1,
2915
- @vector_arry2].transpose, index: @order_mi, order: @multi_index))
2916
- end
2917
- end
2918
- end
2919
-
2920
- context "#pivot_table" do
2921
- before do
2922
- @df = DaruLite::DataFrame.new({
2923
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
2924
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
2925
- c: ['small','large','large','small','small','large','small','large','small'],
2926
- d: [1,2,2,3,3,4,5,6,7],
2927
- e: [2,4,4,6,6,8,10,12,14]
2928
- })
2929
- end
2930
-
2931
- it "creates row index as per (single) index argument and default aggregates to mean" do
2932
- expect(@df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
2933
- d: [5.5,2.2],
2934
- e: [11.0,4.4]
2935
- }, index: ['bar', 'foo']))
2936
- end
2937
-
2938
- it "creates row index as per (double) index argument and default aggregates to mean" do
2939
- agg_mi = DaruLite::MultiIndex.from_tuples(
2940
- [
2941
- ['bar', 'large'],
2942
- ['bar', 'small'],
2943
- ['foo', 'large'],
2944
- ['foo', 'small']
2945
- ]
2946
- )
2947
- expect(@df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
2948
- d: [5.0 , 6.0, 2.0, 2.33],
2949
- e: [10.0, 12.0, 4.0, 4.67]
2950
- }, index: agg_mi))
2951
- end
2952
-
2953
- it "creates row and vector index as per (single) index and (single) vectors args" do
2954
- agg_vectors = DaruLite::MultiIndex.from_tuples([
2955
- [:d, 'one'],
2956
- [:d, 'two'],
2957
- [:e, 'one'],
2958
- [:e, 'two']
2959
- ])
2960
- agg_index = DaruLite::MultiIndex.from_tuples(
2961
- [
2962
- ['bar'],
2963
- ['foo']
2964
- ]
2965
- )
2966
-
2967
- expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
2968
- DaruLite::DataFrame.new(
2969
- [
2970
- [4.5, 1.67],
2971
- [6.5, 3.0],
2972
- [9.0, 3.33],
2973
- [13, 6]
2974
- ], order: agg_vectors, index: agg_index)
2975
- )
2976
- end
2977
-
2978
- it "creates row and vector index as per (single) index and (double) vector args" do
2979
- agg_vectors = DaruLite::MultiIndex.from_tuples(
2980
- [
2981
- [:d, 'one', 'large'],
2982
- [:d, 'one', 'small'],
2983
- [:d, 'two', 'large'],
2984
- [:d, 'two', 'small'],
2985
- [:e, 'one', 'large'],
2986
- [:e, 'one', 'small'],
2987
- [:e, 'two', 'large'],
2988
- [:e, 'two', 'small']
2989
- ]
2990
- )
2991
-
2992
- agg_index = DaruLite::MultiIndex.from_tuples(
2993
- [
2994
- ['bar'],
2995
- ['foo']
2996
- ]
2997
- )
2998
-
2999
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
3000
- [
3001
- [4.0,2.0],
3002
- [5.0,1.0],
3003
- [6.0,nil],
3004
- [7.0,3.0],
3005
- [8.0,4.0],
3006
- [10.0,2.0],
3007
- [12.0,nil],
3008
- [14.0,6.0]
3009
- ], order: agg_vectors, index: agg_index
3010
- ))
3011
- end
3012
-
3013
- it "creates row and vector index with (double) index and (double) vector args" do
3014
- agg_index = DaruLite::MultiIndex.from_tuples([
3015
- ['bar', 4],
3016
- ['bar', 5],
3017
- ['bar', 6],
3018
- ['bar', 7],
3019
- ['foo', 1],
3020
- ['foo', 2],
3021
- ['foo', 3]
3022
- ])
3023
-
3024
- agg_vectors = DaruLite::MultiIndex.from_tuples([
3025
- [:e, 'one', 'large'],
3026
- [:e, 'one', 'small'],
3027
- [:e, 'two', 'large'],
3028
- [:e, 'two', 'small']
3029
- ])
3030
-
3031
- expect(@df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
3032
- DaruLite::DataFrame.new(
3033
- [
3034
- [8 ,nil,nil,nil,nil, 4,nil],
3035
- [nil, 10,nil,nil, 2,nil,nil],
3036
- [nil,nil, 12,nil,nil,nil,nil],
3037
- [nil,nil,nil, 14,nil,nil, 6],
3038
- ], index: agg_index, order: agg_vectors)
3039
- )
3040
- end
3041
-
3042
- it "only aggregates over the vector specified in the values argument" do
3043
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3044
- [
3045
- [:e, 'one', 'large'],
3046
- [:e, 'one', 'small'],
3047
- [:e, 'two', 'large'],
3048
- [:e, 'two', 'small']
3049
- ]
3050
- )
3051
- agg_index = DaruLite::MultiIndex.from_tuples(
3052
- [
3053
- ['bar'],
3054
- ['foo']
3055
- ]
3056
- )
3057
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
3058
- DaruLite::DataFrame.new(
3059
- [
3060
- [8, 4],
3061
- [10, 2],
3062
- [12,nil],
3063
- [14, 6]
3064
- ], order: agg_vectors, index: agg_index
3065
- )
3066
- )
3067
-
3068
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3069
- [
3070
- [:d, 'one'],
3071
- [:d, 'two'],
3072
- [:e, 'one'],
3073
- [:e, 'two']
3074
- ]
3075
- )
3076
- expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
3077
- DaruLite::DataFrame.new(
3078
- [
3079
- [4.5, 5.0/3],
3080
- [6.5, 3.0],
3081
- [9.0, 10.0/3],
3082
- [13.0, 6.0]
3083
- ], order: agg_vectors, index: agg_index
3084
- )
3085
- )
3086
- end
3087
-
3088
- it "overrides default aggregate function to aggregate over sum" do
3089
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3090
- [
3091
- [:e, 'one', 'large'],
3092
- [:e, 'one', 'small'],
3093
- [:e, 'two', 'large'],
3094
- [:e, 'two', 'small']
3095
- ]
3096
- )
3097
- agg_index = DaruLite::MultiIndex.from_tuples(
3098
- [
3099
- ['bar'],
3100
- ['foo']
3101
- ]
3102
- )
3103
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
3104
- DaruLite::DataFrame.new(
3105
- [
3106
- [8, 8],
3107
- [10, 2],
3108
- [12,nil],
3109
- [14, 12]
3110
- ], order: agg_vectors, index: agg_index
3111
- )
3112
- )
3113
- end
3114
-
3115
- it "raises error if no non-numeric vectors are present" do
3116
- df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
3117
- expect {
3118
- df.pivot_table(index: [:a])
3119
- }.to raise_error
3120
- end
3121
-
3122
- it "raises error if atleast a row index is not specified" do
3123
- expect {
3124
- @df.pivot_table
3125
- }.to raise_error
3126
- end
3127
-
3128
- it "aggregates when nils are present in value vector" do
3129
- df = DaruLite::DataFrame.new({
3130
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3131
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3132
- c: ['small','large','large','small','small','large','small','large','small'],
3133
- d: [1,2,2,3,3,4,5,6,7],
3134
- e: [2,nil,4,6,6,8,10,12,nil]
3135
- })
3136
-
3137
- expect(df.pivot_table index: [:a]).to eq(
3138
- DaruLite::DataFrame.new({
3139
- d: [5.0, 2.2, 7],
3140
- e: [10.0, 4.5, nil]
3141
- }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
3142
- end
3143
-
3144
- it "works when nils are present in value vector" do
3145
- df = DaruLite::DataFrame.new({
3146
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3147
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3148
- c: ['small','large','large','small','small','large','small','large','small'],
3149
- d: [1,2,2,3,3,4,5,6,7],
3150
- e: [2,nil,4,6,6,8,10,12,nil]
3151
- })
3152
-
3153
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3154
- [
3155
- [:e, 'one'],
3156
- [:e, 'two']
3157
- ]
3158
- )
3159
-
3160
- agg_index = DaruLite::MultiIndex.from_tuples(
3161
- [
3162
- ['bar'],
3163
- ['foo'],
3164
- ['ice']
3165
- ]
3166
- )
3167
-
3168
- expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
3169
- DaruLite::DataFrame.new(
3170
- [
3171
- [9, 3, nil],
3172
- [12, 6, nil]
3173
- ], order: agg_vectors, index: agg_index
3174
- )
3175
- )
3176
- end
3177
-
3178
- it 'performs date pivoting' do
3179
- categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
3180
- df = DaruLite::DataFrame.rows([
3181
- [2014, 2, 1600.0, 20.0],
3182
- [2014, 3, 1680.0, 21.0],
3183
- [2016, 2, 1600.0, 20.0],
3184
- [2016, 4, 1520.0, 19.0],
3185
- ], order: [:year, :month, :visitors, :days])
3186
- df[:averages] = df[:visitors] / df[:days]
3187
- df[:month] = df[:month].map{|i| categories[i - 1]}
3188
- actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
3189
-
3190
- # NB: As you can see, there are some "illogical" parts:
3191
- # months are sorted lexicographically, then made into multi-index
3192
- # with one-element-per-tuple, then order of columns is dependent
3193
- # on which month is lexicographically first (it's apr, so, apr-2016
3194
- # is first row to gather, so 2016 is first column).
3195
- #
3196
- # All of it is a consequence of our group_by implementation (which
3197
- # always sorts results & always makes array keys). I hope that fixing
3198
- # group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
3199
- # will fix this case also.
3200
- expected =
3201
- DaruLite::DataFrame.new(
3202
- [
3203
- [80.0, 80.0, nil],
3204
- [nil, 80.0, 80.0],
3205
- ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
3206
- order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
3207
- )
3208
- # Comparing their parts previous to full comparison allows to
3209
- # find complicated differences.
3210
- expect(actual.vectors).to eq expected.vectors
3211
- expect(actual.index).to eq expected.index
3212
- expect(actual).to eq expected
3213
- end
3214
- end
3215
-
3216
- context "#shape" do
3217
- it "returns an array containing number of rows and columns" do
3218
- expect(@data_frame.shape).to eq([5,3])
3219
- end
3220
- end
3221
-
3222
- context "#nest" do
3223
- it "nests in a hash" do
3224
- df = DaruLite::DataFrame.new({
3225
- :a => DaruLite::Vector.new(%w(a a a b b b)),
3226
- :b => DaruLite::Vector.new(%w(c c d d e e)),
3227
- :c => DaruLite::Vector.new(%w(f g h i j k))
3228
- })
3229
- nest = df.nest :a, :b
3230
- expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
3231
- expect(nest['a']['d']).to eq([{ :c => 'h' }])
3232
- expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3233
- end
3234
- end
3235
-
3236
- context "#summary" do
3237
- subject { df.summary }
3238
-
3239
- context "DataFrame" do
3240
- let(:df) { DaruLite::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') }
3241
- it { is_expected.to eq %Q{
3242
- |= frame
3243
- | Number of rows: 3
3244
- | Element:[a]
3245
- | == a
3246
- | n :3
3247
- | non-missing:3
3248
- | median: 2
3249
- | mean: 2.6667
3250
- | std.dev.: 2.0817
3251
- | std.err.: 1.2019
3252
- | skew: 0.2874
3253
- | kurtosis: -2.3333
3254
- | Element:[b]
3255
- | == b
3256
- | n :3
3257
- | non-missing:3
3258
- | factors: 1,2,string
3259
- | mode: 1,2,string
3260
- | Distribution
3261
- | 1 1 100.00%
3262
- | 2 1 100.00%
3263
- | string 1 100.00%
3264
- }.unindent }
3265
- end
3266
- end
3267
-
3268
- context '#to_df' do
3269
- it 'returns the dataframe' do
3270
- @data_frame.to_df == @data_frame
3271
- end
3272
- end
3273
-
3274
- context "#merge" do
3275
- it "merges one dataframe with another" do
3276
- a = DaruLite::Vector.new [1, 2, 3]
3277
- b = DaruLite::Vector.new [3, 4, 5]
3278
- c = DaruLite::Vector.new [4, 5, 6]
3279
- d = DaruLite::Vector.new [7, 8, 9]
3280
- e = DaruLite::Vector.new [10, 20, 30]
3281
- ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
3282
- ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
3283
- exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
3284
-
3285
- expect(ds1.merge(ds2)).to eq(exp)
3286
- expect(ds2.merge(ds1)).to eq(
3287
- DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
3288
-
3289
- ds3 = DaruLite::DataFrame.new({ :a => e })
3290
- exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
3291
- order: [:a_1, :b, :a_2])
3292
-
3293
- expect(ds1.merge(ds3)).to eq(exp)
3294
- end
3295
-
3296
- context "preserves type of vector names" do
3297
- let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
3298
- let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
3299
- subject { df1.merge df2 }
3300
-
3301
- it { is_expected.to be_a DaruLite::DataFrame }
3302
- it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3303
- it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3304
- end
3305
-
3306
- context "preserves indices for dataframes with same index" do
3307
- let(:index) { ['one','two','three'] }
3308
- let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
3309
- let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
3310
- subject { df1.merge df2 }
3311
-
3312
- its(:index) { is_expected.to eq DaruLite::Index.new(index) }
3313
- end
3314
- end
3315
-
3316
- context "#vector_by_calculation" do
3317
- it "DSL for returning vector of each calculation" do
3318
- a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
3319
- a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
3320
- a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
3321
- ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
3322
- total = ds.vector_by_calculation { a + b + c }
3323
- expected = DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777])
3324
- expect(total).to eq(expected)
3325
- end
3326
- end
3327
-
3328
- context "group_by" do
3329
- context "on a single row DataFrame" do
3330
- let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
3331
- it "returns a groupby object" do
3332
- expect(df.group_by([:city])).to be_a(DaruLite::Core::GroupBy)
3333
- end
3334
- it "has the correct index" do
3335
- expect(df.group_by([:city]).groups).to eq({["Kyiv"]=>[0]})
3336
- end
3337
- end
3338
- end
3339
-
3340
- context "#vector_sum" do
3341
- before do
3342
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
3343
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
3344
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3345
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
3346
- @df = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3347
- end
3348
-
3349
- it "calculates complete vector sum" do
3350
- expect(@df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3351
- end
3352
-
3353
- it "ignores nils if skipnil is true" do
3354
- expect(@df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
3355
- end
3356
-
3357
- it "calculates partial vector sum" do
3358
- a = @df.vector_sum([:a1, :a2])
3359
- b = @df.vector_sum([:b1, :b2])
3360
-
3361
- expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
3362
- expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3363
- end
3364
- end
3365
-
3366
- context "#missing_values_rows" do
3367
- it "returns number of missing values in each row" do
3368
- a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
3369
- a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
3370
- b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
3371
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3372
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3373
- df = DaruLite::DataFrame.new({
3374
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3375
-
3376
- expect(df.missing_values_rows).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
984
+ end
3377
985
  end
3378
- end
3379
986
 
3380
- context "#vector_count_characters" do
3381
- it "" do
3382
- a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
3383
- a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
3384
- b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
3385
- b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
3386
- c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
3387
- ds = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3388
-
3389
- expect(ds.vector_count_characters).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
987
+ context DaruLite::MultiIndex do
988
+ it "transposes a DataFrame including row and column indexing" do
989
+ expect(df_mi.transpose).to eq(DaruLite::DataFrame.new([
990
+ vector_arry1,
991
+ vector_arry2,
992
+ vector_arry1,
993
+ vector_arry2].transpose, index: order_mi, order: multi_index))
994
+ end
3390
995
  end
3391
996
  end
3392
997
 
3393
- context '#include_values?' do
3394
- let(:df) do
3395
- DaruLite::DataFrame.new({
3396
- a: [1, 2, 3, 4, Float::NAN, 6, 1],
3397
- b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3398
- c: ['a', 6, 3, 4, 3, 5, 3],
3399
- d: [1, 2, 3, 5, 1, 2, 5]
3400
- })
3401
- end
3402
- before { df.to_category :b }
3403
-
3404
- context 'true' do
3405
- it { expect(df.include_values? nil).to eq true }
3406
- it { expect(df.include_values? Float::NAN).to eq true }
3407
- it { expect(df.include_values? nil, Float::NAN).to eq true }
3408
- it { expect(df.include_values? 1, 30).to eq true }
3409
- end
3410
-
3411
- context 'false' do
3412
- it { expect(df[:a, :c].include_values? nil).to eq false }
3413
- it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3414
- it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3415
- it { expect(df.include_values? 10, 20).to eq false }
998
+ context "#shape" do
999
+ it "returns an array containing number of rows and columns" do
1000
+ expect(df.shape).to eq([5,3])
3416
1001
  end
3417
1002
  end
3418
1003
 
3419
- context "#vector_mean" do
3420
- before do
3421
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
3422
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
3423
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
3424
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3425
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3426
- @df = DaruLite::DataFrame.new({
3427
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3428
- end
3429
-
3430
- it "calculates complete vector mean" do
3431
- expect(@df.vector_mean).to eq(
3432
- DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil])
1004
+ context "#nest" do
1005
+ it "nests in a hash" do
1006
+ df = DaruLite::DataFrame.new({
1007
+ :a => DaruLite::Vector.new(%w(a a a b b b)),
1008
+ :b => DaruLite::Vector.new(%w(c c d d e e)),
1009
+ :c => DaruLite::Vector.new(%w(f g h i j k))
1010
+ })
1011
+ nest = df.nest :a, :b
1012
+ expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
1013
+ expect(nest['a']['d']).to eq([{ :c => 'h' }])
1014
+ expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3433
1015
  end
3434
1016
  end
3435
1017
 
@@ -3473,64 +1055,6 @@ describe DaruLite::DataFrame do
3473
1055
  end
3474
1056
  end
3475
1057
 
3476
- context "#verify" do
3477
- def create_test(*args, &proc)
3478
- description = args.shift
3479
- fields = args
3480
- [description, fields, proc]
3481
- end
3482
-
3483
- before do
3484
- name = DaruLite::Vector.new %w(r1 r2 r3 r4)
3485
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3486
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3487
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3488
- v4 = DaruLite::Vector.new %w(a b a b)
3489
- @df = DaruLite::DataFrame.new({
3490
- :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
3491
- }, order: [:v1, :v2, :v3, :v4, :id])
3492
- end
3493
-
3494
- it "correctly verifies data as per the block" do
3495
- # Correct
3496
- t1 = create_test('If v4=a, v1 odd') do |r|
3497
- r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
3498
- end
3499
- t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
3500
- # Fail!
3501
- t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
3502
- exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
3503
- exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
3504
-
3505
- dataf = @df.verify(t3, t1, t2)
3506
- expect(dataf).to eq(exp1)
3507
- end
3508
-
3509
- it "uses additional fields to extend error messages" do
3510
- t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3511
-
3512
- dataf = @df.verify(:id, t)
3513
- expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
3514
- end
3515
- end
3516
-
3517
- context "#compute" do
3518
- it "performs a computation when supplied in a string" do
3519
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3520
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3521
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3522
- vnumeric = DaruLite::Vector.new [0, 0, 1, 4]
3523
- vsum = DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0]
3524
- vmult = DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1]
3525
-
3526
- df = DaruLite::DataFrame.new({:v1 => v1, :v2 => v2, :v3 => v3})
3527
-
3528
- expect(df.compute("v1/v2")).to eq(vnumeric)
3529
- expect(df.compute("v1+v2+v3")).to eq(vsum)
3530
- expect(df.compute("v1*v2")).to eq(vmult)
3531
- end
3532
- end
3533
-
3534
1058
  context ".crosstab_by_assignation" do
3535
1059
  it "" do
3536
1060
  v1 = DaruLite::Vector.new %w(a a a b b b c c c)
@@ -3554,394 +1078,6 @@ describe DaruLite::DataFrame do
3554
1078
  end
3555
1079
  end
3556
1080
 
3557
- context "#one_to_many" do
3558
- it "" do
3559
- rows = [
3560
- ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
3561
- ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
3562
- ['3', 'alfred', nil, nil, nil, nil, nil, nil]
3563
- ]
3564
-
3565
- df = DaruLite::DataFrame.rows(rows,
3566
- order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
3567
- 'car_value2', 'car_color3', 'car_value3'])
3568
-
3569
- ids = DaruLite::Vector.new %w(1 1 2 2 2)
3570
- colors = DaruLite::Vector.new %w(red blue green orange white)
3571
- values = DaruLite::Vector.new [10, 20, 15, 30, 20]
3572
- col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]
3573
-
3574
- df_expected = DaruLite::DataFrame.new({
3575
- 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
3576
- }, order: ['id', '_col_id', 'color', 'value'])
3577
-
3578
- expect(df.one_to_many(['id'], 'car_%v%n')).to eq(df_expected)
3579
- end
3580
- end
3581
-
3582
- context "#any?" do
3583
- before do
3584
- @df = DaruLite::DataFrame.new({
3585
- a: [1,2,3,4,5],
3586
- b: [10,20,30,40,50],
3587
- c: [11,22,33,44,55]})
3588
- end
3589
-
3590
- it "returns true if any one of the vectors satisfy condition" do
3591
- expect(@df.any? { |v| v[0] == 1 }).to eq(true)
3592
- end
3593
-
3594
- it "returns false if none of the vectors satisfy the condition" do
3595
- expect(@df.any? { |v| v.mean > 100 }).to eq(false)
3596
- end
3597
-
3598
- it "returns true if any one of the rows satisfy condition" do
3599
- expect(@df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
3600
- end
3601
-
3602
- it "returns false if none of the rows satisfy the condition" do
3603
- expect(@df.any?(:row) { |r| r.mean > 100 }).to eq(false)
3604
- end
3605
-
3606
- it 'fails on unknown axis' do
3607
- expect { @df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3608
- end
3609
- end
3610
-
3611
- context "#all?" do
3612
- before do
3613
- @df = DaruLite::DataFrame.new({
3614
- a: [1,2,3,4,5],
3615
- b: [10,20,30,40,50],
3616
- c: [11,22,33,44,55]})
3617
- end
3618
-
3619
- it "returns true if all of the vectors satisfy condition" do
3620
- expect(@df.all? { |v| v.mean < 40 }).to eq(true)
3621
- end
3622
-
3623
- it "returns false if any one of the vectors does not satisfy condition" do
3624
- expect(@df.all? { |v| v.mean == 30 }).to eq(false)
3625
- end
3626
-
3627
- it "returns true if all of the rows satisfy condition" do
3628
- expect(@df.all?(:row) { |r| r.mean < 70 }).to eq(true)
3629
- end
3630
-
3631
- it "returns false if any one of the rows does not satisfy condition" do
3632
- expect(@df.all?(:row) { |r| r.mean == 30 }).to eq(false)
3633
- end
3634
-
3635
- it 'fails on unknown axis' do
3636
- expect { @df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3637
- end
3638
- end
3639
-
3640
- context "#only_numerics" do
3641
- before do
3642
- @v1 = DaruLite::Vector.new([1,2,3,4,5])
3643
- @v2 = DaruLite::Vector.new(%w(one two three four five))
3644
- @v3 = DaruLite::Vector.new([11,22,33,44,55])
3645
- @df = DaruLite::DataFrame.new({
3646
- a: @v1, b: @v2, c: @v3 }, clone: false)
3647
- end
3648
-
3649
- it "returns a view of only the numeric vectors" do
3650
- dfon = @df.only_numerics(clone: false)
3651
-
3652
- expect(dfon).to eq(
3653
- DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false))
3654
- expect(dfon[:a].object_id).to eq(@v1.object_id)
3655
- end
3656
-
3657
- it "returns a clone of numeric vectors" do
3658
- dfon = @df.only_numerics
3659
-
3660
- expect(dfon).to eq(
3661
- DaruLite::DataFrame.new({ a: @v1, c: @v3}, clone: false)
3662
- )
3663
- expect(dfon[:a].object_id).to_not eq(@v1.object_id)
3664
- end
3665
-
3666
- context DaruLite::MultiIndex do
3667
- before do
3668
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3669
- [
3670
- [:d, :one, :large],
3671
- [:d, :one, :small],
3672
- [:d, :two, :large],
3673
- [:d, :two, :small],
3674
- [:e, :one, :large],
3675
- [:e, :one, :small],
3676
- [:e, :two, :large],
3677
- [:e, :two, :small]
3678
- ]
3679
- )
3680
-
3681
- agg_index = DaruLite::MultiIndex.from_tuples(
3682
- [
3683
- [:bar],
3684
- [:foo]
3685
- ]
3686
- )
3687
- @df = DaruLite::DataFrame.new(
3688
- [
3689
- [4.112,2.234],
3690
- %w(a b),
3691
- [6.342,nil],
3692
- [7.2344,3.23214],
3693
- [8.234,4.533],
3694
- [10.342,2.3432],
3695
- [12.0,nil],
3696
- %w(a b)
3697
- ], order: agg_vectors, index: agg_index
3698
- )
3699
- end
3700
-
3701
- it "returns numeric vectors" do
3702
- vectors = DaruLite::MultiIndex.from_tuples(
3703
- [
3704
- [:d, :one, :large],
3705
- [:d, :two, :large],
3706
- [:d, :two, :small],
3707
- [:e, :one, :large],
3708
- [:e, :one, :small],
3709
- [:e, :two, :large]
3710
- ]
3711
- )
3712
-
3713
- index = DaruLite::MultiIndex.from_tuples(
3714
- [
3715
- [:bar],
3716
- [:foo]
3717
- ]
3718
- )
3719
- answer = DaruLite::DataFrame.new(
3720
- [
3721
- [4.112,2.234],
3722
- [6.342,nil],
3723
- [7.2344,3.23214],
3724
- [8.234,4.533],
3725
- [10.342,2.3432],
3726
- [12.0,nil],
3727
- ], order: vectors, index: index
3728
- )
3729
-
3730
- expect(@df.only_numerics).to eq(answer)
3731
- end
3732
- end
3733
- end
3734
-
3735
- context '#reset_index' do
3736
- context 'when Index' do
3737
- subject do
3738
- DaruLite::DataFrame.new(
3739
- {'vals' => [1,2,3,4,5]},
3740
- index: DaruLite::Index.new(%w[a b c d e], name: 'indices')
3741
- ).reset_index
3742
- end
3743
-
3744
- it { is_expected.to eq DaruLite::DataFrame.new(
3745
- 'indices' => %w[a b c d e],
3746
- 'vals' => [1,2,3,4,5]
3747
- )}
3748
- end
3749
-
3750
- context 'when MultiIndex' do
3751
- subject do
3752
- mi = DaruLite::MultiIndex.from_tuples([
3753
- [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
3754
- ])
3755
- mi.name = %w[nums alphas]
3756
- DaruLite::DataFrame.new(
3757
- {'vals' => [1,2,3,4]},
3758
- index: mi
3759
- ).reset_index
3760
- end
3761
-
3762
- it { is_expected.to eq DaruLite::DataFrame.new(
3763
- 'nums' => [0,0,1,1],
3764
- 'alphas' => %w[a b a b],
3765
- 'vals' => [1,2,3,4]
3766
- )}
3767
- end
3768
- end
3769
-
3770
- context "#set_index" do
3771
- before(:each) do
3772
- @df = DaruLite::DataFrame.new({
3773
- a: [1,2,3,4,5],
3774
- b: ['a','b','c','d','e'],
3775
- c: [11,22,33,44,55]
3776
- })
3777
- end
3778
-
3779
- it "sets a particular column as the index and deletes that column" do
3780
- @df.set_index(:b)
3781
- expect(@df).to eq(
3782
- DaruLite::DataFrame.new({
3783
- a: [1,2,3,4,5],
3784
- c: [11,22,33,44,55]
3785
- }, index: ['a','b','c','d','e'])
3786
- )
3787
- end
3788
-
3789
- it "sets a particular column as index but keeps that column" do
3790
- expect(@df.set_index(:c, keep: true)).to eq(
3791
- DaruLite::DataFrame.new({
3792
- a: [1,2,3,4,5],
3793
- b: ['a','b','c','d','e'],
3794
- c: [11,22,33,44,55]
3795
- }, index: [11,22,33,44,55]))
3796
- expect(@df[:c]).to eq(@df[:c])
3797
- end
3798
-
3799
- it "sets categorical index if categorical is true" do
3800
- data = {
3801
- a: [1, 2, 3, 4, 5],
3802
- b: [:a, 1, :a, 1, 'c'],
3803
- c: %w[a b c d e]
3804
- }
3805
- df = DaruLite::DataFrame.new(data)
3806
- df.set_index(:b, categorical: true)
3807
- expected = DaruLite::DataFrame.new(
3808
- data.slice(:a, :c),
3809
- index: DaruLite::CategoricalIndex.new(data[:b])
3810
- )
3811
- expect(df).to eq(expected)
3812
- end
3813
-
3814
- it "raises error if all elements in the column aren't unique" do
3815
- jholu = DaruLite::DataFrame.new({
3816
- a: ['a','b','a'],
3817
- b: [1,2,4]
3818
- })
3819
-
3820
- expect {
3821
- jholu.set_index(:a)
3822
- }.to raise_error(ArgumentError)
3823
- end
3824
-
3825
- it "sets multiindex if array is given" do
3826
- df = DaruLite::DataFrame.new({
3827
- a: %w[a a b b],
3828
- b: [1, 2, 1, 2],
3829
- c: %w[a b c d]
3830
- })
3831
- df.set_index(%i[a b])
3832
- expected =
3833
- DaruLite::DataFrame.new(
3834
- { c: %w[a b c d] },
3835
- index: DaruLite::MultiIndex.from_tuples(
3836
- [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
3837
- )
3838
- ).tap do |df|
3839
- df.index.name = %i[a b]
3840
- df
3841
- end
3842
- expect(df).to eq(expected)
3843
- end
3844
- end
3845
-
3846
- context "#concat" do
3847
- before do
3848
- @df1 = DaruLite::DataFrame.new({
3849
- a: [1, 2, 3],
3850
- b: [1, 2, 3]
3851
- })
3852
-
3853
- @df2 = DaruLite::DataFrame.new({
3854
- a: [4, 5, 6],
3855
- c: [4, 5, 6]
3856
- })
3857
- end
3858
-
3859
- it 'does not modify the original dataframes' do
3860
- df1_a = @df1[:a].to_a.dup
3861
- df2_a = @df2[:a].to_a.dup
3862
-
3863
- df_concat = @df1.concat @df2
3864
- expect(@df1[:a].to_a).to eq df1_a
3865
- expect(@df2[:a].to_a).to eq df2_a
3866
- end
3867
-
3868
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3869
- df1_a = @df1[:a].to_a.dup
3870
- df2_a = @df2[:a].to_a.dup
3871
-
3872
- df_concat = @df1.concat @df2
3873
- expect(df_concat[:a].to_a).to eq df1_a + df2_a
3874
- end
3875
-
3876
- it 'fills in missing vectors with nils' do
3877
- df1_b = @df1[:b].to_a.dup
3878
- df2_c = @df2[:c].to_a.dup
3879
-
3880
- df_concat = @df1.concat @df2
3881
- expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
3882
- expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
3883
- end
3884
-
3885
- end
3886
-
3887
- context "#union" do
3888
- before do
3889
- @df1 = DaruLite::DataFrame.new({
3890
- a: [1, 2, 3],
3891
- b: [1, 2, 3]},
3892
- index: [1,3,5] )
3893
-
3894
- @df2 = DaruLite::DataFrame.new({
3895
- a: [4, 5, 6],
3896
- c: [4, 5, 6]},
3897
- index: [7,9,11])
3898
-
3899
- @df3 = DaruLite::DataFrame.new({
3900
- a: [4, 5, 6],
3901
- c: [4, 5, 6]},
3902
- index: [5,7,9])
3903
- end
3904
-
3905
- it 'does not modify the original dataframes' do
3906
- df1_a = @df1[:a].to_a.dup
3907
- df2_a = @df2[:a].to_a.dup
3908
-
3909
- _ = @df1.union @df2
3910
- expect(@df1[:a].to_a).to eq df1_a
3911
- expect(@df2[:a].to_a).to eq df2_a
3912
- end
3913
-
3914
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3915
- df1_a = @df1[:a].to_a.dup
3916
- df2_a = @df2[:a].to_a.dup
3917
-
3918
- df_union = @df1.union @df2
3919
- expect(df_union[:a].to_a).to eq df1_a + df2_a
3920
- end
3921
-
3922
- it 'fills in missing vectors with nils' do
3923
- df1_b = @df1[:b].to_a.dup
3924
- df2_c = @df2[:c].to_a.dup
3925
-
3926
- df_union = @df1.union @df2
3927
- expect(df_union[:b].to_a).to eq df1_b + [nil] * @df2.size
3928
- expect(df_union[:c].to_a).to eq [nil] * @df1.size + df2_c
3929
- end
3930
-
3931
- it 'overwrites part of the first dataframe if there are double indices' do
3932
- vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
3933
- expect(@df1.union(@df3).row[5]).to eq vec
3934
- end
3935
-
3936
- it 'concats the indices' do
3937
- v1 = @df1.index.to_a
3938
- v2 = @df2.index.to_a
3939
-
3940
- df_union = @df1.union @df2
3941
- expect(df_union.index.to_a).to eq v1 + v2
3942
- end
3943
- end
3944
-
3945
1081
  context '#inspect' do
3946
1082
  subject { df.inspect }
3947
1083
 
@@ -4021,6 +1157,18 @@ describe DaruLite::DataFrame do
4021
1157
  }.unindent}
4022
1158
  end
4023
1159
 
1160
+ context 'with integers as vectors names' do
1161
+ let(:df) { DaruLite::DataFrame.new({ 1 => [1,2,3], b: [3,4,5], c: [6,7,8] }, name: 'test')}
1162
+
1163
+ it { is_expected.to eq %Q{
1164
+ |#<DaruLite::DataFrame: test (3x3)>
1165
+ | 1 b c
1166
+ | 0 1 3 6
1167
+ | 1 2 4 7
1168
+ | 2 3 5 8
1169
+ }.unindent}
1170
+ end
1171
+
4024
1172
  context 'very long' do
4025
1173
  let(:df) { DaruLite::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
4026
1174
  it { is_expected.to eq %Q{
@@ -4110,189 +1258,6 @@ describe DaruLite::DataFrame do
4110
1258
  end
4111
1259
  end
4112
1260
 
4113
- context '#to_s' do
4114
- it 'produces a class, size description' do
4115
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame(5x3)>"
4116
- end
4117
-
4118
- it 'produces a class, name, size description' do
4119
- @data_frame.name = "Test"
4120
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4121
- end
4122
-
4123
- it 'produces a class, name, size description when the name is a symbol' do
4124
- @data_frame.name = :Test
4125
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4126
- end
4127
- end
4128
-
4129
- context '#to_json' do
4130
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, index: [:one, :two, :three], name: 'test')}
4131
- subject { JSON.parse(json) }
4132
-
4133
- context 'with index' do
4134
- let(:json) { df.to_json(false) }
4135
- # FIXME: is this the most reasonable thing we can do?.. -- zverok
4135
- # For me, a more reasonable thing would be something like
4137
- #
4138
- # [
4139
- # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
4140
- # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
4141
- # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
4142
- # ]
4143
- #
4144
- # Or maybe
4145
- #
4146
- # [
4147
- # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
4148
- # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
4149
- # ["three", {"a"=>3, "b"=>5, "c"=>8}]
4150
- # ]
4151
- #
4152
- # Or even
4153
- #
4154
- # {
4155
- # "one" => {"a"=>1, "b"=>3, "c"=>6},
4156
- # "two" => {"a"=>2, "b"=>4, "c"=>7},
4157
- # "three" => {"a"=>3, "b"=>5, "c"=>8}
4158
- # }
4159
- #
4160
- it { is_expected.to eq(
4161
- [
4162
- [
4163
- {"a"=>1, "b"=>3, "c"=>6},
4164
- {"a"=>2, "b"=>4, "c"=>7},
4165
- {"a"=>3, "b"=>5, "c"=>8}
4166
- ],
4167
- ["one", "two", "three"]
4168
- ]
4169
- )}
4170
- end
4171
-
4172
- context 'without index' do
4173
- let(:json) { df.to_json(true) }
4174
- it { is_expected.to eq(
4175
- [
4176
- {"a"=>1, "b"=>3, "c"=>6},
4177
- {"a"=>2, "b"=>4, "c"=>7},
4178
- {"a"=>3, "b"=>5, "c"=>8}
4179
- ]
4180
- )}
4181
- end
4182
- end
4183
-
4184
- context '#access_row_tuples_by_indexs' do
4185
- let(:df) {
4186
- DaruLite::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
4187
- let(:df_idx) {
4188
- DaruLite::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
4189
- }
4190
- let (:mi_idx) do
4191
- DaruLite::MultiIndex.from_tuples [
4192
- [:a,:one,:bar],
4193
- [:a,:one,:baz],
4194
- [:b,:two,:bar],
4195
- [:a,:two,:baz],
4196
- ]
4197
- end
4198
- let (:df_mi) do
4199
- DaruLite::DataFrame.new({
4200
- a: 1..4,
4201
- b: 'a'..'d'
4202
- }, index: mi_idx )
4203
- end
4204
- context 'when no index is given' do
4205
- it 'returns empty Array' do
4206
- expect(df.access_row_tuples_by_indexs()).to eq([])
4207
- end
4208
- end
4209
- context 'when index(s) are given' do
4210
- it 'returns Array of row tuples' do
4211
- expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
4212
- expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
4213
- end
4214
- end
4215
- context 'when custom index(s) are given' do
4216
- it 'returns Array of row tuples' do
4217
- expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
4218
- [[52, 1], [7, 3]]
4219
- )
4220
- end
4221
- end
4222
- context 'when multi index is given' do
4223
- it 'returns Array of row tuples' do
4224
- expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
4225
- [[1, "a"], [2, "b"], [4, "d"]]
4226
- )
4227
- expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
4228
- [[2, "b"]]
4229
- )
4230
- end
4231
- end
4232
- end
4233
-
4234
- context '#aggregate' do
4235
- let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4236
- let(:df) { DaruLite::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
4237
- let(:df_cat_idx) {
4238
- DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
4239
-
4240
- it 'lambda function on particular column' do
4241
- expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
4242
- DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
4243
- )
4244
- end
4245
- it 'aggregate sum on particular column' do
4246
- expect(df_cat_idx.aggregate(num: :sum)).to eq(
4247
- DaruLite::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
4248
- )
4249
- end
4250
- end
4251
-
4252
- context '#group_by_and_aggregate' do
4253
- let(:spending_df) {
4254
- DaruLite::DataFrame.rows([
4255
- [2010, 'dev', 50, 1],
4256
- [2010, 'dev', 150, 1],
4257
- [2010, 'dev', 200, 1],
4258
- [2011, 'dev', 50, 1],
4259
- [2012, 'dev', 150, 1],
4260
-
4261
- [2011, 'office', 300, 1],
4262
-
4263
- [2010, 'market', 50, 1],
4264
- [2011, 'market', 500, 1],
4265
- [2012, 'market', 500, 1],
4266
- [2012, 'market', 300, 1],
4267
-
4268
- [2012, 'R&D', 10, 1],],
4269
- order: [:year, :category, :spending, :nb_spending])
4270
- }
4271
-
4272
- it 'works as group_by + aggregate' do
4273
- expect(spending_df.group_by_and_aggregate(:year, spending: :sum)).to eq(
4274
- spending_df.group_by(:year).aggregate(spending: :sum))
4275
- expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
4276
- spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
4277
- end
4278
- end
4279
-
4280
- context '#create_sql' do
4281
- let(:df) { DaruLite::DataFrame.new({
4282
- a: [1,2,3],
4283
- b: ['test', 'me', 'please'],
4284
- c: ['2015-06-01', '2015-06-02', '2015-06-03']
4285
- },
4286
- name: 'test'
4287
- )}
4288
- subject { df.create_sql('foo') }
4289
- it { is_expected.to eq %Q{
4290
- |CREATE TABLE foo (a INTEGER,
4291
- | b VARCHAR (255),
4292
- | c DATE) CHARACTER SET=UTF8;
4293
- }.unindent}
4294
- end
4295
-
4296
1261
  context "#by_single_key" do
4297
1262
  let(:df) { DaruLite::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6] ) }
4298
1263
 
@@ -4300,31 +1265,4 @@ describe DaruLite::DataFrame do
4300
1265
  expect { df[:c] }.to raise_error(IndexError, /Specified vector c does not exist/)
4301
1266
  end
4302
1267
  end
4303
-
4304
- context "#rotate_vectors" do
4305
- subject { df.rotate_vectors(-1) }
4306
-
4307
- context "several vectors in the dataframe" do
4308
- let(:df) do
4309
- DaruLite::DataFrame.new({
4310
- a: [1,2,3],
4311
- b: [4,5,6],
4312
- total: [5,7,9]
4313
- })
4314
- end
4315
- let(:new_order) { [:total, :a, :b] }
4316
-
4317
- it "return the dataframe with the position of the last vector change to first" do
4318
- expect(subject.vectors.to_a).to eq(new_order)
4319
- end
4320
- end
4321
-
4322
- context "only one vector in the dataframe" do
4323
- let(:df) { DaruLite::DataFrame.new({ a: [1,2,3] }) }
4324
-
4325
- it "return the dataframe without any change" do
4326
- expect(subject).to eq(df)
4327
- end
4328
- end
4329
- end
4330
1268
  end if mri?