daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -1,9 +1,37 @@
1
+ require 'data_frame/aggregatable_example'
2
+ require 'data_frame/buildable_example'
3
+ require 'data_frame/calculatable_example'
4
+ require 'data_frame/convertible_example'
5
+ require 'data_frame/duplicatable_example'
6
+ require 'data_frame/fetchable_example'
7
+ require 'data_frame/filterable_example'
8
+ require 'data_frame/indexable_example'
9
+ require 'data_frame/iterable_example'
10
+ require 'data_frame/joinable_example'
11
+ require 'data_frame/missable_example'
12
+ require 'data_frame/pivotable_example'
13
+ require 'data_frame/queryable_example'
14
+ require 'data_frame/setable_example'
15
+ require 'data_frame/sortable_example'
16
+
1
17
  describe DaruLite::DataFrame do
2
- before :each do
3
- @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
4
- c: [11,22,33,44,55]},
18
+ let(:df) do
19
+ DaruLite::DataFrame.new(
20
+ { b: [11,12,13,14,15], a: [1,2,3,4,5], c: [11,22,33,44,55] },
5
21
  order: [:a, :b, :c],
6
- index: [:one, :two, :three, :four, :five])
22
+ index: [:one, :two, :three, :four, :five]
23
+ )
24
+ end
25
+ let(:df_mi) do
26
+ DaruLite::DataFrame.new(
27
+ [vector_arry1, vector_arry2, vector_arry1, vector_arry2],
28
+ order: order_mi,
29
+ index: multi_index
30
+ )
31
+ end
32
+ let(:vector_arry1) { [11,12,13,14,11,12,13,14,11,12,13,14] }
33
+ let(:vector_arry2) { [1,2,3,4,1,2,3,4,1,2,3,4] }
34
+ let(:multi_index) do
7
35
  tuples = [
8
36
  [:a,:one,:bar],
9
37
  [:a,:one,:baz],
@@ -18,134 +46,36 @@ describe DaruLite::DataFrame do
18
46
  [:c,:two,:foo],
19
47
  [:c,:two,:bar]
20
48
  ]
21
- @multi_index = DaruLite::MultiIndex.from_tuples(tuples)
22
-
23
- @vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]
24
- @vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]
25
-
26
- @order_mi = DaruLite::MultiIndex.from_tuples([
27
- [:a,:one,:bar],
28
- [:a,:two,:baz],
29
- [:b,:two,:foo],
30
- [:b,:one,:foo]])
31
-
32
- @df_mi = DaruLite::DataFrame.new([
33
- @vector_arry1,
34
- @vector_arry2,
35
- @vector_arry1,
36
- @vector_arry2], order: @order_mi, index: @multi_index)
49
+ DaruLite::MultiIndex.from_tuples(tuples)
37
50
  end
38
-
39
- context ".rows" do
40
- before do
41
- @rows = [
42
- [1,2,3,4,5],
43
- [1,2,3,4,5],
44
- [1,2,3,4,5],
45
- [1,2,3,4,5]
51
+ let(:order_mi) do
52
+ DaruLite::MultiIndex.from_tuples(
53
+ [
54
+ [:a,:one,:bar],
55
+ [:a,:two,:baz],
56
+ [:b,:two,:foo],
57
+ [:b,:one,:foo]
46
58
  ]
47
- end
48
-
49
- context DaruLite::Index do
50
- it "creates a DataFrame from Array rows" do
51
- df = DaruLite::DataFrame.rows @rows, order: [:a,:b,:c,:d,:e]
52
-
53
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
54
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
55
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
56
- end
57
-
58
- it "creates empty dataframe" do
59
- df = DaruLite::DataFrame.rows [], order: [:a, :b, :c]
60
-
61
- expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
62
- expect(df.index).to be_empty
63
- end
64
-
65
- it "creates a DataFrame from Vector rows" do
66
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
67
-
68
- df = DaruLite::DataFrame.rows rows, order: [:a,:b,:c,:d,:e]
69
-
70
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
71
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
72
- expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
73
- end
74
-
75
- it 'derives index & order from arrays' do
76
- df = DaruLite::DataFrame.rows @rows
77
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
78
- expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
79
- end
80
-
81
- it 'derives index & order from vectors' do
82
- rows = @rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
- df = DaruLite::DataFrame.rows rows
84
- expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
85
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
86
- end
87
-
88
- it 'behaves, when rows are repeated' do
89
- rows = @rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
90
- df = DaruLite::DataFrame.rows rows
91
- expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
92
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
93
- end
94
-
95
- it 'behaves, when vectors are unnamed' do
96
- rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
97
- df = DaruLite::DataFrame.rows rows
98
- expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
99
- expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
100
- end
101
- end
102
-
103
- context DaruLite::MultiIndex do
104
- it "creates a DataFrame from rows" do
105
- df = DaruLite::DataFrame.rows(
106
- @rows*3, index: @multi_index, order: [:a,:b,:c,:d,:e])
107
-
108
- expect(df.index) .to eq(@multi_index)
109
- expect(df.vectors) .to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
110
- expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: @multi_index))
111
- end
112
-
113
- it "crates a DataFrame from rows (MultiIndex order)" do
114
- rows = [
115
- [11, 1, 11, 1],
116
- [12, 2, 12, 2],
117
- [13, 3, 13, 3],
118
- [14, 4, 14, 4]
119
- ]
120
- index = DaruLite::MultiIndex.from_tuples([
121
- [:one,:bar],
122
- [:one,:baz],
123
- [:two,:foo],
124
- [:two,:bar]
125
- ])
126
-
127
- df = DaruLite::DataFrame.rows(rows, index: index, order: @order_mi)
128
- expect(df.index) .to eq(index)
129
- expect(df.vectors).to eq(@order_mi)
130
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
131
- index: index))
132
- end
133
-
134
- it "creates a DataFrame from Vector rows" do
135
- rows = @rows*3
136
- rows.map! { |r| DaruLite::Vector.new(r, index: @multi_index) }
137
-
138
- df = DaruLite::DataFrame.rows rows, order: @multi_index
139
-
140
- expect(df.index).to eq(DaruLite::Index.new(Array.new(rows.size) { |i| i }))
141
- expect(df.vectors).to eq(@multi_index)
142
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
143
- end
144
- end
145
- end
59
+ )
60
+ end
61
+
62
+ it_behaves_like 'an aggregatable DataFrame'
63
+ it_behaves_like 'a buildable DataFrame'
64
+ it_behaves_like 'a calculatable DataFrame'
65
+ it_behaves_like 'a convertible DataFrame'
66
+ it_behaves_like 'a duplicatable DataFrame'
67
+ it_behaves_like 'a fetchable DataFrame'
68
+ it_behaves_like 'a filterable DataFrame'
69
+ it_behaves_like 'an indexable DataFrame'
70
+ it_behaves_like 'an iterable DataFrame'
71
+ it_behaves_like 'a joinable DataFrame'
72
+ it_behaves_like 'a missable DataFrame'
73
+ it_behaves_like 'a pivotable DataFrame'
74
+ it_behaves_like 'a queryable DataFrame'
75
+ it_behaves_like 'a setable DataFrame'
76
+ it_behaves_like 'a sortable DataFrame'
146
77
 
147
78
  context "#initialize" do
148
-
149
79
  it "initializes an empty DataFrame with no arguments" do
150
80
  df = DaruLite::DataFrame.new
151
81
  expect(df.nrows).to eq(0)
@@ -370,24 +300,24 @@ describe DaruLite::DataFrame do
370
300
 
371
301
  context DaruLite::MultiIndex do
372
302
  it "creates empty DataFrame" do
373
- df = DaruLite::DataFrame.new({}, order: @order_mi)
303
+ df = DaruLite::DataFrame.new({}, order: order_mi)
374
304
 
375
- expect(df.vectors).to eq(@order_mi)
305
+ expect(df.vectors).to eq(order_mi)
376
306
  expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([]))
377
307
  end
378
308
 
379
309
  it "creates from Hash" do
380
310
  df = DaruLite::DataFrame.new({
381
- [:a,:one,:bar] => @vector_arry1,
382
- [:a,:two,:baz] => @vector_arry2,
383
- [:b,:one,:foo] => @vector_arry1,
384
- [:b,:two,:foo] => @vector_arry2
385
- }, order: @order_mi, index: @multi_index)
386
-
387
- expect(df.index) .to eq(@multi_index)
388
- expect(df.vectors) .to eq(@order_mi)
389
- expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(@vector_arry1,
390
- index: @multi_index))
311
+ [:a,:one,:bar] => vector_arry1,
312
+ [:a,:two,:baz] => vector_arry2,
313
+ [:b,:one,:foo] => vector_arry1,
314
+ [:b,:two,:foo] => vector_arry2
315
+ }, order: order_mi, index: multi_index)
316
+
317
+ expect(df.index) .to eq(multi_index)
318
+ expect(df.vectors) .to eq(order_mi)
319
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(vector_arry1,
320
+ index: multi_index))
391
321
  end
392
322
 
393
323
  it "creates from Array of Hashes" do
@@ -395,25 +325,25 @@ describe DaruLite::DataFrame do
395
325
  end
396
326
 
397
327
  it "creates from Array of Arrays" do
398
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
399
- @vector_arry2], index: @multi_index, order: @order_mi)
328
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2, vector_arry1,
329
+ vector_arry2], index: multi_index, order: order_mi)
400
330
 
401
- expect(df.index) .to eq(@multi_index)
402
- expect(df.vectors).to eq(@order_mi)
403
- expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(@vector_arry1,
404
- index: @multi_index))
331
+ expect(df.index) .to eq(multi_index)
332
+ expect(df.vectors).to eq(order_mi)
333
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(vector_arry1,
334
+ index: multi_index))
405
335
  end
406
336
 
407
337
  it "raises error for order MultiIndex of different size than supplied Array" do
408
338
  expect {
409
- df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2], order: @order_mi,
410
- index: @multi_index)
339
+ df = DaruLite::DataFrame.new([vector_arry1, vector_arry2], order: order_mi,
340
+ index: multi_index)
411
341
  }.to raise_error
412
342
  end
413
343
 
414
344
  it "aligns MultiIndexes properly" do
415
345
  pending
416
- mi_a = @order_mi
346
+ mi_a = order_mi
417
347
  mi_b = DaruLite::MultiIndex.from_tuples([
418
348
  [:b,:one,:foo],
419
349
  [:a,:one,:bar],
@@ -450,215 +380,6 @@ describe DaruLite::DataFrame do
450
380
  end
451
381
  end
452
382
 
453
- context "#[]" do
454
- context DaruLite::Index do
455
- before :each do
456
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
457
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
458
- index: [:one, :two, :three, :four, :five])
459
- end
460
-
461
- it "returns a Vector" do
462
- expect(@df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
463
- end
464
-
465
- it "returns a Vector by default" do
466
- expect(@df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
467
- index: [:one, :two, :three, :four, :five]))
468
- end
469
-
470
- it "returns a DataFrame" do
471
- temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
472
- order: [:a, :b], index: [:one, :two, :three, :four, :five])
473
-
474
- expect(@df[:a, :b]).to eq(temp)
475
- end
476
-
477
- it "accesses vector with Integer index" do
478
- expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
479
- end
480
-
481
- it "returns a subset of DataFrame when specified range" do
482
- subset = @df[:b..:c]
483
- expect(subset).to eq(DaruLite::DataFrame.new({
484
- b: [11,12,13,14,15],
485
- c: [11,22,33,44,55]
486
- }, index: [:one, :two, :three, :four, :five]))
487
- end
488
-
489
- it 'accepts axis parameter as a last argument' do
490
- expect(@df[:a, :vector]).to eq @df[:a]
491
- expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
492
- end
493
- end
494
-
495
- context DaruLite::MultiIndex do
496
- it "accesses vector with an integer index" do
497
- expect(@df_mi[0]).to eq(
498
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
499
- end
500
-
501
- it "returns a vector when specifying full tuple" do
502
- expect(@df_mi[:a, :one, :bar]).to eq(
503
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
504
- end
505
-
506
- it "returns DataFrame when specified first layer of MultiIndex" do
507
- sub_order = DaruLite::MultiIndex.from_tuples([
508
- [:one, :bar],
509
- [:two, :baz]
510
- ])
511
- expect(@df_mi[:a]).to eq(DaruLite::DataFrame.new([
512
- @vector_arry1,
513
- @vector_arry2
514
- ], index: @multi_index, order: sub_order))
515
- end
516
-
517
- it "returns a Vector if the last level of MultiIndex is tracked" do
518
- expect(@df_mi[:a, :one, :bar]).to eq(
519
- DaruLite::Vector.new(@vector_arry1, index: @multi_index))
520
- end
521
- end
522
- end
523
-
524
- context "#[]=" do
525
- context DaruLite::Index do
526
- before :each do
527
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
528
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
529
- index: [:one, :two, :three, :four, :five])
530
- end
531
-
532
- it "assigns directly with the []= operator" do
533
- @data_frame[:a] = [100,200,300,400,500]
534
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
535
- b: [11,12,13,14,15],
536
- a: [100,200,300,400,500],
537
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
538
- index: [:one, :two, :three, :four, :five]))
539
- end
540
-
541
- it "assigns new vector with default length if given just a value" do
542
- @df[:d] = 1.0
543
- expect(@df[:d]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
544
- index: [:one, :two, :three, :four, :five], name: :d))
545
- end
546
-
547
- it "updates vector with default length if given just a value" do
548
- @df[:c] = 1.0
549
- expect(@df[:c]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
550
- index: [:one, :two, :three, :four, :five], name: :c))
551
- end
552
-
553
- it "appends an Array as a DaruLite::Vector" do
554
- @df[:d] = [69,99,108,85,49]
555
-
556
- expect(@df.d.class).to eq(DaruLite::Vector)
557
- end
558
-
559
- it "appends an arbitrary enumerable as a DaruLite::Vector" do
560
- @df[:d] = Set.new([69,99,108,85,49])
561
-
562
- expect(@df[:d]).to eq(DaruLite::Vector.new([69, 99, 108, 85, 49],
563
- index: [:one, :two, :three, :four, :five], name: :c))
564
- end
565
-
566
- it "replaces an already present vector" do
567
- @df[:a] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
568
-
569
- expect(@df.a).to eq([69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five]))
570
- end
571
-
572
- it "appends a new vector to the DataFrame" do
573
- @df[:woo] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
574
-
575
- expect(@df.vectors).to eq([:a, :b, :c, :woo].to_index)
576
- end
577
-
578
- it "creates an index for the new vector if not specified" do
579
- @df[:woo] = [69,99,108,85,49]
580
-
581
- expect(@df.woo.index).to eq([:one, :two, :three, :four, :five].to_index)
582
- end
583
-
584
- it "matches index of vector to be inserted with the DataFrame index" do
585
- @df[:shankar] = [69,99,108,85,49].dv(:shankar, [:two, :one, :three, :five, :four])
586
-
587
- expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
588
- [:one, :two, :three, :four, :five]))
589
- end
590
-
591
- it "matches index of vector to be inserted, inserting nils where no match found" do
592
- @df[:shankar] = [1,2,3].dv(:shankar, [:one, :james, :hetfield])
593
-
594
- expect(@df.shankar).to eq([1,nil,nil,nil,nil].dv(:shankar, [:one, :two, :three, :four, :five]))
595
- end
596
-
597
- it "raises error for Array assignment of wrong length" do
598
- expect{
599
- @df[:shiva] = [1,2,3]
600
- }.to raise_error
601
- end
602
-
603
- it "assigns correct name given empty dataframe" do
604
- df_empty = DaruLite::DataFrame.new({})
605
- df_empty[:a] = 1..5
606
- df_empty[:b] = 1..5
607
-
608
- expect(df_empty[:a].name).to equal(:a)
609
- expect(df_empty[:b].name).to equal(:b)
610
- end
611
-
612
- it "appends multiple vectors at a time" do
613
- # TODO
614
- end
615
- end
616
-
617
- context DaruLite::MultiIndex do
618
- it "raises error when incomplete index specified but index is absent" do
619
- expect {
620
- @df_mi[:d] = [100,200,300,400,100,200,300,400,100,200,300,400]
621
- }.to raise_error
622
- end
623
-
624
- it "assigns all sub-indexes when a top level index is specified" do
625
- @df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
626
-
627
- expect(@df_mi).to eq(DaruLite::DataFrame.new([
628
- [100,200,300,400,100,200,300,400,100,200,300,400],
629
- [100,200,300,400,100,200,300,400,100,200,300,400],
630
- @vector_arry1,
631
- @vector_arry2], index: @multi_index, order: @order_mi))
632
- end
633
-
634
- it "creates a new vector when full index specfied" do
635
- order = DaruLite::MultiIndex.from_tuples([
636
- [:a,:one,:bar],
637
- [:a,:two,:baz],
638
- [:b,:two,:foo],
639
- [:b,:one,:foo],
640
- [:c,:one,:bar]])
641
- answer = DaruLite::DataFrame.new([
642
- @vector_arry1,
643
- @vector_arry2,
644
- @vector_arry1,
645
- @vector_arry2,
646
- [100,200,300,400,100,200,300,400,100,200,300,400]
647
- ], index: @multi_index, order: order)
648
- @df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]
649
-
650
- expect(@df_mi).to eq(answer)
651
- end
652
-
653
- it "assigns correct name given empty dataframe" do
654
- df_empty = DaruLite::DataFrame.new([], index: @multi_index, order: @order_mi)
655
- df_empty[:c, :one, :bar] = 1..12
656
-
657
- expect(df_empty[:c, :one, :bar].name).to eq "conebar"
658
- end
659
- end
660
- end
661
-
662
383
  context '#method_missing' do
663
384
  let(:df) { DaruLite::DataFrame.new({
664
385
  :a => [1, 2, 3, 4, 5],
@@ -720,197 +441,6 @@ describe DaruLite::DataFrame do
720
441
  end
721
442
  end
722
443
 
723
- context '#add_vector' do
724
- subject(:data_frame) {
725
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
726
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
727
- index: [:one, :two, :three, :four, :five])
728
- }
729
- before {
730
- data_frame.add_vector :a, [100,200,300,400,500]
731
- }
732
-
733
- it { is_expected.to eq(DaruLite::DataFrame.new({
734
- b: [11,12,13,14,15],
735
- a: [100,200,300,400,500],
736
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
737
- index: [:one, :two, :three, :four, :five]))
738
- }
739
- end
740
-
741
- context "#insert_vector" do
742
- subject(:data_frame) {
743
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
744
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
745
- index: [:one, :two, :three, :four, :five])
746
- }
747
-
748
- it "insert a new vector at the desired slot" do
749
- df = DaruLite::DataFrame.new({
750
- a: [1,2,3,4,5],
751
- d: [710, 720, 730, 740, 750],
752
- b: [11, 12, 13, 14, 15],
753
- c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
754
- index: [:one, :two, :three, :four, :five]
755
- )
756
- data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
757
- expect(subject).to eq df
758
- end
759
-
760
- it "raises error for data array being too big" do
761
- expect {
762
- source = (1..8).to_a
763
- data_frame.insert_vector 1, :d, source
764
- }.to raise_error(IndexError)
765
- end
766
-
767
- it "raises error for invalid index value" do
768
- expect {
769
- source = (1..5).to_a
770
- data_frame.insert_vector 4, :d, source
771
- }.to raise_error(ArgumentError)
772
- end
773
-
774
- it "raises error for invalid source type" do
775
- expect {
776
- source = 14
777
- data_frame.insert_vector 3, :d, source
778
- }.to raise_error(ArgumentError)
779
- end
780
- end
781
-
782
- context "#row[]=" do
783
- context DaruLite::Index do
784
- before :each do
785
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
786
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
787
- index: [:one, :two, :three, :four, :five])
788
- end
789
-
790
- it "assigns specified row when Array" do
791
- @df.row[:one] = [49, 99, 59]
792
-
793
- expect(@df.row[:one]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
794
- expect(@df.row[:one].index).to eq([:a, :b, :c].to_index)
795
- expect(@df.row[:one].name) .to eq(:one)
796
- end
797
-
798
- it "assigns specified row when DV" do
799
- @df.row[:one] = [49, 99, 59].dv(nil, [:a, :b, :c])
800
-
801
- expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
802
- end
803
-
804
- it "assigns correct elements when Vector of different index" do
805
- @df.row[:one] = DaruLite::Vector.new([44,62,11], index: [:b,:f,:a])
806
-
807
- expect(@df.row[:one]).to eq(DaruLite::Vector.new([11,44,nil], index: [:a,:b,:c]))
808
- end
809
-
810
- it "creates a new row from an Array" do
811
- @df.row[:patekar] = [9,2,11]
812
-
813
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
814
- end
815
-
816
- it "creates a new row from a DV" do
817
- @df.row[:patekar] = [9,2,11].dv(nil, [:a, :b, :c])
818
-
819
- expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
820
- end
821
-
822
- it "creates a new row from numeric row index and named DV" do
823
- @df.row[2] = [9,2,11].dv(nil, [:a, :b, :c])
824
-
825
- expect(@df.row[2]).to eq([9,2,11].dv(nil, [:a, :b, :c]))
826
- end
827
-
828
- it "correctly aligns assigned DV by index" do
829
- @df.row[:two] = [9,2,11].dv(nil, [:b, :a, :c])
830
-
831
- expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
832
- end
833
-
834
- it "correctlu aligns assinged DV by index for new rows" do
835
- @df.row[:latest] = DaruLite::Vector.new([2,3,1], index: [:b,:c,:a])
836
-
837
- expect(@df.row[:latest]).to eq(DaruLite::Vector.new([1,2,3], index: [:a,:b,:c]))
838
- end
839
-
840
- it "inserts nils for indexes that dont exist in the DataFrame" do
841
- @df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])
842
-
843
- expect(@df.row[:two]).to eq([nil,nil,nil].dv(nil, [:a, :b, :c]))
844
- end
845
-
846
- it "correctly inserts row of a different length by matching indexes" do
847
- @df.row[:four] = [5,4,3,2,1,3].dv(nil, [:you, :have, :a, :big, :appetite, :spock])
848
-
849
- expect(@df.row[:four]).to eq([3,nil,nil].dv(:four, [:a, :b, :c]))
850
- end
851
-
852
- it "raises error for row insertion by Array of wrong length" do
853
- expect{
854
- @df.row[:one] = [1,2,3,4,5,6,7]
855
- }.to raise_error
856
- end
857
- end
858
-
859
- context DaruLite::MultiIndex do
860
- pending
861
- # TO DO
862
- end
863
-
864
- context DaruLite::CategoricalIndex do
865
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
866
- let(:df) do
867
- DaruLite::DataFrame.new({
868
- a: 'a'..'e',
869
- b: 1..5
870
- }, index: idx)
871
- end
872
-
873
- context "modify exiting row" do
874
- context "single category" do
875
- subject { df }
876
- before { df.row[:a] = ['x', 'y'] }
877
-
878
- it { is_expected.to be_a DaruLite::DataFrame }
879
- its(:index) { is_expected.to eq idx }
880
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
881
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
882
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
883
- end
884
-
885
- context "multiple categories" do
886
- subject { df }
887
- before { df.row[:a, 1] = ['x', 'y'] }
888
-
889
- it { is_expected.to be_a DaruLite::DataFrame }
890
- its(:index) { is_expected.to eq idx }
891
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
892
- its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
893
- its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
894
- end
895
-
896
- context "positional index" do
897
- subject { df }
898
- before { df.row[0, 2] = ['x', 'y'] }
899
-
900
- it { is_expected.to be_a DaruLite::DataFrame }
901
- its(:index) { is_expected.to eq idx }
902
- its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
903
- its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
904
- its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
905
- end
906
- end
907
-
908
- context "add new row" do
909
- # TODO
910
- end
911
- end
912
- end
913
-
914
444
  context "#row.at" do
915
445
  context DaruLite::Index do
916
446
  let(:idx) { DaruLite::Index.new [1, 0, :c] }
@@ -1139,311 +669,21 @@ describe DaruLite::DataFrame do
1139
669
  end
1140
670
  end
1141
671
 
1142
- context "#row.set_at" do
1143
- let(:df) do
1144
- DaruLite::DataFrame.new({
1145
- a: 1..3,
1146
- b: 'a'..'c'
1147
- })
1148
- end
672
+ context "#row[]" do
673
+ context DaruLite::Index do
674
+ before :each do
675
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
676
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
677
+ index: [:one, :two, :three, :four, :five])
678
+ end
1149
679
 
1150
- context "single position" do
1151
- subject { df }
1152
- before { df.row.set_at [1], ['x', 'y'] }
680
+ it "creates an index for assignment if not already specified" do
681
+ @df.row[:one] = [49, 99, 59]
1153
682
 
1154
- its(:size) { is_expected.to eq 3 }
1155
- its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1156
- its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1157
- end
1158
-
1159
- context "multiple position" do
1160
- subject { df }
1161
- before { df.row.set_at [0, 2], ['x', 'y'] }
1162
-
1163
- its(:size) { is_expected.to eq 3 }
1164
- its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1165
- its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1166
- end
1167
-
1168
- context "invalid position" do
1169
- it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1170
- end
1171
-
1172
- context "invalid positions" do
1173
- it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1174
- end
1175
-
1176
- context "incorrect size" do
1177
- it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1178
- end
1179
- end
1180
-
1181
- context "#at" do
1182
- context DaruLite::Index do
1183
- let(:idx) { DaruLite::Index.new [:a, :b, :c] }
1184
- let(:df) do
1185
- DaruLite::DataFrame.new({
1186
- 1 => 1..3,
1187
- a: 'a'..'c',
1188
- b: 11..13
1189
- }, index: idx)
1190
- end
1191
-
1192
- context "single position" do
1193
- subject { df.at 1 }
1194
-
1195
- it { is_expected.to be_a DaruLite::Vector }
1196
- its(:size) { is_expected.to eq 3 }
1197
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1198
- its(:index) { is_expected.to eq idx }
1199
- end
1200
-
1201
- context "multiple positions" do
1202
- subject { df.at 0, 2 }
1203
-
1204
- it { is_expected.to be_a DaruLite::DataFrame }
1205
- its(:shape) { is_expected.to eq [3, 2] }
1206
- its(:index) { is_expected.to eq idx }
1207
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1208
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1209
- end
1210
-
1211
- context "single invalid position" do
1212
- it { expect { df. at 3 }.to raise_error IndexError }
1213
- end
1214
-
1215
- context "multiple invalid positions" do
1216
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1217
- end
1218
-
1219
- context "range" do
1220
- subject { df.at 0..1 }
1221
-
1222
- it { is_expected.to be_a DaruLite::DataFrame }
1223
- its(:shape) { is_expected.to eq [3, 2] }
1224
- its(:index) { is_expected.to eq idx }
1225
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1226
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1227
- end
1228
-
1229
- context "range with negative end" do
1230
- subject { df.at 0..-2 }
1231
-
1232
- it { is_expected.to be_a DaruLite::DataFrame }
1233
- its(:shape) { is_expected.to eq [3, 2] }
1234
- its(:index) { is_expected.to eq idx }
1235
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1236
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1237
- end
1238
-
1239
- context "range with single element" do
1240
- subject { df.at 1..1 }
1241
-
1242
- it { is_expected.to be_a DaruLite::DataFrame }
1243
- its(:shape) { is_expected.to eq [3, 1] }
1244
- its(:index) { is_expected.to eq idx }
1245
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1246
- end
1247
- end
1248
-
1249
- context DaruLite::MultiIndex do
1250
- let (:idx) do
1251
- DaruLite::MultiIndex.from_tuples [
1252
- [:a,:one,:bar],
1253
- [:a,:one,:baz],
1254
- [:b,:two,:bar],
1255
- ]
1256
- end
1257
- let(:df) do
1258
- DaruLite::DataFrame.new({
1259
- 1 => 1..3,
1260
- a: 'a'..'c',
1261
- b: 11..13
1262
- }, index: idx)
1263
- end
1264
-
1265
- context "single position" do
1266
- subject { df.at 1 }
1267
-
1268
- it { is_expected.to be_a DaruLite::Vector }
1269
- its(:size) { is_expected.to eq 3 }
1270
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1271
- its(:index) { is_expected.to eq idx }
1272
- end
1273
-
1274
- context "multiple positions" do
1275
- subject { df.at 0, 2 }
1276
-
1277
- it { is_expected.to be_a DaruLite::DataFrame }
1278
- its(:shape) { is_expected.to eq [3, 2] }
1279
- its(:index) { is_expected.to eq idx }
1280
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1281
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1282
- end
1283
-
1284
- context "single invalid position" do
1285
- it { expect { df. at 3 }.to raise_error IndexError }
1286
- end
1287
-
1288
- context "multiple invalid positions" do
1289
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1290
- end
1291
-
1292
- context "range" do
1293
- subject { df.at 0..1 }
1294
-
1295
- it { is_expected.to be_a DaruLite::DataFrame }
1296
- its(:shape) { is_expected.to eq [3, 2] }
1297
- its(:index) { is_expected.to eq idx }
1298
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1299
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1300
- end
1301
-
1302
- context "range with negative end" do
1303
- subject { df.at 0..-2 }
1304
-
1305
- it { is_expected.to be_a DaruLite::DataFrame }
1306
- its(:shape) { is_expected.to eq [3, 2] }
1307
- its(:index) { is_expected.to eq idx }
1308
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1309
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1310
- end
1311
-
1312
- context "range with single element" do
1313
- subject { df.at 1..1 }
1314
-
1315
- it { is_expected.to be_a DaruLite::DataFrame }
1316
- its(:shape) { is_expected.to eq [3, 1] }
1317
- its(:index) { is_expected.to eq idx }
1318
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1319
- end
1320
- end
1321
-
1322
- context DaruLite::CategoricalIndex do
1323
- let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
1324
- let(:df) do
1325
- DaruLite::DataFrame.new({
1326
- 1 => 1..3,
1327
- a: 'a'..'c',
1328
- b: 11..13
1329
- }, index: idx)
1330
- end
1331
-
1332
- context "single position" do
1333
- subject { df.at 1 }
1334
-
1335
- it { is_expected.to be_a DaruLite::Vector }
1336
- its(:size) { is_expected.to eq 3 }
1337
- its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1338
- its(:index) { is_expected.to eq idx }
1339
- end
1340
-
1341
- context "multiple positions" do
1342
- subject { df.at 0, 2 }
1343
-
1344
- it { is_expected.to be_a DaruLite::DataFrame }
1345
- its(:shape) { is_expected.to eq [3, 2] }
1346
- its(:index) { is_expected.to eq idx }
1347
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1348
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1349
- end
1350
-
1351
- context "single invalid position" do
1352
- it { expect { df. at 3 }.to raise_error IndexError }
1353
- end
1354
-
1355
- context "multiple invalid positions" do
1356
- it { expect { df.at 2, 3 }.to raise_error IndexError }
1357
- end
1358
-
1359
- context "range" do
1360
- subject { df.at 0..1 }
1361
-
1362
- it { is_expected.to be_a DaruLite::DataFrame }
1363
- its(:shape) { is_expected.to eq [3, 2] }
1364
- its(:index) { is_expected.to eq idx }
1365
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1366
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1367
- end
1368
-
1369
- context "range with negative index" do
1370
- subject { df.at 0..-2 }
1371
-
1372
- it { is_expected.to be_a DaruLite::DataFrame }
1373
- its(:shape) { is_expected.to eq [3, 2] }
1374
- its(:index) { is_expected.to eq idx }
1375
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1376
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1377
- end
1378
-
1379
- context "range with single element" do
1380
- subject { df.at 1..1 }
1381
-
1382
- it { is_expected.to be_a DaruLite::DataFrame }
1383
- its(:shape) { is_expected.to eq [3, 1] }
1384
- its(:index) { is_expected.to eq idx }
1385
- its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1386
- end
1387
- end
1388
- end
1389
-
1390
- context "#set_at" do
1391
- let(:df) do
1392
- DaruLite::DataFrame.new({
1393
- 1 => 1..3,
1394
- a: 'a'..'c',
1395
- b: 11..13
1396
- })
1397
- end
1398
-
1399
- context "single position" do
1400
- subject { df }
1401
- before { df.set_at [1], ['x', 'y', 'z'] }
1402
-
1403
- its(:shape) { is_expected.to eq [3, 3] }
1404
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1405
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1406
- its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1407
- end
1408
-
1409
- context "multiple position" do
1410
- subject { df }
1411
- before { df.set_at [1, 2], ['x', 'y', 'z'] }
1412
-
1413
- its(:shape) { is_expected.to eq [3, 3] }
1414
- it { expect(df[1].to_a).to eq [1, 2, 3] }
1415
- its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1416
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1417
- end
1418
-
1419
- context "invalid position" do
1420
- it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1421
- end
1422
-
1423
- context "invalid positions" do
1424
- it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1425
- end
1426
-
1427
- context "incorrect size" do
1428
- it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
1429
- end
1430
- end
1431
-
1432
- context "#row[]" do
1433
- context DaruLite::Index do
1434
- before :each do
1435
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1436
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1437
- index: [:one, :two, :three, :four, :five])
1438
- end
1439
-
1440
- it "creates an index for assignment if not already specified" do
1441
- @df.row[:one] = [49, 99, 59]
1442
-
1443
- expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
1444
- expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
1445
- expect(@df[:one, :row].name) .to eq(:one)
1446
- end
683
+ expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
684
+ expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
685
+ expect(@df[:one, :row].name) .to eq(:one)
686
+ end
1447
687
 
1448
688
  it "returns a DataFrame when specifying numeric Range" do
1449
689
  expect(@df.row[0..2]).to eq(
@@ -1488,7 +728,7 @@ describe DaruLite::DataFrame do
1488
728
 
1489
729
  context DaruLite::MultiIndex do
1490
730
  it "returns a Vector when specifying integer index" do
1491
- expect(@df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: @order_mi))
731
+ expect(df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: order_mi))
1492
732
  end
1493
733
 
1494
734
  it "returns a DataFrame whecn specifying numeric range" do
@@ -1497,16 +737,16 @@ describe DaruLite::DataFrame do
1497
737
  [:a,:one,:baz]
1498
738
  ])
1499
739
 
1500
- expect(@df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
740
+ expect(df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
1501
741
  [11,12],
1502
742
  [1,2],
1503
743
  [11,12],
1504
744
  [1,2]
1505
- ], order: @order_mi, index: sub_index, name: :numeric_range))
745
+ ], order: order_mi, index: sub_index, name: :numeric_range))
1506
746
  end
1507
747
 
1508
748
  it "returns a Vector when specifying complete tuple" do
1509
- expect(@df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: @order_mi))
749
+ expect(df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: order_mi))
1510
750
  end
1511
751
 
1512
752
  it "returns DataFrame when specifying first layer of MultiIndex" do
@@ -1516,12 +756,12 @@ describe DaruLite::DataFrame do
1516
756
  [:two,:foo],
1517
757
  [:two,:bar]
1518
758
  ])
1519
- expect(@df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
759
+ expect(df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
1520
760
  [11,12,13,14],
1521
761
  [1,2,3,4],
1522
762
  [11,12,13,14],
1523
763
  [1,2,3,4]
1524
- ], index: sub_index, order: @order_mi))
764
+ ], index: sub_index, order: order_mi))
1525
765
  end
1526
766
 
1527
767
  it "returns DataFrame when specifying first and second layer of MultiIndex" do
@@ -1529,12 +769,12 @@ describe DaruLite::DataFrame do
1529
769
  [:bar],
1530
770
  [:baz]
1531
771
  ])
1532
- expect(@df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
772
+ expect(df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
1533
773
  [11,12],
1534
774
  [1,2],
1535
775
  [11,12],
1536
776
  [1,2]
1537
- ], index: sub_index, order: @order_mi))
777
+ ], index: sub_index, order: order_mi))
1538
778
  end
1539
779
  end
1540
780
 
@@ -1596,107 +836,6 @@ describe DaruLite::DataFrame do
1596
836
  end
1597
837
  end
1598
838
 
1599
- context "#add_row" do
1600
- subject(:data_frame) {
1601
- DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1602
- c: [11,22,33,44,55]}, order: [:a, :b, :c],
1603
- index: [:one, :two, :three, :four, :five])
1604
- }
1605
- context 'named' do
1606
- before {
1607
- data_frame.add_row [100,200,300], :six
1608
- }
1609
-
1610
- it { is_expected.to eq(DaruLite::DataFrame.new({
1611
- a: [1,2,3,4,5,100],
1612
- b: [11,12,13,14,15,200],
1613
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1614
- index: [:one, :two, :three, :four, :five, :six]))
1615
- }
1616
- end
1617
-
1618
- context 'unnamed' do
1619
- before {
1620
- data_frame.add_row [100,200,300]
1621
- }
1622
-
1623
- it { is_expected.to eq(DaruLite::DataFrame.new({
1624
- a: [1,2,3,4,5,100],
1625
- b: [11,12,13,14,15,200],
1626
- c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1627
- index: [:one, :two, :three, :four, :five, 5]))
1628
- }
1629
- end
1630
-
1631
- context 'with mulitiindex DF' do
1632
- subject(:data_frame) {
1633
- DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1634
- c: [11,22,33]}, order: [:a, :b, :c],
1635
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
1636
- }
1637
-
1638
- before { data_frame.add_row [100,200,300], [:two, :five] }
1639
-
1640
- it { is_expected.to eq(DaruLite::DataFrame.new({
1641
- b: [11,12,13,200], a: [1,2,3,100],
1642
- c: [11,22,33,300]}, order: [:a, :b, :c],
1643
- index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
1644
- }
1645
- end
1646
-
1647
- it "allows adding rows after making empty DF by specfying only order" do
1648
- df = DaruLite::DataFrame.new({}, order: [:a, :b, :c])
1649
- df.add_row [1,2,3]
1650
- df.add_row [5,6,7]
1651
-
1652
- expect(df[:a]).to eq(DaruLite::Vector.new([1,5]))
1653
- expect(df[:b]).to eq(DaruLite::Vector.new([2,6]))
1654
- expect(df[:c]).to eq(DaruLite::Vector.new([3,7]))
1655
- expect(df.index).to eq(DaruLite::Index.new([0,1]))
1656
- end
1657
- end
1658
-
1659
- context "#first" do
1660
- it 'works' do
1661
- expect(@data_frame.first(2)).to eq(
1662
- DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1663
- order: [:a, :b, :c],
1664
- index: [:one, :two]))
1665
- end
1666
-
1667
- it 'works with too large values' do
1668
- expect(@data_frame.first(200)).to eq(@data_frame)
1669
- end
1670
-
1671
- it 'has synonym' do
1672
- expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1673
- end
1674
-
1675
- it 'works on DateTime indexes' do
1676
- idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
1677
- df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
1678
- first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
1679
- expect(df.head(1)).to eq(first)
1680
- end
1681
- end
1682
-
1683
- context "#last" do
1684
- it 'works' do
1685
- expect(@data_frame.last(2)).to eq(
1686
- DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1687
- order: [:a, :b, :c],
1688
- index: [:four, :five]))
1689
- end
1690
-
1691
- it 'works with too large values' do
1692
- expect(@data_frame.last(200)).to eq(@data_frame)
1693
- end
1694
-
1695
- it 'has synonym' do
1696
- expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1697
- end
1698
- end
1699
-
1700
839
  context "#==" do
1701
840
  it "compares by vectors, index and values of a DataFrame (ignores name)" do
1702
841
  a = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
@@ -1710,1083 +849,81 @@ describe DaruLite::DataFrame do
1710
849
  end
1711
850
 
1712
851
  context '#rename' do
1713
- subject { @data_frame.rename 'other' }
852
+ subject { df.rename 'other' }
1714
853
 
1715
854
  it { is_expected.to be_a DaruLite::DataFrame }
1716
855
  its(:name) { is_expected.to eq 'other' }
1717
856
  end
1718
857
 
1719
- context "#dup" do
858
+ context "#delete_vector" do
1720
859
  context DaruLite::Index do
1721
- it "dups every data structure inside DataFrame" do
1722
- clo = @data_frame.dup
1723
-
1724
- expect(clo.object_id) .not_to eq(@data_frame.object_id)
1725
- expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
1726
- expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
860
+ it "deletes the specified vector" do
861
+ df.delete_vector :a
1727
862
 
1728
- @data_frame.each_vector_with_index do |vector, index|
1729
- expect(vector.object_id).not_to eq(clo[index].object_id)
1730
- expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
1731
- end
863
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
864
+ c: [11,22,33,44,55]}, order: [:b, :c],
865
+ index: [:one, :two, :three, :four, :five]))
1732
866
  end
1733
867
  end
868
+ end
1734
869
 
1735
- context DaruLite::MultiIndex do
1736
- it "duplicates with multi index" do
1737
- clo = @df_mi.dup
870
+ context "#delete_vectors" do
871
+ context DaruLite::Index do
872
+ it "deletes the specified vectors" do
873
+ df.delete_vectors :a, :b
1738
874
 
1739
- expect(clo) .to eq(@df_mi)
1740
- expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
1741
- expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
875
+ expect(df).to eq(DaruLite::DataFrame.new({
876
+ c: [11,22,33,44,55]}, order: [:c],
877
+ index: [:one, :two, :three, :four, :five]))
1742
878
  end
1743
879
  end
1744
880
  end
1745
881
 
1746
- context '#reject_values' do
1747
- let(:df) do
1748
- DaruLite::DataFrame.new({
1749
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1750
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1751
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1752
- }, index: 11..18)
1753
- end
1754
- before { df.to_category :b }
1755
-
1756
- context 'remove nils only' do
1757
- subject { df.reject_values nil }
1758
- it { is_expected.to be_a DaruLite::DataFrame }
1759
- its(:'b.type') { is_expected.to eq :category }
1760
- its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1761
- its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1762
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1763
- its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1764
- end
1765
-
1766
- context 'remove Float::NAN only' do
1767
- subject { df.reject_values Float::NAN }
1768
- it { is_expected.to be_a DaruLite::DataFrame }
1769
- its(:'b.type') { is_expected.to eq :category }
1770
- its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1771
- its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1772
- its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1773
- its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1774
- end
882
+ context "#delete_row" do
883
+ it "deletes the specified row" do
884
+ df.delete_row :three
1775
885
 
1776
- context 'remove both nil and Float::NAN' do
1777
- subject { df.reject_values nil, Float::NAN }
1778
- it { is_expected.to be_a DaruLite::DataFrame }
1779
- its(:'b.type') { is_expected.to eq :category }
1780
- its(:'a.to_a') { is_expected.to eq [1, 7] }
1781
- its(:'b.to_a') { is_expected.to eq [:a, 8] }
1782
- its(:'c.to_a') { is_expected.to eq ['a', 7] }
1783
- its(:'index.to_a') { is_expected.to eq [11, 18] }
886
+ expect(df).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
887
+ c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
1784
888
  end
889
+ end
1785
890
 
1786
- context 'any other values' do
1787
- subject { df.reject_values 1, 5 }
1788
- it { is_expected.to be_a DaruLite::DataFrame }
1789
- its(:'b.type') { is_expected.to eq :category }
1790
- its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1791
- its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1792
- its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1793
- its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
891
+ context "#rename_vectors!" do
892
+ before do
893
+ @df = DaruLite::DataFrame.new({
894
+ a: [1,2,3,4,5],
895
+ b: [11,22,33,44,55],
896
+ c: %w(a b c d e)
897
+ })
1794
898
  end
1795
899
 
1796
- context 'when resultant dataframe has one row' do
1797
- subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1798
- it { is_expected.to be_a DaruLite::DataFrame }
1799
- its(:'b.type') { is_expected.to eq :category }
1800
- its(:'a.to_a') { is_expected.to eq [7] }
1801
- its(:'b.to_a') { is_expected.to eq [8] }
1802
- its(:'c.to_a') { is_expected.to eq [7] }
1803
- its(:'index.to_a') { is_expected.to eq [18] }
900
+ it "returns self as modified dataframe" do
901
+ expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
1804
902
  end
1805
903
 
1806
- context 'when resultant dataframe is empty' do
1807
- subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1808
- it { is_expected.to be_a DaruLite::DataFrame }
1809
- its(:'b.type') { is_expected.to eq :category }
1810
- its(:'a.to_a') { is_expected.to eq [] }
1811
- its(:'b.to_a') { is_expected.to eq [] }
1812
- its(:'c.to_a') { is_expected.to eq [] }
1813
- its(:'index.to_a') { is_expected.to eq [] }
904
+ it "re-uses rename_vectors method" do
905
+ name_map = { :a => :alpha, :c => :gamma }
906
+ expect(@df).to receive(:rename_vectors).with(name_map)
907
+ @df.rename_vectors! name_map
1814
908
  end
1815
909
  end
1816
910
 
1817
- context '#replace_values' do
1818
- subject do
1819
- DaruLite::DataFrame.new({
1820
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1821
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1822
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
911
+ context "#rename_vectors" do
912
+ before do
913
+ @df = DaruLite::DataFrame.new({
914
+ a: [1,2,3,4,5],
915
+ b: [11,22,33,44,55],
916
+ c: %w(a b c d e)
1823
917
  })
1824
918
  end
1825
- before { subject.to_category :b }
1826
919
 
1827
- context 'replace nils only' do
1828
- before { subject.replace_values nil, 10 }
1829
- it { is_expected.to be_a DaruLite::DataFrame }
1830
- its(:'b.type') { is_expected.to eq :category }
1831
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1832
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1833
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
920
+ it "returns DaruLite::Index" do
921
+ expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
1834
922
  end
1835
923
 
1836
- context 'replace Float::NAN only' do
1837
- before { subject.replace_values Float::NAN, 10 }
1838
- it { is_expected.to be_a DaruLite::DataFrame }
1839
- its(:'b.type') { is_expected.to eq :category }
1840
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1841
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1842
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1843
- end
1844
-
1845
- context 'replace both nil and Float::NAN' do
1846
- before { subject.replace_values [nil, Float::NAN], 10 }
1847
- it { is_expected.to be_a DaruLite::DataFrame }
1848
- its(:'b.type') { is_expected.to eq :category }
1849
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1850
- its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1851
- its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1852
- end
1853
-
1854
- context 'replace other values' do
1855
- before { subject.replace_values [1, 5], 10 }
1856
- it { is_expected.to be_a DaruLite::DataFrame }
1857
- its(:'b.type') { is_expected.to eq :category }
1858
- its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1859
- its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1860
- its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1861
- end
1862
- end
1863
-
1864
- describe 'uniq' do
1865
- let(:df) do
1866
- DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1867
- end
1868
-
1869
- context 'with no args' do
1870
- it do
1871
- result = df.uniq
1872
- expect(result.shape.first).to eq 30
1873
- end
1874
- end
1875
-
1876
- context 'given a vector' do
1877
- it do
1878
- result = df.uniq("color")
1879
- expect(result.shape.first).to eq 2
1880
- end
1881
- end
1882
-
1883
- context 'given an array of vectors' do
1884
- it do
1885
- result = df.uniq("color", "director_name")
1886
- expect(result.shape.first).to eq 29
1887
- end
1888
- end
1889
- end
1890
-
1891
- context '#rolling_fillna!' do
1892
- subject do
1893
- DaruLite::DataFrame.new({
1894
- a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1895
- b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1896
- c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1897
- })
1898
- end
1899
-
1900
- context 'rolling_fillna! forwards' do
1901
- before { subject.rolling_fillna!(:forward) }
1902
- it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
1903
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1904
- its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1905
- its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1906
- end
1907
-
1908
- context 'rolling_fillna! backwards' do
1909
- before { subject.rolling_fillna!(:backward) }
1910
- it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
1911
- its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1912
- its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
1913
- its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1914
- end
1915
- end
1916
-
1917
- context "#clone" do
1918
- it "returns a view of the whole dataframe" do
1919
- cloned = @data_frame.clone
1920
- expect(@data_frame.object_id).to_not eq(cloned.object_id)
1921
- expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
1922
- expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
1923
- expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
1924
- end
1925
-
1926
- it "returns a view of selected vectors" do
1927
- cloned = @data_frame.clone(:a, :b)
1928
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1929
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1930
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1931
- end
1932
-
1933
- it "clones properly when supplied array" do
1934
- cloned = @data_frame.clone([:a, :b])
1935
- expect(cloned.object_id).to_not eq(@data_frame.object_id)
1936
- expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1937
- expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1938
- end
1939
-
1940
- it "original dataframe remains unaffected when operations are applied
1941
- on cloned data frame" do
1942
- original = @data_frame.dup
1943
- cloned = @data_frame.clone
1944
- cloned.delete_vector :a
1945
-
1946
- expect(@data_frame).to eq(original)
1947
- end
1948
-
1949
- end
1950
-
1951
- context "#clone_only_valid" do
1952
- let(:df_with_missing) {
1953
- DaruLite::DataFrame.new({
1954
- a: [1 , 2, 3, nil, 4, nil, 5],
1955
- b: [nil, 2, 3, nil, 4, nil, 5],
1956
- c: [1, 2, 3, 43 , 4, nil, 5]
1957
- })
1958
- }
1959
-
1960
- let(:df_without_missing) {
1961
- DaruLite::DataFrame.new({
1962
- a: [2,3,4,5],
1963
- c: [2,3,4,5]
1964
- })
1965
- }
1966
- it 'does the most reasonable thing' do
1967
- expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*DaruLite::MISSING_VALUES))
1968
- expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1969
- end
1970
- end
1971
-
1972
- context "#clone_structure" do
1973
- it "clones only the index and vector structures of the data frame" do
1974
- cs = @data_frame.clone_structure
1975
-
1976
- expect(cs.vectors).to eq(@data_frame.vectors)
1977
- expect(cs.index).to eq(@data_frame.index)
1978
- expect(cs[:a]).to eq(DaruLite::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
1979
- end
1980
- end
1981
-
1982
- context "#each_index" do
1983
- it "iterates over index" do
1984
- idxs = []
1985
- ret = @data_frame.each_index do |index|
1986
- idxs << index
1987
- end
1988
-
1989
- expect(idxs).to eq([:one, :two, :three, :four, :five])
1990
-
1991
- expect(ret).to eq(@data_frame)
1992
- end
1993
- end
1994
-
1995
- context "#each_vector_with_index" do
1996
- it "iterates over vectors with index" do
1997
- idxs = []
1998
- ret = @data_frame.each_vector_with_index do |vector, index|
1999
- idxs << index
2000
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2001
- expect(vector.class).to eq(DaruLite::Vector)
2002
- end
2003
-
2004
- expect(idxs).to eq([:a, :b, :c])
2005
-
2006
- expect(ret).to eq(@data_frame)
2007
- end
2008
- end
2009
-
2010
- context "#each_row_with_index" do
2011
- it "iterates over rows with indexes" do
2012
- idxs = []
2013
- ret = @data_frame.each_row_with_index do |row, idx|
2014
- idxs << idx
2015
- expect(row.index).to eq([:a, :b, :c].to_index)
2016
- expect(row.class).to eq(DaruLite::Vector)
2017
- end
2018
-
2019
- expect(idxs).to eq([:one, :two, :three, :four, :five])
2020
- expect(ret) .to eq(@data_frame)
2021
- end
2022
- end
2023
-
2024
- context "#each" do
2025
- it "iterates over rows" do
2026
- ret = @data_frame.each(:row) do |row|
2027
- expect(row.index).to eq([:a, :b, :c].to_index)
2028
- expect(row.class).to eq(DaruLite::Vector)
2029
- end
2030
-
2031
- expect(ret).to eq(@data_frame)
2032
- end
2033
-
2034
- it "iterates over all vectors" do
2035
- ret = @data_frame.each do |vector|
2036
- expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2037
- expect(vector.class).to eq(DaruLite::Vector)
2038
- end
2039
-
2040
- expect(ret).to eq(@data_frame)
2041
- end
2042
-
2043
- it "returns Enumerable if no block specified" do
2044
- ret = @data_frame.each
2045
- expect(ret.is_a?(Enumerator)).to eq(true)
2046
- end
2047
-
2048
- it "raises on unknown axis" do
2049
- expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
2050
- end
2051
- end
2052
-
2053
- context "#recode" do
2054
- before do
2055
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2056
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2057
- index: [:one, :two, :three, :four, :five])
2058
-
2059
- @ans_rows = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2060
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2061
- index: [:one, :two, :three, :four, :five])
2062
-
2063
- @data_frame_date_time = @data_frame.dup
2064
- @data_frame_date_time.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
2065
-
2066
- @ans_vector_date_time = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2067
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2068
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2069
-
2070
- @ans_rows_date_time = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2071
- c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2072
- index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2073
- end
2074
-
2075
- it "maps over the vectors of a DataFrame and returns a DataFrame" do
2076
- ret = @data_frame.recode do |vector|
2077
- vector.map! { |e| e += 10}
2078
- end
2079
-
2080
- expect(ret).to eq(@ans_vector)
2081
- end
2082
-
2083
- it "maps over the rows of a DataFrame and returns a DataFrame" do
2084
- ret = @data_frame.recode(:row) do |row|
2085
- expect(row.class).to eq(DaruLite::Vector)
2086
- row.map! { |e| e*e }
2087
- end
2088
-
2089
- expect(ret).to eq(@ans_rows)
2090
- end
2091
-
2092
- it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2093
- ret = @data_frame_date_time.recode do |vector|
2094
- vector.map! { |e| e += 10}
2095
- end
2096
-
2097
- expect(ret).to eq(@ans_vector_date_time)
2098
- end
2099
-
2100
- it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2101
- ret = @data_frame_date_time.recode(:row) do |row|
2102
- expect(row.class).to eq(DaruLite::Vector)
2103
- row.map! { |e| e*e }
2104
- end
2105
-
2106
- expect(ret).to eq(@ans_rows_date_time)
2107
- end
2108
-
2109
- end
2110
-
2111
- context "#collect" do
2112
- before do
2113
- @df = DaruLite::DataFrame.new({
2114
- a: [1,2,3,4,5],
2115
- b: [11,22,33,44,55],
2116
- c: [1,2,3,4,5]
2117
- })
2118
- end
2119
-
2120
- it "collects calculation over rows and returns a Vector from the results" do
2121
- expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
2122
- DaruLite::Vector.new([2,8,18,32,50])
2123
- )
2124
- end
2125
-
2126
- it "collects calculation over vectors and returns a Vector from the results" do
2127
- expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
2128
- DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
2129
- )
2130
- end
2131
- end
2132
-
2133
- context "#map" do
2134
- it "iterates over rows and returns an Array" do
2135
- ret = @data_frame.map(:row) do |row|
2136
- expect(row.class).to eq(DaruLite::Vector)
2137
- row[:a] * row[:c]
2138
- end
2139
-
2140
- expect(ret).to eq([11, 44, 99, 176, 275])
2141
- expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
2142
- end
2143
-
2144
- it "iterates over vectors and returns an Array" do
2145
- ret = @data_frame.map do |vector|
2146
- vector.mean
2147
- end
2148
- expect(ret).to eq([3.0, 13.0, 33.0])
2149
- end
2150
- end
2151
-
2152
- context "#map!" do
2153
- before do
2154
- @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2155
- c: [21,32,43,54,65]}, order: [:a, :b, :c],
2156
- index: [:one, :two, :three, :four, :five])
2157
-
2158
- @ans_row = DaruLite::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
2159
- c: [12,23,34,45,56]}, order: [:a, :b, :c],
2160
- index: [:one, :two, :three, :four, :five])
2161
- end
2162
-
2163
- it "destructively maps over the vectors and changes the DF" do
2164
- @data_frame.map! do |vector|
2165
- vector + 10
2166
- end
2167
- expect(@data_frame).to eq(@ans_vector)
2168
- end
2169
-
2170
- it "destructively maps over the rows and changes the DF" do
2171
- @data_frame.map!(:row) do |row|
2172
- row + 1
2173
- end
2174
-
2175
- expect(@data_frame).to eq(@ans_row)
2176
- end
2177
- end
2178
-
2179
- context "#map_vectors_with_index" do
2180
- it "iterates over vectors with index and returns an Array" do
2181
- idx = []
2182
- ret = @data_frame.map_vectors_with_index do |vector, index|
2183
- idx << index
2184
- vector.recode { |e| e += 10}
2185
- end
2186
-
2187
- expect(ret).to eq([
2188
- DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
2189
- DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
2190
- DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
2191
- expect(idx).to eq([:a, :b, :c])
2192
- end
2193
- end
2194
-
2195
- # FIXME: collect_VECTORS_with_index, but map_VECTOR_with_index -- ??? -- zverok
2196
- # (Not saying about unfortunate difference between them...)
2197
- context "#collect_vector_with_index" do
2198
- it "iterates over vectors with index and returns an Array" do
2199
- idx = []
2200
- ret = @data_frame.collect_vector_with_index do |vector, index|
2201
- idx << index
2202
- vector.sum
2203
- end
2204
-
2205
- expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2206
- expect(idx).to eq([:a, :b, :c])
2207
- end
2208
- end
2209
-
2210
- context "#map_rows_with_index" do
2211
- it "iterates over rows with index and returns an Array" do
2212
- idx = []
2213
- ret = @data_frame.map_rows_with_index do |row, index|
2214
- idx << index
2215
- expect(row.class).to eq(DaruLite::Vector)
2216
- row[:a] * row[:c]
2217
- end
2218
-
2219
- expect(ret).to eq([11, 44, 99, 176, 275])
2220
- expect(idx).to eq([:one, :two, :three, :four, :five])
2221
- end
2222
- end
2223
-
2224
- context '#collect_row_with_index' do
2225
- it "iterates over rows with index and returns a Vector" do
2226
- idx = []
2227
- ret = @data_frame.collect_row_with_index do |row, index|
2228
- idx << index
2229
- expect(row.class).to eq(DaruLite::Vector)
2230
- row[:a] * row[:c]
2231
- end
2232
-
2233
- expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2234
- expect(ret).to eq(expected)
2235
- expect(idx).to eq([:one, :two, :three, :four, :five])
2236
- end
2237
- end
2238
-
2239
- context "#delete_vector" do
2240
- context DaruLite::Index do
2241
- it "deletes the specified vector" do
2242
- @data_frame.delete_vector :a
2243
-
2244
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
2245
- c: [11,22,33,44,55]}, order: [:b, :c],
2246
- index: [:one, :two, :three, :four, :five]))
2247
- end
2248
- end
2249
- end
2250
-
2251
- context "#delete_vectors" do
2252
- context DaruLite::Index do
2253
- it "deletes the specified vectors" do
2254
- @data_frame.delete_vectors :a, :b
2255
-
2256
- expect(@data_frame).to eq(DaruLite::DataFrame.new({
2257
- c: [11,22,33,44,55]}, order: [:c],
2258
- index: [:one, :two, :three, :four, :five]))
2259
- end
2260
- end
2261
- end
2262
-
2263
- context "#delete_row" do
2264
- it "deletes the specified row" do
2265
- @data_frame.delete_row :three
2266
-
2267
- expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
2268
- c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
2269
- end
2270
- end
2271
-
2272
- context "#keep_row_if" do
2273
- pending "changing row from under the iterator trips this"
2274
- it "keeps row if block evaluates to true" do
2275
- df = DaruLite::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
2276
- c: [10,20,30,40,50]}, order: [:a, :b, :c],
2277
- index: [:one, :two, :three, :four, :five])
2278
-
2279
- df.keep_row_if do |row|
2280
- row[:a] % 10 == 0
2281
- end
2282
- # TODO: write expectation
2283
- end
2284
- end
2285
-
2286
- context "#keep_vector_if" do
2287
- it "keeps vector if block evaluates to true" do
2288
- @data_frame.keep_vector_if do |vector|
2289
- vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
2290
- end
2291
-
2292
- expect(@data_frame).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
2293
- index: [:one, :two, :three, :four, :five]))
2294
- end
2295
- end
2296
-
2297
- context "#filter_field" do
2298
- before do
2299
- @df = DaruLite::DataFrame.new({
2300
- :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
2301
- :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
2302
- :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
2303
- :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
2304
- :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
2305
- order: [:id, :name, :age, :city, :a1])
2306
- end
2307
-
2308
- it "creates new vector with the data of a given field for which block returns true" do
2309
- filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
2310
- expect(filtered).to eq(DaruLite::Vector.new([2,4]))
2311
- end
2312
- end
2313
-
2314
- context "#filter_rows" do
2315
- context DaruLite::Index do
2316
- context "when specified no index" do
2317
- it "filters rows" do
2318
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2319
-
2320
- a = df.filter_rows do |row|
2321
- row[:a] % 2 == 0
2322
- end
2323
-
2324
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
2325
- end
2326
- end
2327
-
2328
- context "when specified numerical index" do
2329
- it "filters rows" do
2330
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
2331
-
2332
- a = df.filter_rows do |row|
2333
- row[:a] % 2 == 0
2334
- end
2335
-
2336
- expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
2337
- end
2338
- end
2339
-
2340
- it "preserves names of vectors" do
2341
- df = DaruLite::DataFrame.new a: 1..3, b: 4..6
2342
- df1 = df.filter_rows { |r| r[:a] != 2 }
2343
-
2344
- expect(df1[:a].name).to eq(df[:a].name)
2345
- end
2346
- end
2347
- end
2348
-
2349
- context "#filter_vectors" do
2350
- context DaruLite::Index do
2351
- it "filters vectors" do
2352
- df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2353
-
2354
- a = df.filter_vectors do |vector|
2355
- vector[0] == 1
2356
- end
2357
-
2358
- expect(a).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
2359
- end
2360
- end
2361
- end
2362
-
2363
- context "#filter" do
2364
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2365
- it "dispatches" do
2366
- expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2367
- eq df.filter_rows{|r| r[:a] % 2 == 0 }
2368
-
2369
- expect(df.filter(:vector){|v| v[0] == 1}).to \
2370
- eq df.filter_vectors{|v| v[0] == 1}
2371
-
2372
- expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2373
- end
2374
- end
2375
-
2376
- context "#to_a" do
2377
- context DaruLite::Index do
2378
- it "converts DataFrame into array of hashes" do
2379
- arry = @data_frame.to_a
2380
-
2381
- expect(arry).to eq(
2382
- [
2383
- [
2384
- {a: 1, b: 11, c: 11},
2385
- {a: 2, b: 12, c: 22},
2386
- {a: 3, b: 13, c: 33},
2387
- {a: 4, b: 14, c: 44},
2388
- {a: 5, b: 15, c: 55}
2389
- ],
2390
- [
2391
- :one, :two, :three, :four, :five
2392
- ]
2393
- ])
2394
- end
2395
- end
2396
-
2397
- context DaruLite::MultiIndex do
2398
- pending
2399
- end
2400
- end
2401
-
2402
- context "#to_h" do
2403
- it "converts to a hash" do
2404
- expect(@data_frame.to_h).to eq(
2405
- {
2406
- a: DaruLite::Vector.new([1,2,3,4,5],
2407
- index: [:one, :two, :three, :four, :five]),
2408
- b: DaruLite::Vector.new([11,12,13,14,15],
2409
- index: [:one, :two, :three, :four, :five]),
2410
- c: DaruLite::Vector.new([11,22,33,44,55],
2411
- index: [:one, :two, :three, :four, :five])
2412
- }
2413
- )
2414
- end
2415
- end
2416
-
2417
- context "#sort" do
2418
- context DaruLite::Index do
2419
- before :each do
2420
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2421
- end
2422
-
2423
- it "sorts according to given vector order (bang)" do
2424
- a_sorter = lambda { |a| a }
2425
- ans = @df.sort([:a], by: { a: a_sorter })
2426
-
2427
- expect(ans).to eq(
2428
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
2429
- index: [2,1,0,4,5,3])
2430
- )
2431
- expect(ans).to_not eq(@df)
2432
- end
2433
-
2434
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2435
- ans = @df.sort([:a, :b])
2436
- expect(ans).to eq(
2437
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2438
- index: [2,1,0,5,4,3])
2439
- )
2440
- expect(ans).to_not eq(@df)
2441
- end
2442
- end
2443
-
2444
- context DaruLite::MultiIndex do
2445
- pending
2446
- end
2447
-
2448
- context DaruLite::CategoricalIndex do
2449
- let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2450
- let(:df) do
2451
- DaruLite::DataFrame.new({
2452
- a: [2, -1, 3, 4, 5],
2453
- b: ['x', 'y', 'x', 'a', 'y'],
2454
- c: [nil, nil, -2, 2, 1]
2455
- }, index: idx)
2456
- end
2457
-
2458
- context "ascending order" do
2459
- context "single vector" do
2460
- subject { df.sort [:a] }
2461
-
2462
- its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2463
- its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2464
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2465
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2466
- end
2467
-
2468
- context "multiple vectors" do
2469
- subject { df.sort [:c, :b] }
2470
-
2471
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2472
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2473
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2474
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2475
- end
2476
-
2477
- context "block" do
2478
- context "automatic handle nils" do
2479
- subject do
2480
- df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2481
- end
2482
-
2483
- its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2484
- its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2485
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2486
- its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2487
- end
2488
-
2489
- context "manually handle nils" do
2490
- subject do
2491
- df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2492
- end
2493
-
2494
- its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2495
- its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2496
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2497
- its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2498
- end
2499
- end
2500
- end
2501
-
2502
- context "descending order" do
2503
- context "single vector" do
2504
- subject { df.sort [:a], ascending: false }
2505
-
2506
- its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2507
- its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2508
- its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2509
- its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2510
- end
2511
-
2512
- context "multiple vectors" do
2513
- subject { df.sort [:c, :b], ascending: false }
2514
-
2515
- its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2516
- its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2517
- its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2518
- its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2519
- end
2520
-
2521
- context "block" do
2522
- context "automatic handle nils" do
2523
- subject do
2524
- df.sort [:c],
2525
- by: {c: lambda { |a| a.abs } },
2526
- handle_nils: true,
2527
- ascending: false
2528
- end
2529
-
2530
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2531
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2532
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2533
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2534
- end
2535
-
2536
- context "manually handle nils" do
2537
- subject do
2538
- df.sort [:c],
2539
- by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2540
- ascending: false
2541
- end
2542
-
2543
- its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2544
- its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2545
- its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2546
- its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2547
- end
2548
- end
2549
- end
2550
- end
2551
- end
2552
-
2553
- context "#sort!" do
2554
- context DaruLite::Index do
2555
- before :each do
2556
- @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
2557
- c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2558
- end
2559
-
2560
- it "sorts according to given vector order (bang)" do
2561
- a_sorter = lambda { |a| a }
2562
-
2563
- expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
2564
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
2565
- c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
2566
- )
2567
- end
2568
-
2569
- it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2570
- expect(@df.sort!([:a, :b])).to eq(
2571
- DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2572
- index: [2,1,0,5,4,3])
2573
- )
2574
- end
2575
-
2576
- it "sorts both vectors in descending order" do
2577
- expect(@df.sort!([:a,:b], ascending: [false, false])).to eq(
2578
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,9,1,-2,-1,5], c: ['aaaa','aaaaa','aaaaaa', 'a','aa', 'aaa'] },
2579
- index: [3,4,5,0,1,2])
2580
- )
2581
- end
2582
-
2583
- it "sorts one vector in desc and other is asc" do
2584
- expect(@df.sort!([:a, :b], ascending: [false, true])).to eq(
2585
- DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,-2,1,9,-1,5], c: ['aaaa','a','aaaaaa','aaaaa','aa','aaa']},
2586
- index: [3,0,5,4,1,2])
2587
- )
2588
- end
2589
-
2590
- it "sorts many vectors" do
2591
- d = DaruLite::DataFrame.new({a: [1,1,1,222,44,5,5,544], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2592
-
2593
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2594
- DaruLite::DataFrame.new({a: [544,222,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2595
- index: [7,3,4,6,5,0,1,2])
2596
- )
2597
- end
2598
-
2599
- it "places nils at the beginning when sorting ascedingly" do
2600
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2601
-
2602
- expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
2603
- DaruLite::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
2604
- index: [7,3,0,1,2,6,5,4])
2605
- )
2606
- end
2607
-
2608
- it "places nils at the beginning when sorting decendingly" do
2609
- d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2610
-
2611
- expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2612
- DaruLite::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2613
- index: [7,3,4,6,5,0,1,2])
2614
- )
2615
- end
2616
-
2617
- it "sorts vectors of non-numeric types with nils in ascending order" do
2618
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2619
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2620
-
2621
- expect(non_numeric.sort!([:c], ascending: [true])).to eq(
2622
- DaruLite::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
2623
- c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
2624
- index: [2, 5, 0, 1, 3, 4])
2625
- )
2626
- end
2627
-
2628
- it "sorts vectors of non-numeric types with nils in descending order" do
2629
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2630
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2631
-
2632
- expect(non_numeric.sort!([:c], ascending: [false])).to eq(
2633
- DaruLite::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
2634
- c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
2635
- index: [2, 5, 4, 3, 0, 1])
2636
- )
2637
- end
2638
-
2639
- it "sorts vectors with block provided and handle nils automatically" do
2640
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2641
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2642
-
2643
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
2644
- DaruLite::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
2645
- c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
2646
- index: [0, 3, 1, 2, 4, 5])
2647
- )
2648
- end
2649
-
2650
- it "sorts vectors with block provided and nils handled manually" do
2651
- non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2652
- c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2653
-
2654
- expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
2655
- DaruLite::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
2656
- c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
2657
- index: [1, 2, 4, 5, 0, 3])
2658
- )
2659
- end
2660
- end
2661
-
2662
- context DaruLite::MultiIndex do
2663
- pending
2664
- it "sorts the DataFrame when specified full tuple" do
2665
- @df_mi.sort([[:a,:one,:bar]])
2666
- end
2667
- end
2668
- end
2669
-
2670
- context "#index=" do
2671
- before :each do
2672
- @df = DaruLite::DataFrame.new({
2673
- a: [1,2,3,4,5],
2674
- b: [11,22,33,44,55],
2675
- c: %w(a b c d e)
2676
- })
2677
- end
2678
-
2679
- it "simply reassigns the index" do
2680
- @df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
2681
- expect(@df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
2682
- end
2683
-
2684
- it "raises error for improper length index" do
2685
- expect {
2686
- @df.index = DaruLite::Index.new([1,2])
2687
- }.to raise_error(ArgumentError)
2688
- end
2689
-
2690
- it "is able to accept array" do
2691
- @df.index = (1..5).to_a
2692
- expect(@df.index).to eq DaruLite::Index.new (1..5).to_a
2693
- end
2694
- end
2695
-
2696
- context '#order=' do
2697
- let(:df) do
2698
- DaruLite::DataFrame.new({
2699
- a: [1, 2, 3],
2700
- b: [4, 5, 6]
2701
- }, order: [:a, :b])
2702
- end
2703
-
2704
- context 'correct order' do
2705
- before { df.order = [:b, :a] }
2706
- subject { df }
2707
-
2708
- its(:'vectors.to_a') { is_expected.to eq [:b, :a] }
2709
- its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
2710
- its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
2711
- end
2712
-
2713
- context 'insufficient vectors' do
2714
- it { expect { df.order = [:a] }.to raise_error }
2715
- end
2716
-
2717
- context 'wrong vectors' do
2718
- it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
2719
- end
2720
- end
2721
-
2722
- context "#vectors=" do
2723
- before :each do
2724
- @df = DaruLite::DataFrame.new({
2725
- a: [1,2,3,4,5],
2726
- b: [11,22,33,44,55],
2727
- c: %w(a b c d e)
2728
- })
2729
- end
2730
-
2731
- it "simply reassigns vectors" do
2732
- @df.vectors = DaruLite::Index.new(['b',0,'m'])
2733
-
2734
- expect(@df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
2735
- expect(@df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
2736
- expect(@df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
2737
- expect(@df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
2738
- end
2739
-
2740
- it "raises error for improper length index" do
2741
- expect {
2742
- @df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
2743
- }.to raise_error(ArgumentError)
2744
- end
2745
-
2746
- it "change name of vectors in @data" do
2747
- new_index_array = [:k, :l, :m]
2748
- @df.vectors = DaruLite::Index.new(new_index_array)
2749
-
2750
- expect(@df.data.map { |vector| vector.name }).to eq(new_index_array)
2751
- end
2752
- end
2753
-
2754
- context "#rename_vectors!" do
2755
- before do
2756
- @df = DaruLite::DataFrame.new({
2757
- a: [1,2,3,4,5],
2758
- b: [11,22,33,44,55],
2759
- c: %w(a b c d e)
2760
- })
2761
- end
2762
-
2763
- it "returns self as modified dataframe" do
2764
- expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
2765
- end
2766
-
2767
- it "re-uses rename_vectors method" do
2768
- name_map = { :a => :alpha, :c => :gamma }
2769
- expect(@df).to receive(:rename_vectors).with(name_map)
2770
- @df.rename_vectors! name_map
2771
- end
2772
- end
2773
-
2774
- context "#rename_vectors" do
2775
- before do
2776
- @df = DaruLite::DataFrame.new({
2777
- a: [1,2,3,4,5],
2778
- b: [11,22,33,44,55],
2779
- c: %w(a b c d e)
2780
- })
2781
- end
2782
-
2783
- it "returns DaruLite::Index" do
2784
- expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
2785
- end
2786
-
2787
- it "renames vectors using a hash map" do
2788
- @df.rename_vectors :a => :alpha, :c => :gamma
2789
- expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
924
+ it "renames vectors using a hash map" do
925
+ @df.rename_vectors :a => :alpha, :c => :gamma
926
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
2790
927
  end
2791
928
 
2792
929
  it "overwrites vectors if the new name already exists" do
@@ -2832,69 +969,10 @@ describe DaruLite::DataFrame do
2832
969
  end
2833
970
  end
2834
971
 
2835
- context "#reindex" do
2836
- it "re indexes and aligns accordingly" do
2837
- df = DaruLite::DataFrame.new({
2838
- a: [1,2,3,4,5],
2839
- b: [11,22,33,44,55],
2840
- c: %w(a b c d e)
2841
- })
2842
-
2843
- ans = df.reindex(DaruLite::Index.new([1,3,0,8,2]))
2844
- expect(ans).to eq(DaruLite::DataFrame.new({
2845
- a: [2,4,1,nil,3],
2846
- b: [22,44,11,nil,33],
2847
- c: ['b','d','a',nil,'c']
2848
- }, index: DaruLite::Index.new([1,3,0,8,2])))
2849
- expect(ans).to_not eq(df)
2850
- end
2851
- end
2852
-
2853
- context "#reindex_vectors" do
2854
- it "re indexes vectors and aligns accordingly" do
2855
- df = DaruLite::DataFrame.new({
2856
- a: [1,2,3,4,5],
2857
- b: [11,22,33,44,55],
2858
- c: %w(a b c d e)
2859
- })
2860
-
2861
- ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
2862
- expect(ans).to eq(DaruLite::DataFrame.new({
2863
- :b => [11,22,33,44,55],
2864
- 'a' => [nil, nil, nil, nil, nil],
2865
- :a => [1,2,3,4,5]
2866
- }, order: [:b, 'a', :a]))
2867
- end
2868
-
2869
- it 'raises ArgumentError if argument was not an index' do
2870
- df = DaruLite::DataFrame.new([])
2871
- expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2872
- end
2873
- end
2874
-
2875
- context "#to_matrix" do
2876
- before do
2877
- @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
2878
- c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
2879
- order: [:a, :b, :c,:d,:e],
2880
- index: [:one, :two, :three, :four, :five])
2881
- end
2882
-
2883
- it "concats numeric non-nil vectors to Matrix" do
2884
- expect(@df.to_matrix).to eq(Matrix[
2885
- [1,11,11,5],
2886
- [2,12,22,4],
2887
- [3,13,33,nil],
2888
- [4,14,44,2],
2889
- [5,15,55,1]
2890
- ])
2891
- end
2892
- end
2893
-
2894
972
  context "#transpose" do
2895
973
  context DaruLite::Index do
2896
974
  it "transposes a DataFrame including row and column indexing" do
2897
- expect(@data_frame.transpose).to eq(DaruLite::DataFrame.new({
975
+ expect(df.transpose).to eq(DaruLite::DataFrame.new({
2898
976
  one: [1,11,11],
2899
977
  two: [2,12,22],
2900
978
  three: [3,13,33],
@@ -2903,533 +981,37 @@ describe DaruLite::DataFrame do
2903
981
  }, index: [:a, :b, :c],
2904
982
  order: [:one, :two, :three, :four, :five])
2905
983
  )
2906
- end
2907
- end
2908
-
2909
- context DaruLite::MultiIndex do
2910
- it "transposes a DataFrame including row and column indexing" do
2911
- expect(@df_mi.transpose).to eq(DaruLite::DataFrame.new([
2912
- @vector_arry1,
2913
- @vector_arry2,
2914
- @vector_arry1,
2915
- @vector_arry2].transpose, index: @order_mi, order: @multi_index))
2916
- end
2917
- end
2918
- end
2919
-
2920
- context "#pivot_table" do
2921
- before do
2922
- @df = DaruLite::DataFrame.new({
2923
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
2924
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
2925
- c: ['small','large','large','small','small','large','small','large','small'],
2926
- d: [1,2,2,3,3,4,5,6,7],
2927
- e: [2,4,4,6,6,8,10,12,14]
2928
- })
2929
- end
2930
-
2931
- it "creates row index as per (single) index argument and default aggregates to mean" do
2932
- expect(@df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
2933
- d: [5.5,2.2],
2934
- e: [11.0,4.4]
2935
- }, index: ['bar', 'foo']))
2936
- end
2937
-
2938
- it "creates row index as per (double) index argument and default aggregates to mean" do
2939
- agg_mi = DaruLite::MultiIndex.from_tuples(
2940
- [
2941
- ['bar', 'large'],
2942
- ['bar', 'small'],
2943
- ['foo', 'large'],
2944
- ['foo', 'small']
2945
- ]
2946
- )
2947
- expect(@df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
2948
- d: [5.0 , 6.0, 2.0, 2.33],
2949
- e: [10.0, 12.0, 4.0, 4.67]
2950
- }, index: agg_mi))
2951
- end
2952
-
2953
- it "creates row and vector index as per (single) index and (single) vectors args" do
2954
- agg_vectors = DaruLite::MultiIndex.from_tuples([
2955
- [:d, 'one'],
2956
- [:d, 'two'],
2957
- [:e, 'one'],
2958
- [:e, 'two']
2959
- ])
2960
- agg_index = DaruLite::MultiIndex.from_tuples(
2961
- [
2962
- ['bar'],
2963
- ['foo']
2964
- ]
2965
- )
2966
-
2967
- expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
2968
- DaruLite::DataFrame.new(
2969
- [
2970
- [4.5, 1.67],
2971
- [6.5, 3.0],
2972
- [9.0, 3.33],
2973
- [13, 6]
2974
- ], order: agg_vectors, index: agg_index)
2975
- )
2976
- end
2977
-
2978
- it "creates row and vector index as per (single) index and (double) vector args" do
2979
- agg_vectors = DaruLite::MultiIndex.from_tuples(
2980
- [
2981
- [:d, 'one', 'large'],
2982
- [:d, 'one', 'small'],
2983
- [:d, 'two', 'large'],
2984
- [:d, 'two', 'small'],
2985
- [:e, 'one', 'large'],
2986
- [:e, 'one', 'small'],
2987
- [:e, 'two', 'large'],
2988
- [:e, 'two', 'small']
2989
- ]
2990
- )
2991
-
2992
- agg_index = DaruLite::MultiIndex.from_tuples(
2993
- [
2994
- ['bar'],
2995
- ['foo']
2996
- ]
2997
- )
2998
-
2999
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
3000
- [
3001
- [4.0,2.0],
3002
- [5.0,1.0],
3003
- [6.0,nil],
3004
- [7.0,3.0],
3005
- [8.0,4.0],
3006
- [10.0,2.0],
3007
- [12.0,nil],
3008
- [14.0,6.0]
3009
- ], order: agg_vectors, index: agg_index
3010
- ))
3011
- end
3012
-
3013
- it "creates row and vector index with (double) index and (double) vector args" do
3014
- agg_index = DaruLite::MultiIndex.from_tuples([
3015
- ['bar', 4],
3016
- ['bar', 5],
3017
- ['bar', 6],
3018
- ['bar', 7],
3019
- ['foo', 1],
3020
- ['foo', 2],
3021
- ['foo', 3]
3022
- ])
3023
-
3024
- agg_vectors = DaruLite::MultiIndex.from_tuples([
3025
- [:e, 'one', 'large'],
3026
- [:e, 'one', 'small'],
3027
- [:e, 'two', 'large'],
3028
- [:e, 'two', 'small']
3029
- ])
3030
-
3031
- expect(@df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
3032
- DaruLite::DataFrame.new(
3033
- [
3034
- [8 ,nil,nil,nil,nil, 4,nil],
3035
- [nil, 10,nil,nil, 2,nil,nil],
3036
- [nil,nil, 12,nil,nil,nil,nil],
3037
- [nil,nil,nil, 14,nil,nil, 6],
3038
- ], index: agg_index, order: agg_vectors)
3039
- )
3040
- end
3041
-
3042
- it "only aggregates over the vector specified in the values argument" do
3043
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3044
- [
3045
- [:e, 'one', 'large'],
3046
- [:e, 'one', 'small'],
3047
- [:e, 'two', 'large'],
3048
- [:e, 'two', 'small']
3049
- ]
3050
- )
3051
- agg_index = DaruLite::MultiIndex.from_tuples(
3052
- [
3053
- ['bar'],
3054
- ['foo']
3055
- ]
3056
- )
3057
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
3058
- DaruLite::DataFrame.new(
3059
- [
3060
- [8, 4],
3061
- [10, 2],
3062
- [12,nil],
3063
- [14, 6]
3064
- ], order: agg_vectors, index: agg_index
3065
- )
3066
- )
3067
-
3068
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3069
- [
3070
- [:d, 'one'],
3071
- [:d, 'two'],
3072
- [:e, 'one'],
3073
- [:e, 'two']
3074
- ]
3075
- )
3076
- expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
3077
- DaruLite::DataFrame.new(
3078
- [
3079
- [4.5, 5.0/3],
3080
- [6.5, 3.0],
3081
- [9.0, 10.0/3],
3082
- [13.0, 6.0]
3083
- ], order: agg_vectors, index: agg_index
3084
- )
3085
- )
3086
- end
3087
-
3088
- it "overrides default aggregate function to aggregate over sum" do
3089
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3090
- [
3091
- [:e, 'one', 'large'],
3092
- [:e, 'one', 'small'],
3093
- [:e, 'two', 'large'],
3094
- [:e, 'two', 'small']
3095
- ]
3096
- )
3097
- agg_index = DaruLite::MultiIndex.from_tuples(
3098
- [
3099
- ['bar'],
3100
- ['foo']
3101
- ]
3102
- )
3103
- expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
3104
- DaruLite::DataFrame.new(
3105
- [
3106
- [8, 8],
3107
- [10, 2],
3108
- [12,nil],
3109
- [14, 12]
3110
- ], order: agg_vectors, index: agg_index
3111
- )
3112
- )
3113
- end
3114
-
3115
- it "raises error if no non-numeric vectors are present" do
3116
- df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
3117
- expect {
3118
- df.pivot_table(index: [:a])
3119
- }.to raise_error
3120
- end
3121
-
3122
- it "raises error if atleast a row index is not specified" do
3123
- expect {
3124
- @df.pivot_table
3125
- }.to raise_error
3126
- end
3127
-
3128
- it "aggregates when nils are present in value vector" do
3129
- df = DaruLite::DataFrame.new({
3130
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3131
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3132
- c: ['small','large','large','small','small','large','small','large','small'],
3133
- d: [1,2,2,3,3,4,5,6,7],
3134
- e: [2,nil,4,6,6,8,10,12,nil]
3135
- })
3136
-
3137
- expect(df.pivot_table index: [:a]).to eq(
3138
- DaruLite::DataFrame.new({
3139
- d: [5.0, 2.2, 7],
3140
- e: [10.0, 4.5, nil]
3141
- }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
3142
- end
3143
-
3144
- it "works when nils are present in value vector" do
3145
- df = DaruLite::DataFrame.new({
3146
- a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3147
- b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3148
- c: ['small','large','large','small','small','large','small','large','small'],
3149
- d: [1,2,2,3,3,4,5,6,7],
3150
- e: [2,nil,4,6,6,8,10,12,nil]
3151
- })
3152
-
3153
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3154
- [
3155
- [:e, 'one'],
3156
- [:e, 'two']
3157
- ]
3158
- )
3159
-
3160
- agg_index = DaruLite::MultiIndex.from_tuples(
3161
- [
3162
- ['bar'],
3163
- ['foo'],
3164
- ['ice']
3165
- ]
3166
- )
3167
-
3168
- expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
3169
- DaruLite::DataFrame.new(
3170
- [
3171
- [9, 3, nil],
3172
- [12, 6, nil]
3173
- ], order: agg_vectors, index: agg_index
3174
- )
3175
- )
3176
- end
3177
-
3178
- it 'performs date pivoting' do
3179
- categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
3180
- df = DaruLite::DataFrame.rows([
3181
- [2014, 2, 1600.0, 20.0],
3182
- [2014, 3, 1680.0, 21.0],
3183
- [2016, 2, 1600.0, 20.0],
3184
- [2016, 4, 1520.0, 19.0],
3185
- ], order: [:year, :month, :visitors, :days])
3186
- df[:averages] = df[:visitors] / df[:days]
3187
- df[:month] = df[:month].map{|i| categories[i - 1]}
3188
- actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
3189
-
3190
- # NB: As you can see, there are some "illogical" parts:
3191
- # months are sorted lexicographically, then made into multi-index
3192
- # with one-element-per-tuple, then order of columns is dependent
3193
- # on which month is lexicographically first (its apr, so, apr-2016
3194
- # is first row to gather, so 2016 is first column).
3195
- #
3196
- # All of it is descendance of our group_by implementation (which
3197
- # always sorts results & always make array keys). I hope that fixing
3198
- # group_by, even to the extend described at https://github.com/v0dro/daru/issues/152,
3199
- # will be fix this case also.
3200
- expected =
3201
- DaruLite::DataFrame.new(
3202
- [
3203
- [80.0, 80.0, nil],
3204
- [nil, 80.0, 80.0],
3205
- ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
3206
- order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
3207
- )
3208
- # Comparing their parts previous to full comparison allows to
3209
- # find complicated differences.
3210
- expect(actual.vectors).to eq expected.vectors
3211
- expect(actual.index).to eq expected.index
3212
- expect(actual).to eq expected
3213
- end
3214
- end
3215
-
3216
- context "#shape" do
3217
- it "returns an array containing number of rows and columns" do
3218
- expect(@data_frame.shape).to eq([5,3])
3219
- end
3220
- end
3221
-
3222
- context "#nest" do
3223
- it "nests in a hash" do
3224
- df = DaruLite::DataFrame.new({
3225
- :a => DaruLite::Vector.new(%w(a a a b b b)),
3226
- :b => DaruLite::Vector.new(%w(c c d d e e)),
3227
- :c => DaruLite::Vector.new(%w(f g h i j k))
3228
- })
3229
- nest = df.nest :a, :b
3230
- expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
3231
- expect(nest['a']['d']).to eq([{ :c => 'h' }])
3232
- expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3233
- end
3234
- end
3235
-
3236
- context "#summary" do
3237
- subject { df.summary }
3238
-
3239
- context "DataFrame" do
3240
- let(:df) { DaruLite::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') }
3241
- it { is_expected.to eq %Q{
3242
- |= frame
3243
- | Number of rows: 3
3244
- | Element:[a]
3245
- | == a
3246
- | n :3
3247
- | non-missing:3
3248
- | median: 2
3249
- | mean: 2.6667
3250
- | std.dev.: 2.0817
3251
- | std.err.: 1.2019
3252
- | skew: 0.2874
3253
- | kurtosis: -2.3333
3254
- | Element:[b]
3255
- | == b
3256
- | n :3
3257
- | non-missing:3
3258
- | factors: 1,2,string
3259
- | mode: 1,2,string
3260
- | Distribution
3261
- | 1 1 100.00%
3262
- | 2 1 100.00%
3263
- | string 1 100.00%
3264
- }.unindent }
3265
- end
3266
- end
3267
-
3268
- context '#to_df' do
3269
- it 'returns the dataframe' do
3270
- @data_frame.to_df == @data_frame
3271
- end
3272
- end
3273
-
3274
- context "#merge" do
3275
- it "merges one dataframe with another" do
3276
- a = DaruLite::Vector.new [1, 2, 3]
3277
- b = DaruLite::Vector.new [3, 4, 5]
3278
- c = DaruLite::Vector.new [4, 5, 6]
3279
- d = DaruLite::Vector.new [7, 8, 9]
3280
- e = DaruLite::Vector.new [10, 20, 30]
3281
- ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
3282
- ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
3283
- exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
3284
-
3285
- expect(ds1.merge(ds2)).to eq(exp)
3286
- expect(ds2.merge(ds1)).to eq(
3287
- DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
3288
-
3289
- ds3 = DaruLite::DataFrame.new({ :a => e })
3290
- exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
3291
- order: [:a_1, :b, :a_2])
3292
-
3293
- expect(ds1.merge(ds3)).to eq(exp)
3294
- end
3295
-
3296
- context "preserves type of vector names" do
3297
- let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
3298
- let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
3299
- subject { df1.merge df2 }
3300
-
3301
- it { is_expected.to be_a DaruLite::DataFrame }
3302
- it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3303
- it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3304
- end
3305
-
3306
- context "preserves indices for dataframes with same index" do
3307
- let(:index) { ['one','two','three'] }
3308
- let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
3309
- let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
3310
- subject { df1.merge df2 }
3311
-
3312
- its(:index) { is_expected.to eq DaruLite::Index.new(index) }
3313
- end
3314
- end
3315
-
3316
- context "#vector_by_calculation" do
3317
- it "DSL for returning vector of each calculation" do
3318
- a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
3319
- a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
3320
- a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
3321
- ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
3322
- total = ds.vector_by_calculation { a + b + c }
3323
- expected = DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777])
3324
- expect(total).to eq(expected)
3325
- end
3326
- end
3327
-
3328
- context "group_by" do
3329
- context "on a single row DataFrame" do
3330
- let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
3331
- it "returns a groupby object" do
3332
- expect(df.group_by([:city])).to be_a(DaruLite::Core::GroupBy)
3333
- end
3334
- it "has the correct index" do
3335
- expect(df.group_by([:city]).groups).to eq({["Kyiv"]=>[0]})
3336
- end
3337
- end
3338
- end
3339
-
3340
- context "#vector_sum" do
3341
- before do
3342
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
3343
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
3344
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3345
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
3346
- @df = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3347
- end
3348
-
3349
- it "calculates complete vector sum" do
3350
- expect(@df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3351
- end
3352
-
3353
- it "ignores nils if skipnil is true" do
3354
- expect(@df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
3355
- end
3356
-
3357
- it "calculates partial vector sum" do
3358
- a = @df.vector_sum([:a1, :a2])
3359
- b = @df.vector_sum([:b1, :b2])
3360
-
3361
- expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
3362
- expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3363
- end
3364
- end
3365
-
3366
- context "#missing_values_rows" do
3367
- it "returns number of missing values in each row" do
3368
- a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
3369
- a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
3370
- b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
3371
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3372
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3373
- df = DaruLite::DataFrame.new({
3374
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3375
-
3376
- expect(df.missing_values_rows).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
984
+ end
3377
985
  end
3378
- end
3379
986
 
3380
- context "#vector_count_characters" do
3381
- it "" do
3382
- a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
3383
- a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
3384
- b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
3385
- b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
3386
- c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
3387
- ds = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3388
-
3389
- expect(ds.vector_count_characters).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
987
+ context DaruLite::MultiIndex do
988
+ it "transposes a DataFrame including row and column indexing" do
989
+ expect(df_mi.transpose).to eq(DaruLite::DataFrame.new([
990
+ vector_arry1,
991
+ vector_arry2,
992
+ vector_arry1,
993
+ vector_arry2].transpose, index: order_mi, order: multi_index))
994
+ end
3390
995
  end
3391
996
  end
3392
997
 
3393
- context '#include_values?' do
3394
- let(:df) do
3395
- DaruLite::DataFrame.new({
3396
- a: [1, 2, 3, 4, Float::NAN, 6, 1],
3397
- b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3398
- c: ['a', 6, 3, 4, 3, 5, 3],
3399
- d: [1, 2, 3, 5, 1, 2, 5]
3400
- })
3401
- end
3402
- before { df.to_category :b }
3403
-
3404
- context 'true' do
3405
- it { expect(df.include_values? nil).to eq true }
3406
- it { expect(df.include_values? Float::NAN).to eq true }
3407
- it { expect(df.include_values? nil, Float::NAN).to eq true }
3408
- it { expect(df.include_values? 1, 30).to eq true }
3409
- end
3410
-
3411
- context 'false' do
3412
- it { expect(df[:a, :c].include_values? nil).to eq false }
3413
- it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3414
- it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3415
- it { expect(df.include_values? 10, 20).to eq false }
998
+ context "#shape" do
999
+ it "returns an array containing number of rows and columns" do
1000
+ expect(df.shape).to eq([5,3])
3416
1001
  end
3417
1002
  end
3418
1003
 
3419
- context "#vector_mean" do
3420
- before do
3421
- a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
3422
- a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
3423
- b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
3424
- b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3425
- c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3426
- @df = DaruLite::DataFrame.new({
3427
- :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3428
- end
3429
-
3430
- it "calculates complete vector mean" do
3431
- expect(@df.vector_mean).to eq(
3432
- DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil])
1004
+ context "#nest" do
1005
+ it "nests in a hash" do
1006
+ df = DaruLite::DataFrame.new({
1007
+ :a => DaruLite::Vector.new(%w(a a a b b b)),
1008
+ :b => DaruLite::Vector.new(%w(c c d d e e)),
1009
+ :c => DaruLite::Vector.new(%w(f g h i j k))
1010
+ })
1011
+ nest = df.nest :a, :b
1012
+ expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
1013
+ expect(nest['a']['d']).to eq([{ :c => 'h' }])
1014
+ expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3433
1015
  end
3434
1016
  end
3435
1017
 
@@ -3473,64 +1055,6 @@ describe DaruLite::DataFrame do
3473
1055
  end
3474
1056
  end
3475
1057
 
3476
- context "#verify" do
3477
- def create_test(*args, &proc)
3478
- description = args.shift
3479
- fields = args
3480
- [description, fields, proc]
3481
- end
3482
-
3483
- before do
3484
- name = DaruLite::Vector.new %w(r1 r2 r3 r4)
3485
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3486
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3487
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3488
- v4 = DaruLite::Vector.new %w(a b a b)
3489
- @df = DaruLite::DataFrame.new({
3490
- :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
3491
- }, order: [:v1, :v2, :v3, :v4, :id])
3492
- end
3493
-
3494
- it "correctly verifies data as per the block" do
3495
- # Correct
3496
- t1 = create_test('If v4=a, v1 odd') do |r|
3497
- r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
3498
- end
3499
- t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
3500
- # Fail!
3501
- t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
3502
- exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
3503
- exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
3504
-
3505
- dataf = @df.verify(t3, t1, t2)
3506
- expect(dataf).to eq(exp1)
3507
- end
3508
-
3509
- it "uses additional fields to extend error messages" do
3510
- t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3511
-
3512
- dataf = @df.verify(:id, t)
3513
- expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
3514
- end
3515
- end
3516
-
3517
- context "#compute" do
3518
- it "performs a computation when supplied in a string" do
3519
- v1 = DaruLite::Vector.new [1, 2, 3, 4]
3520
- v2 = DaruLite::Vector.new [4, 3, 2, 1]
3521
- v3 = DaruLite::Vector.new [10, 20, 30, 40]
3522
- vnumeric = DaruLite::Vector.new [0, 0, 1, 4]
3523
- vsum = DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0]
3524
- vmult = DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1]
3525
-
3526
- df = DaruLite::DataFrame.new({:v1 => v1, :v2 => v2, :v3 => v3})
3527
-
3528
- expect(df.compute("v1/v2")).to eq(vnumeric)
3529
- expect(df.compute("v1+v2+v3")).to eq(vsum)
3530
- expect(df.compute("v1*v2")).to eq(vmult)
3531
- end
3532
- end
3533
-
3534
1058
  context ".crosstab_by_assignation" do
3535
1059
  it "" do
3536
1060
  v1 = DaruLite::Vector.new %w(a a a b b b c c c)
@@ -3554,394 +1078,6 @@ describe DaruLite::DataFrame do
3554
1078
  end
3555
1079
  end
3556
1080
 
3557
- context "#one_to_many" do
3558
- it "" do
3559
- rows = [
3560
- ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
3561
- ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
3562
- ['3', 'alfred', nil, nil, nil, nil, nil, nil]
3563
- ]
3564
-
3565
- df = DaruLite::DataFrame.rows(rows,
3566
- order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
3567
- 'car_value2', 'car_color3', 'car_value3'])
3568
-
3569
- ids = DaruLite::Vector.new %w(1 1 2 2 2)
3570
- colors = DaruLite::Vector.new %w(red blue green orange white)
3571
- values = DaruLite::Vector.new [10, 20, 15, 30, 20]
3572
- col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]
3573
-
3574
- df_expected = DaruLite::DataFrame.new({
3575
- 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
3576
- }, order: ['id', '_col_id', 'color', 'value'])
3577
-
3578
- expect(df.one_to_many(['id'], 'car_%v%n')).to eq(df_expected)
3579
- end
3580
- end
3581
-
3582
- context "#any?" do
3583
- before do
3584
- @df = DaruLite::DataFrame.new({
3585
- a: [1,2,3,4,5],
3586
- b: [10,20,30,40,50],
3587
- c: [11,22,33,44,55]})
3588
- end
3589
-
3590
- it "returns true if any one of the vectors satisfy condition" do
3591
- expect(@df.any? { |v| v[0] == 1 }).to eq(true)
3592
- end
3593
-
3594
- it "returns false if none of the vectors satisfy the condition" do
3595
- expect(@df.any? { |v| v.mean > 100 }).to eq(false)
3596
- end
3597
-
3598
- it "returns true if any one of the rows satisfy condition" do
3599
- expect(@df.any?(:row) { |r| r[:a] == 1 and r[:c] == 11 }).to eq(true)
3600
- end
3601
-
3602
- it "returns false if none of the rows satisfy the condition" do
3603
- expect(@df.any?(:row) { |r| r.mean > 100 }).to eq(false)
3604
- end
3605
-
3606
- it 'fails on unknown axis' do
3607
- expect { @df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3608
- end
3609
- end
3610
-
3611
- context "#all?" do
3612
- before do
3613
- @df = DaruLite::DataFrame.new({
3614
- a: [1,2,3,4,5],
3615
- b: [10,20,30,40,50],
3616
- c: [11,22,33,44,55]})
3617
- end
3618
-
3619
- it "returns true if all of the vectors satisfy condition" do
3620
- expect(@df.all? { |v| v.mean < 40 }).to eq(true)
3621
- end
3622
-
3623
- it "returns false if any one of the vectors does not satisfy condition" do
3624
- expect(@df.all? { |v| v.mean == 30 }).to eq(false)
3625
- end
3626
-
3627
- it "returns true if all of the rows satisfy condition" do
3628
- expect(@df.all?(:row) { |r| r.mean < 70 }).to eq(true)
3629
- end
3630
-
3631
- it "returns false if any one of the rows does not satisfy condition" do
3632
- expect(@df.all?(:row) { |r| r.mean == 30 }).to eq(false)
3633
- end
3634
-
3635
- it 'fails on unknown axis' do
3636
- expect { @df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3637
- end
3638
- end
3639
-
3640
- context "#only_numerics" do
3641
- before do
3642
- @v1 = DaruLite::Vector.new([1,2,3,4,5])
3643
- @v2 = DaruLite::Vector.new(%w(one two three four five))
3644
- @v3 = DaruLite::Vector.new([11,22,33,44,55])
3645
- @df = DaruLite::DataFrame.new({
3646
- a: @v1, b: @v2, c: @v3 }, clone: false)
3647
- end
3648
-
3649
- it "returns a view of only the numeric vectors" do
3650
- dfon = @df.only_numerics(clone: false)
3651
-
3652
- expect(dfon).to eq(
3653
- DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false))
3654
- expect(dfon[:a].object_id).to eq(@v1.object_id)
3655
- end
3656
-
3657
- it "returns a clone of numeric vectors" do
3658
- dfon = @df.only_numerics
3659
-
3660
- expect(dfon).to eq(
3661
- DaruLite::DataFrame.new({ a: @v1, c: @v3}, clone: false)
3662
- )
3663
- expect(dfon[:a].object_id).to_not eq(@v1.object_id)
3664
- end
3665
-
3666
- context DaruLite::MultiIndex do
3667
- before do
3668
- agg_vectors = DaruLite::MultiIndex.from_tuples(
3669
- [
3670
- [:d, :one, :large],
3671
- [:d, :one, :small],
3672
- [:d, :two, :large],
3673
- [:d, :two, :small],
3674
- [:e, :one, :large],
3675
- [:e, :one, :small],
3676
- [:e, :two, :large],
3677
- [:e, :two, :small]
3678
- ]
3679
- )
3680
-
3681
- agg_index = DaruLite::MultiIndex.from_tuples(
3682
- [
3683
- [:bar],
3684
- [:foo]
3685
- ]
3686
- )
3687
- @df = DaruLite::DataFrame.new(
3688
- [
3689
- [4.112,2.234],
3690
- %w(a b),
3691
- [6.342,nil],
3692
- [7.2344,3.23214],
3693
- [8.234,4.533],
3694
- [10.342,2.3432],
3695
- [12.0,nil],
3696
- %w(a b)
3697
- ], order: agg_vectors, index: agg_index
3698
- )
3699
- end
3700
-
3701
- it "returns numeric vectors" do
3702
- vectors = DaruLite::MultiIndex.from_tuples(
3703
- [
3704
- [:d, :one, :large],
3705
- [:d, :two, :large],
3706
- [:d, :two, :small],
3707
- [:e, :one, :large],
3708
- [:e, :one, :small],
3709
- [:e, :two, :large]
3710
- ]
3711
- )
3712
-
3713
- index = DaruLite::MultiIndex.from_tuples(
3714
- [
3715
- [:bar],
3716
- [:foo]
3717
- ]
3718
- )
3719
- answer = DaruLite::DataFrame.new(
3720
- [
3721
- [4.112,2.234],
3722
- [6.342,nil],
3723
- [7.2344,3.23214],
3724
- [8.234,4.533],
3725
- [10.342,2.3432],
3726
- [12.0,nil],
3727
- ], order: vectors, index: index
3728
- )
3729
-
3730
- expect(@df.only_numerics).to eq(answer)
3731
- end
3732
- end
3733
- end
3734
-
3735
- context '#reset_index' do
3736
- context 'when Index' do
3737
- subject do
3738
- DaruLite::DataFrame.new(
3739
- {'vals' => [1,2,3,4,5]},
3740
- index: DaruLite::Index.new(%w[a b c d e], name: 'indices')
3741
- ).reset_index
3742
- end
3743
-
3744
- it { is_expected.to eq DaruLite::DataFrame.new(
3745
- 'indices' => %w[a b c d e],
3746
- 'vals' => [1,2,3,4,5]
3747
- )}
3748
- end
3749
-
3750
- context 'when MultiIndex' do
3751
- subject do
3752
- mi = DaruLite::MultiIndex.from_tuples([
3753
- [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
3754
- ])
3755
- mi.name = %w[nums alphas]
3756
- DaruLite::DataFrame.new(
3757
- {'vals' => [1,2,3,4]},
3758
- index: mi
3759
- ).reset_index
3760
- end
3761
-
3762
- it { is_expected.to eq DaruLite::DataFrame.new(
3763
- 'nums' => [0,0,1,1],
3764
- 'alphas' => %w[a b a b],
3765
- 'vals' => [1,2,3,4]
3766
- )}
3767
- end
3768
- end
3769
-
3770
- context "#set_index" do
3771
- before(:each) do
3772
- @df = DaruLite::DataFrame.new({
3773
- a: [1,2,3,4,5],
3774
- b: ['a','b','c','d','e'],
3775
- c: [11,22,33,44,55]
3776
- })
3777
- end
3778
-
3779
- it "sets a particular column as the index and deletes that column" do
3780
- @df.set_index(:b)
3781
- expect(@df).to eq(
3782
- DaruLite::DataFrame.new({
3783
- a: [1,2,3,4,5],
3784
- c: [11,22,33,44,55]
3785
- }, index: ['a','b','c','d','e'])
3786
- )
3787
- end
3788
-
3789
- it "sets a particular column as index but keeps that column" do
3790
- expect(@df.set_index(:c, keep: true)).to eq(
3791
- DaruLite::DataFrame.new({
3792
- a: [1,2,3,4,5],
3793
- b: ['a','b','c','d','e'],
3794
- c: [11,22,33,44,55]
3795
- }, index: [11,22,33,44,55]))
3796
- expect(@df[:c]).to eq(@df[:c])
3797
- end
3798
-
3799
- it "sets categorical index if categorical is true" do
3800
- data = {
3801
- a: [1, 2, 3, 4, 5],
3802
- b: [:a, 1, :a, 1, 'c'],
3803
- c: %w[a b c d e]
3804
- }
3805
- df = DaruLite::DataFrame.new(data)
3806
- df.set_index(:b, categorical: true)
3807
- expected = DaruLite::DataFrame.new(
3808
- data.slice(:a, :c),
3809
- index: DaruLite::CategoricalIndex.new(data[:b])
3810
- )
3811
- expect(df).to eq(expected)
3812
- end
3813
-
3814
- it "raises error if all elements in the column aren't unique" do
3815
- jholu = DaruLite::DataFrame.new({
3816
- a: ['a','b','a'],
3817
- b: [1,2,4]
3818
- })
3819
-
3820
- expect {
3821
- jholu.set_index(:a)
3822
- }.to raise_error(ArgumentError)
3823
- end
3824
-
3825
- it "sets multiindex if array is given" do
3826
- df = DaruLite::DataFrame.new({
3827
- a: %w[a a b b],
3828
- b: [1, 2, 1, 2],
3829
- c: %w[a b c d]
3830
- })
3831
- df.set_index(%i[a b])
3832
- expected =
3833
- DaruLite::DataFrame.new(
3834
- { c: %w[a b c d] },
3835
- index: DaruLite::MultiIndex.from_tuples(
3836
- [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
3837
- )
3838
- ).tap do |df|
3839
- df.index.name = %i[a b]
3840
- df
3841
- end
3842
- expect(df).to eq(expected)
3843
- end
3844
- end
3845
-
3846
- context "#concat" do
3847
- before do
3848
- @df1 = DaruLite::DataFrame.new({
3849
- a: [1, 2, 3],
3850
- b: [1, 2, 3]
3851
- })
3852
-
3853
- @df2 = DaruLite::DataFrame.new({
3854
- a: [4, 5, 6],
3855
- c: [4, 5, 6]
3856
- })
3857
- end
3858
-
3859
- it 'does not modify the original dataframes' do
3860
- df1_a = @df1[:a].to_a.dup
3861
- df2_a = @df2[:a].to_a.dup
3862
-
3863
- df_concat = @df1.concat @df2
3864
- expect(@df1[:a].to_a).to eq df1_a
3865
- expect(@df2[:a].to_a).to eq df2_a
3866
- end
3867
-
3868
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3869
- df1_a = @df1[:a].to_a.dup
3870
- df2_a = @df2[:a].to_a.dup
3871
-
3872
- df_concat = @df1.concat @df2
3873
- expect(df_concat[:a].to_a).to eq df1_a + df2_a
3874
- end
3875
-
3876
- it 'fills in missing vectors with nils' do
3877
- df1_b = @df1[:b].to_a.dup
3878
- df2_c = @df2[:c].to_a.dup
3879
-
3880
- df_concat = @df1.concat @df2
3881
- expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
3882
- expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
3883
- end
3884
-
3885
- end
3886
-
3887
- context "#union" do
3888
- before do
3889
- @df1 = DaruLite::DataFrame.new({
3890
- a: [1, 2, 3],
3891
- b: [1, 2, 3]},
3892
- index: [1,3,5] )
3893
-
3894
- @df2 = DaruLite::DataFrame.new({
3895
- a: [4, 5, 6],
3896
- c: [4, 5, 6]},
3897
- index: [7,9,11])
3898
-
3899
- @df3 = DaruLite::DataFrame.new({
3900
- a: [4, 5, 6],
3901
- c: [4, 5, 6]},
3902
- index: [5,7,9])
3903
- end
3904
-
3905
- it 'does not modify the original dataframes' do
3906
- df1_a = @df1[:a].to_a.dup
3907
- df2_a = @df2[:a].to_a.dup
3908
-
3909
- _ = @df1.union @df2
3910
- expect(@df1[:a].to_a).to eq df1_a
3911
- expect(@df2[:a].to_a).to eq df2_a
3912
- end
3913
-
3914
- it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
3915
- df1_a = @df1[:a].to_a.dup
3916
- df2_a = @df2[:a].to_a.dup
3917
-
3918
- df_union = @df1.union @df2
3919
- expect(df_union[:a].to_a).to eq df1_a + df2_a
3920
- end
3921
-
3922
- it 'fills in missing vectors with nils' do
3923
- df1_b = @df1[:b].to_a.dup
3924
- df2_c = @df2[:c].to_a.dup
3925
-
3926
- df_union = @df1.union @df2
3927
- expect(df_union[:b].to_a).to eq df1_b + [nil] * @df2.size
3928
- expect(df_union[:c].to_a).to eq [nil] * @df1.size + df2_c
3929
- end
3930
-
3931
- it 'overwrites part of the first dataframe if there are double indices' do
3932
- vec = DaruLite::Vector.new({a: 4, b: nil, c: 4})
3933
- expect(@df1.union(@df3).row[5]).to eq vec
3934
- end
3935
-
3936
- it 'concats the indices' do
3937
- v1 = @df1.index.to_a
3938
- v2 = @df2.index.to_a
3939
-
3940
- df_union = @df1.union @df2
3941
- expect(df_union.index.to_a).to eq v1 + v2
3942
- end
3943
- end
3944
-
3945
1081
  context '#inspect' do
3946
1082
  subject { df.inspect }
3947
1083
 
@@ -4021,6 +1157,18 @@ describe DaruLite::DataFrame do
4021
1157
  }.unindent}
4022
1158
  end
4023
1159
 
1160
+ context 'with integers as vectors names' do
1161
+ let(:df) { DaruLite::DataFrame.new({ 1 => [1,2,3], b: [3,4,5], c: [6,7,8] }, name: 'test')}
1162
+
1163
+ it { is_expected.to eq %Q{
1164
+ |#<DaruLite::DataFrame: test (3x3)>
1165
+ | 1 b c
1166
+ | 0 1 3 6
1167
+ | 1 2 4 7
1168
+ | 2 3 5 8
1169
+ }.unindent}
1170
+ end
1171
+
4024
1172
  context 'very long' do
4025
1173
  let(:df) { DaruLite::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
4026
1174
  it { is_expected.to eq %Q{
@@ -4110,189 +1258,6 @@ describe DaruLite::DataFrame do
4110
1258
  end
4111
1259
  end
4112
1260
 
4113
- context '#to_s' do
4114
- it 'produces a class, size description' do
4115
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame(5x3)>"
4116
- end
4117
-
4118
- it 'produces a class, name, size description' do
4119
- @data_frame.name = "Test"
4120
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4121
- end
4122
-
4123
- it 'produces a class, name, size description when the name is a symbol' do
4124
- @data_frame.name = :Test
4125
- expect(@data_frame.to_s).to eq "#<DaruLite::DataFrame: Test(5x3)>"
4126
- end
4127
- end
4128
-
4129
- context '#to_json' do
4130
- let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, index: [:one, :two, :three], name: 'test')}
4131
- subject { JSON.parse(json) }
4132
-
4133
- context 'with index' do
4134
- let(:json) { df.to_json(false) }
4135
- # FIXME: is it most reasonable we can do?.. -- zverok
4136
- # For me, more resonable thing would be something like
4137
- #
4138
- # [
4139
- # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
4140
- # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
4141
- # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
4142
- # ]
4143
- #
4144
- # Or maybe
4145
- #
4146
- # [
4147
- # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
4148
- # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
4149
- # ["three", {"a"=>3, "b"=>5, "c"=>8}]
4150
- # ]
4151
- #
4152
- # Or even
4153
- #
4154
- # {
4155
- # "one" => {"a"=>1, "b"=>3, "c"=>6},
4156
- # "two" => {"a"=>2, "b"=>4, "c"=>7},
4157
- # "three" => {"a"=>3, "b"=>5, "c"=>8}
4158
- # }
4159
- #
4160
- it { is_expected.to eq(
4161
- [
4162
- [
4163
- {"a"=>1, "b"=>3, "c"=>6},
4164
- {"a"=>2, "b"=>4, "c"=>7},
4165
- {"a"=>3, "b"=>5, "c"=>8}
4166
- ],
4167
- ["one", "two", "three"]
4168
- ]
4169
- )}
4170
- end
4171
-
4172
- context 'without index' do
4173
- let(:json) { df.to_json(true) }
4174
- it { is_expected.to eq(
4175
- [
4176
- {"a"=>1, "b"=>3, "c"=>6},
4177
- {"a"=>2, "b"=>4, "c"=>7},
4178
- {"a"=>3, "b"=>5, "c"=>8}
4179
- ]
4180
- )}
4181
- end
4182
- end
4183
-
4184
- context '#access_row_tuples_by_indexs' do
4185
- let(:df) {
4186
- DaruLite::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
4187
- let(:df_idx) {
4188
- DaruLite::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
4189
- }
4190
- let (:mi_idx) do
4191
- DaruLite::MultiIndex.from_tuples [
4192
- [:a,:one,:bar],
4193
- [:a,:one,:baz],
4194
- [:b,:two,:bar],
4195
- [:a,:two,:baz],
4196
- ]
4197
- end
4198
- let (:df_mi) do
4199
- DaruLite::DataFrame.new({
4200
- a: 1..4,
4201
- b: 'a'..'d'
4202
- }, index: mi_idx )
4203
- end
4204
- context 'when no index is given' do
4205
- it 'returns empty Array' do
4206
- expect(df.access_row_tuples_by_indexs()).to eq([])
4207
- end
4208
- end
4209
- context 'when index(s) are given' do
4210
- it 'returns Array of row tuples' do
4211
- expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
4212
- expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
4213
- end
4214
- end
4215
- context 'when custom index(s) are given' do
4216
- it 'returns Array of row tuples' do
4217
- expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
4218
- [[52, 1], [7, 3]]
4219
- )
4220
- end
4221
- end
4222
- context 'when multi index is given' do
4223
- it 'returns Array of row tuples' do
4224
- expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
4225
- [[1, "a"], [2, "b"], [4, "d"]]
4226
- )
4227
- expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
4228
- [[2, "b"]]
4229
- )
4230
- end
4231
- end
4232
- end
4233
-
4234
- context '#aggregate' do
4235
- let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4236
- let(:df) { DaruLite::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
4237
- let(:df_cat_idx) {
4238
- DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
4239
-
4240
- it 'lambda function on particular column' do
4241
- expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
4242
- DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
4243
- )
4244
- end
4245
- it 'aggregate sum on particular column' do
4246
- expect(df_cat_idx.aggregate(num: :sum)).to eq(
4247
- DaruLite::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
4248
- )
4249
- end
4250
- end
4251
-
4252
- context '#group_by_and_aggregate' do
4253
- let(:spending_df) {
4254
- DaruLite::DataFrame.rows([
4255
- [2010, 'dev', 50, 1],
4256
- [2010, 'dev', 150, 1],
4257
- [2010, 'dev', 200, 1],
4258
- [2011, 'dev', 50, 1],
4259
- [2012, 'dev', 150, 1],
4260
-
4261
- [2011, 'office', 300, 1],
4262
-
4263
- [2010, 'market', 50, 1],
4264
- [2011, 'market', 500, 1],
4265
- [2012, 'market', 500, 1],
4266
- [2012, 'market', 300, 1],
4267
-
4268
- [2012, 'R&D', 10, 1],],
4269
- order: [:year, :category, :spending, :nb_spending])
4270
- }
4271
-
4272
- it 'works as group_by + aggregate' do
4273
- expect(spending_df.group_by_and_aggregate(:year, spending: :sum)).to eq(
4274
- spending_df.group_by(:year).aggregate(spending: :sum))
4275
- expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
4276
- spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
4277
- end
4278
- end
4279
-
4280
- context '#create_sql' do
4281
- let(:df) { DaruLite::DataFrame.new({
4282
- a: [1,2,3],
4283
- b: ['test', 'me', 'please'],
4284
- c: ['2015-06-01', '2015-06-02', '2015-06-03']
4285
- },
4286
- name: 'test'
4287
- )}
4288
- subject { df.create_sql('foo') }
4289
- it { is_expected.to eq %Q{
4290
- |CREATE TABLE foo (a INTEGER,
4291
- | b VARCHAR (255),
4292
- | c DATE) CHARACTER SET=UTF8;
4293
- }.unindent}
4294
- end
4295
-
4296
1261
  context "#by_single_key" do
4297
1262
  let(:df) { DaruLite::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6] ) }
4298
1263
 
@@ -4300,31 +1265,4 @@ describe DaruLite::DataFrame do
4300
1265
  expect { df[:c] }.to raise_error(IndexError, /Specified vector c does not exist/)
4301
1266
  end
4302
1267
  end
4303
-
4304
- context "#rotate_vectors" do
4305
- subject { df.rotate_vectors(-1) }
4306
-
4307
- context "several vectors in the dataframe" do
4308
- let(:df) do
4309
- DaruLite::DataFrame.new({
4310
- a: [1,2,3],
4311
- b: [4,5,6],
4312
- total: [5,7,9]
4313
- })
4314
- end
4315
- let(:new_order) { [:total, :a, :b] }
4316
-
4317
- it "return the dataframe with the position of the last vector change to first" do
4318
- expect(subject.vectors.to_a).to eq(new_order)
4319
- end
4320
- end
4321
-
4322
- context "only one vector in the dataframe" do
4323
- let(:df) { DaruLite::DataFrame.new({ a: [1,2,3] }) }
4324
-
4325
- it "return the dataframe without any change" do
4326
- expect(subject).to eq(df)
4327
- end
4328
- end
4329
- end
4330
1268
  end if mri?