daru 0.1.5 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +5 -5
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +21 -7
  5. data/.travis.yml +10 -5
  6. data/CONTRIBUTING.md +15 -10
  7. data/History.md +124 -2
  8. data/README.md +37 -9
  9. data/ReleasePolicy.md +20 -0
  10. data/benchmarks/db_loading.rb +34 -0
  11. data/benchmarks/statistics.rb +6 -6
  12. data/benchmarks/where_clause.rb +1 -1
  13. data/benchmarks/where_vs_filter.rb +1 -1
  14. data/daru.gemspec +17 -41
  15. data/lib/daru.rb +10 -13
  16. data/lib/daru/accessors/gsl_wrapper.rb +1 -1
  17. data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
  18. data/lib/daru/category.rb +29 -15
  19. data/lib/daru/configuration.rb +34 -0
  20. data/lib/daru/core/group_by.rb +158 -77
  21. data/lib/daru/core/merge.rb +12 -3
  22. data/lib/daru/core/query.rb +20 -4
  23. data/lib/daru/dataframe.rb +692 -118
  24. data/lib/daru/date_time/index.rb +14 -11
  25. data/lib/daru/date_time/offsets.rb +9 -1
  26. data/lib/daru/extensions/which_dsl.rb +55 -0
  27. data/lib/daru/formatters/table.rb +3 -5
  28. data/lib/daru/index/categorical_index.rb +4 -4
  29. data/lib/daru/index/index.rb +131 -42
  30. data/lib/daru/index/multi_index.rb +118 -10
  31. data/lib/daru/io/csv/converters.rb +21 -0
  32. data/lib/daru/io/io.rb +105 -33
  33. data/lib/daru/io/sql_data_source.rb +10 -0
  34. data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
  35. data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
  36. data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  37. data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  38. data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
  39. data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
  40. data/lib/daru/iruby/templates/vector.html.erb +3 -25
  41. data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
  42. data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
  43. data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
  44. data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
  45. data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
  46. data/lib/daru/maths/arithmetic/vector.rb +38 -2
  47. data/lib/daru/maths/statistics/dataframe.rb +28 -30
  48. data/lib/daru/maths/statistics/vector.rb +295 -41
  49. data/lib/daru/plotting/gruff/dataframe.rb +13 -15
  50. data/lib/daru/plotting/nyaplot/category.rb +1 -1
  51. data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
  52. data/lib/daru/plotting/nyaplot/vector.rb +1 -2
  53. data/lib/daru/vector.rb +308 -96
  54. data/lib/daru/version.rb +1 -1
  55. data/profile/vector_new.rb +9 -0
  56. data/spec/accessors/gsl_wrapper_spec.rb +38 -35
  57. data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
  58. data/spec/category_spec.rb +24 -20
  59. data/spec/core/group_by_spec.rb +238 -4
  60. data/spec/core/merge_spec.rb +1 -1
  61. data/spec/core/query_spec.rb +65 -50
  62. data/spec/daru_spec.rb +22 -0
  63. data/spec/dataframe_spec.rb +473 -16
  64. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  65. data/spec/date_time/index_spec.rb +34 -16
  66. data/spec/date_time/offsets_spec.rb +14 -0
  67. data/spec/extensions/rserve_spec.rb +1 -1
  68. data/spec/extensions/which_dsl_spec.rb +38 -0
  69. data/spec/fixtures/boolean_converter_test.csv +5 -0
  70. data/spec/fixtures/duplicates.csv +32 -0
  71. data/spec/fixtures/eciresults.html +394 -0
  72. data/spec/fixtures/empty_rows_test.csv +17 -0
  73. data/spec/fixtures/macau.html +3691 -0
  74. data/spec/fixtures/macd_data.csv +150 -0
  75. data/spec/fixtures/matrix_test.csv +55 -55
  76. data/spec/fixtures/moneycontrol.html +6812 -0
  77. data/spec/fixtures/string_converter_test.csv +5 -0
  78. data/spec/fixtures/test_xls.xls +0 -0
  79. data/spec/fixtures/test_xls_2.xls +0 -0
  80. data/spec/fixtures/url_test.txt~ +0 -0
  81. data/spec/fixtures/valid_markup.html +62 -0
  82. data/spec/fixtures/wiki_climate.html +1243 -0
  83. data/spec/fixtures/wiki_table_info.html +631 -0
  84. data/spec/formatters/table_formatter_spec.rb +29 -0
  85. data/spec/index/categorical_index_spec.rb +33 -33
  86. data/spec/index/index_spec.rb +160 -41
  87. data/spec/index/multi_index_spec.rb +143 -33
  88. data/spec/io/io_spec.rb +246 -2
  89. data/spec/io/sql_data_source_spec.rb +31 -41
  90. data/spec/iruby/dataframe_spec.rb +17 -19
  91. data/spec/iruby/vector_spec.rb +26 -28
  92. data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
  93. data/spec/maths/arithmetic/vector_spec.rb +18 -0
  94. data/spec/maths/statistics/vector_spec.rb +153 -15
  95. data/spec/plotting/gruff/category_spec.rb +3 -3
  96. data/spec/plotting/gruff/dataframe_spec.rb +14 -4
  97. data/spec/plotting/gruff/vector_spec.rb +9 -9
  98. data/spec/plotting/nyaplot/category_spec.rb +5 -9
  99. data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
  100. data/spec/plotting/nyaplot/vector_spec.rb +5 -11
  101. data/spec/shared/vector_display_spec.rb +12 -14
  102. data/spec/spec_helper.rb +30 -7
  103. data/spec/support/matchers.rb +5 -0
  104. data/spec/vector_spec.rb +306 -72
  105. metadata +96 -55
  106. data/spec/fixtures/stock_data.csv +0 -500
@@ -35,6 +35,54 @@ describe Daru::MultiIndex do
35
35
  labels: [[0,0,1,1,2,2]])
36
36
  }.to raise_error
37
37
  end
38
+
39
+ context "create an MultiIndex with name" do
40
+ context 'if no name is set' do
41
+ subject { Daru::MultiIndex.new(
42
+ levels: [[:a,:b,:c], [:one, :two]],
43
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]]) }
44
+ its(:name) { is_expected.to be_nil }
45
+ end
46
+
47
+ context 'correctly return the MultiIndex name' do
48
+ subject { Daru::MultiIndex.new(
49
+ levels: [[:a,:b,:c], [:one, :two]],
50
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['n1', 'n2']) }
51
+ its(:name) { is_expected.to eq ['n1', 'n2'] }
52
+ end
53
+
54
+ context "set new MultiIndex name" do
55
+ subject {
56
+ Daru::MultiIndex.new(
57
+ levels: [[:a,:b,:c], [:one, :two]],
58
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['n1', 'n2']) }
59
+ before(:each) { subject.name = ['k1', 'k2'] }
60
+ its(:name) { is_expected.to eq ['k1', 'k2'] }
61
+ end
62
+
63
+ context "set new MultiIndex name having empty string" do
64
+ subject {
65
+ Daru::MultiIndex.new(
66
+ levels: [[:a,:b,:c], [:one, :two]],
67
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['n1', 'n2']) }
68
+ before { subject.name = ['k1', ''] }
69
+ its(:name) { is_expected.to eq ['k1', ''] }
70
+ end
71
+
72
+ it "raises SizeError for wrong number of name" do
73
+ error_msg = "\'names\' and \'levels\' should be of same size. Size of the \'name\' array is 2 and size of the MultiIndex \'levels\' and \'labels\' is 3.\nIf you don\'t want to set name for particular level (say level \'i\') then put empty string on index \'i\' of the \'name\' Array."
74
+ expect { @multi_mi.name = ['n1', 'n2'] }.to raise_error(SizeError, error_msg)
75
+
76
+ error_msg = "'names' and 'levels' should be of same size. Size of the 'name' array is 0 and size of the MultiIndex 'levels' and 'labels' is 3.\nIf you don\'t want to set name for particular level (say level 'i') then put empty string on index 'i' of the 'name' Array."
77
+ expect { @multi_mi.name = [ ] }.to raise_error(SizeError, error_msg)
78
+
79
+ error_msg = "'names' and 'levels' should be of same size. Size of the 'name' array is 1 and size of the MultiIndex 'levels' and 'labels' is 3.\nIf you don\'t want to set name for particular level (say level 'i') then put empty string on index 'i' of the 'name' Array."
80
+ expect { @multi_mi.name = [''] }.to raise_error(SizeError, error_msg)
81
+
82
+ error_msg = "'names' and 'levels' should be of same size. Size of the 'name' array is 4 and size of the MultiIndex 'levels' and 'labels' is 3."
83
+ expect { @multi_mi.name = ['n1', 'n2', 'n3', 'n4'] }.to raise_error(SizeError, error_msg)
84
+ end
85
+ end
38
86
  end
39
87
 
40
88
  context ".from_tuples" do
@@ -154,8 +202,16 @@ describe Daru::MultiIndex do
154
202
  expect(@multi_mi.include?([:a, :one])).to eq(true)
155
203
  end
156
204
 
157
- it "checks for non-existence of a tuple" do
158
- expect(@multi_mi.include?([:boo])).to eq(false)
205
+ it "checks for non-existence of completely specified tuple" do
206
+ expect(@multi_mi.include?([:b, :two, :foo])).to eq(false)
207
+ end
208
+
209
+ it "checks for non-existence of a top layer incomplete tuple" do
210
+ expect(@multi_mi.include?([:d])).to eq(false)
211
+ end
212
+
213
+ it "checks for non-existence of a middle layer incomplete tuple" do
214
+ expect(@multi_mi.include?([:c, :three])).to eq(false)
159
215
  end
160
216
  end
161
217
 
@@ -255,6 +311,42 @@ describe Daru::MultiIndex do
255
311
  }.unindent
256
312
  }
257
313
  end
314
+
315
+ context 'multi index with name' do
316
+ subject {
317
+ Daru::MultiIndex.new(
318
+ levels: [[:a,:b,:c],[:one,:two],[:bar, :baz, :foo]],
319
+ labels: [
320
+ [0,0,0,0,1,1,1,1,2,2,2,2],
321
+ [0,0,1,1,0,1,1,0,0,0,1,1],
322
+ [0,1,0,1,0,0,1,2,0,1,2,0]], name: ['n1', 'n2', 'n3'])
323
+ }
324
+
325
+ its(:inspect) { is_expected.to start_with %Q{
326
+ |#<Daru::MultiIndex(12x3)>
327
+ | n1 n2 n3
328
+ }.unindent
329
+ }
330
+ end
331
+
332
+ context 'multi index with name having empty string' do
333
+ subject {
334
+ mi= Daru::MultiIndex.new(
335
+ levels: [[:a,:b,:c],[:one,:two],[:bar, :baz, :foo]],
336
+ labels: [
337
+ [0,0,0,0,1,1,1,1,2,2,2,2],
338
+ [0,0,1,1,0,1,1,0,0,0,1,1],
339
+ [0,1,0,1,0,0,1,2,0,1,2,0]], name: ['n1', 'n2', 'n3'])
340
+ }
341
+ before { subject.name = ['n1', '', 'n3'] }
342
+
343
+ its(:inspect) { is_expected.to start_with %Q{
344
+ |#<Daru::MultiIndex(12x3)>
345
+ | n1 n3
346
+ }.unindent
347
+ }
348
+ end
349
+
258
350
  end
259
351
 
260
352
  context "#==" do
@@ -410,31 +502,31 @@ describe Daru::MultiIndex do
410
502
  [:b,:one,:foo]
411
503
  ])
412
504
  end
413
-
505
+
414
506
  context "single index" do
415
507
  it { expect(idx.pos :b, :one, :bar).to eq 0 }
416
508
  end
417
-
509
+
418
510
  context "multiple indexes" do
419
511
  subject { idx.pos :b, :one }
420
-
512
+
421
513
  it { is_expected.to be_a Array }
422
514
  its(:size) { is_expected.to eq 2 }
423
515
  it { is_expected.to eq [0, 3] }
424
516
  end
425
-
517
+
426
518
  context "single positional index" do
427
519
  it { expect(idx.pos 0).to eq 0 }
428
520
  end
429
-
521
+
430
522
  context "multiple positional indexes" do
431
523
  subject { idx.pos 0, 1 }
432
-
524
+
433
525
  it { is_expected.to be_a Array }
434
526
  its(:size) { is_expected.to eq 2 }
435
527
  it { is_expected.to eq [0, 1] }
436
528
  end
437
-
529
+
438
530
  # TODO: Add specs for IndexError
439
531
  end
440
532
 
@@ -447,23 +539,23 @@ describe Daru::MultiIndex do
447
539
  [:b, :one, :foo]
448
540
  ])
449
541
  end
450
-
542
+
451
543
  context "multiple indexes" do
452
544
  subject { idx.subset :b, :one }
453
-
545
+
454
546
  it { is_expected.to be_a described_class }
455
547
  its(:size) { is_expected.to eq 2 }
456
548
  its(:to_a) { is_expected.to eq [[:bar], [:foo]] }
457
549
  end
458
-
550
+
459
551
  context "multiple positional indexes" do
460
552
  subject { idx.subset 0, 1 }
461
-
553
+
462
554
  it { is_expected.to be_a described_class }
463
555
  its(:size) { is_expected.to eq 2 }
464
556
  its(:to_a) { is_expected.to eq [[:b, :one, :bar], [:b, :two, :bar]] }
465
557
  end
466
-
558
+
467
559
  # TODO: Checks for invalid indexes
468
560
  end
469
561
 
@@ -476,53 +568,53 @@ describe Daru::MultiIndex do
476
568
  [:b, :one, :foo]
477
569
  ])
478
570
  end
479
-
571
+
480
572
  context "single position" do
481
573
  it { expect(idx.at 2).to eq [:b, :two, :baz] }
482
574
  end
483
-
575
+
484
576
  context "multiple positions" do
485
577
  subject { idx.at 1, 2 }
486
-
578
+
487
579
  it { is_expected.to be_a described_class }
488
580
  its(:size) { is_expected.to eq 2 }
489
581
  its(:to_a) { is_expected.to eq [[:b, :two, :bar],
490
582
  [:b, :two, :baz]] }
491
583
  end
492
-
584
+
493
585
  context "range" do
494
586
  subject { idx.at 1..2 }
495
-
587
+
496
588
  it { is_expected.to be_a described_class }
497
589
  its(:size) { is_expected.to eq 2 }
498
590
  its(:to_a) { is_expected.to eq [[:b, :two, :bar],
499
- [:b, :two, :baz]] }
591
+ [:b, :two, :baz]] }
500
592
  end
501
-
593
+
502
594
  context "range with negative integers" do
503
595
  subject { idx.at 1..-2 }
504
-
596
+
505
597
  it { is_expected.to be_a described_class }
506
598
  its(:size) { is_expected.to eq 2 }
507
599
  its(:to_a) { is_expected.to eq [[:b, :two, :bar],
508
- [:b, :two, :baz]] }
509
- end
510
-
600
+ [:b, :two, :baz]] }
601
+ end
602
+
511
603
  context "rangle with single element" do
512
604
  subject { idx.at 1..1 }
513
-
605
+
514
606
  it { is_expected.to be_a described_class }
515
607
  its(:size) { is_expected.to eq 1 }
516
608
  its(:to_a) { is_expected.to eq [[:b, :two, :bar]] }
517
609
  end
518
-
610
+
519
611
  context "invalid position" do
520
612
  it { expect { idx.at 4 }.to raise_error IndexError }
521
613
  end
522
-
614
+
523
615
  context "invalid positions" do
524
616
  it { expect { idx.at 2, 4 }.to raise_error IndexError }
525
- end
617
+ end
526
618
  end
527
619
 
528
620
  context "#add" do
@@ -534,7 +626,7 @@ describe Daru::MultiIndex do
534
626
  [:b, :one, :foo]
535
627
  ]
536
628
  end
537
-
629
+
538
630
  context "single index" do
539
631
  subject { idx.add :b, :two, :baz }
540
632
 
@@ -556,15 +648,33 @@ describe Daru::MultiIndex do
556
648
  [:b, :one, :foo]
557
649
  ]
558
650
  end
559
-
651
+
560
652
  context "single index" do
561
653
  it { expect(idx.valid? :a, :one, :bar).to eq true }
562
654
  it { expect(idx.valid? :b, :two, :three).to eq false }
563
655
  end
564
-
656
+
565
657
  context "multiple indexes" do
566
658
  it { expect(idx.valid? :a, :one).to eq true }
567
659
  it { expect(idx.valid? :a, :three).to eq false }
568
660
  end
569
- end
661
+ end
662
+
663
+ context '#to_df' do
664
+ let(:idx) do
665
+ described_class.from_tuples([
666
+ %w[a one bar],
667
+ %w[a two bar],
668
+ %w[b two baz],
669
+ %w[b one foo]
670
+ ]).tap { |idx| idx.name = %w[col1 col2 col3] }
671
+ end
672
+
673
+ subject { idx.to_df }
674
+ it { is_expected.to eq Daru::DataFrame.new(
675
+ 'col1' => %w[a a b b],
676
+ 'col2' => %w[one two two one],
677
+ 'col3' => %w[bar bar baz foo]
678
+ )}
679
+ end
570
680
  end
@@ -1,12 +1,21 @@
1
+ # -*- coding: utf-8 -*-
1
2
  describe Daru::IO do
2
3
  describe Daru::DataFrame do
3
4
  context ".from_csv" do
5
+ before do
6
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
7
+ WebMock
8
+ .stub_request(:get,"http://dummy-remote-url/#{file}.csv")
9
+ .to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv"))
10
+ end
11
+ end
12
+
4
13
  it "loads from a CSV file" do
5
14
  df = Daru::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
6
15
  col_sep: ' ', headers: true)
7
16
 
8
- df.vectors = [:image_resolution, :mls, :true_transform].to_index
9
- expect(df.vectors).to eq([:image_resolution, :mls, :true_transform].to_index)
17
+ df.vectors = [:image_resolution, :true_transform, :mls].to_index
18
+ expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
10
19
  expect(df[:image_resolution].first).to eq(6.55779)
11
20
  expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
12
21
  end
@@ -32,6 +41,34 @@ describe Daru::IO do
32
41
  df = Daru::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv'
33
42
  expect(df.vectors.to_a).to eq(%W[Account Name Rep Manager Product Quantity Price Status])
34
43
  end
44
+
45
+ it "handles empty rows in the CSV" do
46
+ df = Daru::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv'
47
+ expect(df.nrows).to eq(13)
48
+ end
49
+
50
+ it "uses the custom boolean converter correctly" do
51
+ df = Daru::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean]
52
+ expect(df['Domestic'].to_a).to all be_boolean
53
+ end
54
+
55
+ it "uses the custom string converter correctly" do
56
+ df = Daru::DataFrame.from_csv 'spec/fixtures/string_converter_test.csv', converters: [:string]
57
+ expect(df['Case Number'].to_a.all? {|x| String === x }).to be_truthy
58
+ end
59
+
60
+ it "allow symbol to converters option" do
61
+ df = Daru::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: :boolean
62
+ expect(df['Domestic'].to_a).to all be_boolean
63
+ end
64
+
65
+ it "checks for equal parsing of local CSV files and remote CSV files" do
66
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
67
+ df_local = Daru::DataFrame.from_csv("spec/fixtures/#{file}.csv")
68
+ df_remote = Daru::DataFrame.from_csv("http://dummy-remote-url/#{file}.csv")
69
+ expect(df_local).to eq(df_remote)
70
+ end
71
+ end
35
72
  end
36
73
 
37
74
  context "#write_csv" do
@@ -86,6 +123,38 @@ describe Daru::IO do
86
123
  end
87
124
  end
88
125
 
126
+ context "#from_excel with row_id" do
127
+ before do
128
+ id = Daru::Vector.new(['id', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
129
+ name = Daru::Vector.new(%w(name Alex Claude Peter Franz George Fernand))
130
+ age = Daru::Vector.new(['age', 20.0, 23.0, 25.0, nil, 5.5, nil])
131
+ city = Daru::Vector.new(['city', 'New York', 'London', 'London', 'Paris', 'Tome', nil])
132
+ a1 = Daru::Vector.new(['a1', 'a,b', 'b,c', 'a', nil, 'a,b,c', nil])
133
+ @expected_1 = Daru::DataFrame.new({:id2 => id, :name2 => name, :age2 => age}, order: [:id2, :name2, :age2])
134
+ @expected_2 = Daru::DataFrame.new({
135
+ :id => id, :name => name, :age => age, :city => city, :a1 => a1
136
+ }, order: [:id, :name, :age, :city, :a1])
137
+ end
138
+
139
+ it "loads DataFrame from test_xls_2.xls" do
140
+ df = Daru::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls'
141
+
142
+ expect(df.nrows).to eq(7)
143
+ expect(df.vectors.to_a).to eq([:id2, :name2, :age2])
144
+ expect(df[:age2][6]).to eq(nil)
145
+ expect(@expected_1).to eq(df)
146
+ end
147
+
148
+ it "loads DataFrame from test_xls_2.xls with row_id" do
149
+ df = Daru::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls', {row_id: 1}
150
+
151
+ expect(df.nrows).to eq(7)
152
+ expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
153
+ expect(df[:age][6]).to eq(nil)
154
+ expect(@expected_2).to eq(df)
155
+ end
156
+ end
157
+
89
158
  context "#write_excel" do
90
159
  before do
91
160
  a = Daru::Vector.new(100.times.map { rand(100) })
@@ -269,6 +338,181 @@ describe Daru::IO do
269
338
  expect(a).to eq(@data_frame)
270
339
  end
271
340
  end
341
+
342
+ context "#from_html" do
343
+ context "in wiki info table" do
344
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_table_info.html" }
345
+ let(:order) { ["FName", "LName", "Age"] }
346
+ let(:index) { ["One", "Two", "Three", "Four", "Five", "Six", "Seven"] }
347
+ let(:name) { "Wikipedia Information Table" }
348
+
349
+ context "returns default dataframe" do
350
+ subject { Daru::DataFrame.from_html(path) }
351
+
352
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
353
+ its(:first) { is_expected.to eq (Daru::DataFrame.new(
354
+ [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"],
355
+ ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"],
356
+ ["14", "25", "16", "22", "22", "22", "22"]],
357
+ order: ["First name","Last name","Age"]
358
+ )
359
+ )
360
+ }
361
+ end
362
+
363
+ context "returns user-modified dataframe" do
364
+ subject { Daru::DataFrame.from_html(path, order: order, index: index, name: name) }
365
+
366
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
367
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
368
+ [["Tinu", "Blaszczyk", "Lily", "Olatunkboh", "Adrienne", "Axelia", "Jon-Kabat"],
369
+ ["Elejogun", "Kostrzewski", "McGarrett", "Chijiaku", "Anthoula", "Athanasios", "Zinn"],
370
+ ["14", "25", "16", "22", "22", "22", "22"]],
371
+ order: ["FName","LName", "Age"],
372
+ index: ["One", "Two", "Three", "Four", "Five", "Six", "Seven"],
373
+ name: "Wikipedia Information Table"
374
+ )
375
+ )
376
+ }
377
+ end
378
+ end
379
+
380
+ context "in wiki climate data" do
381
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/wiki_climate.html" }
382
+
383
+ context "returns default dataframe" do
384
+ subject { Daru::DataFrame.from_html(path) }
385
+
386
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
387
+ its('first.index') { is_expected.to eq(Daru::Index.new(
388
+ ["Record high °C (°F)", "Average high °C (°F)", "Daily mean °C (°F)", "Average low °C (°F)", "Record low °C (°F)", "Average rainfall mm (inches)", "Average rainy days", "Average relative humidity (%)", "Mean monthly sunshine hours", "Mean daily sunshine hours"]
389
+ )
390
+ )
391
+ }
392
+
393
+ end
394
+ end
395
+
396
+ context "with valid html table markups" do
397
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/valid_markup.html" }
398
+ let(:index) { ["W","X","Y","Z"] }
399
+ let(:name) { "Small HTML table with index" }
400
+
401
+ context "returns user-modified dataframe" do
402
+ subject { Daru::DataFrame.from_html(path, index: index, name: name) }
403
+
404
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
405
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
406
+ [["6", "4","9","7"],["7","0","4","0"]],
407
+ order: ["a","b"],
408
+ index: ["W","X","Y","Z"],
409
+ name: "Small HTML table with index"
410
+ )
411
+ )
412
+ }
413
+ end
414
+ end
415
+
416
+ context "in year-wise passengers figure" do
417
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/macau.html" }
418
+ let(:match) { "2001" }
419
+ let(:name) { "Year-wise Passengers Figure" }
420
+
421
+ context "returns matching dataframes with index" do
422
+ subject { Daru::DataFrame.from_html(path, match: match, name: name) }
423
+
424
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
425
+ its('first.index') { is_expected.to eq(Daru::Index.new(
426
+ ["January","February","March","April","May","June","July","August","September","October","November","December","Total"]
427
+ )
428
+ )
429
+ }
430
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
431
+ [
432
+ ["265,603","184,381","161,264","161,432","117,984",""],
433
+ ["249,259","264,066","209,569","168,777","150,772",""],
434
+ ["312,319","226,483","186,965","172,060","149,795",""],
435
+ ["351,793","296,541","237,449","180,241","179,049",""],
436
+ ["338,692","288,949","230,691","172,391","189,925",""],
437
+ ["332,630","271,181","231,328","157,519","175,402",""],
438
+ ["344,658","304,276","243,534","205,595","173,103",""],
439
+ ["360,899","300,418","257,616","241,140","178,118",""],
440
+ ["291,817","280,803","210,885","183,954","163,385",""],
441
+ ["327,232","298,873","231,251","205,726","176,879",""],
442
+ ["315,538","265,528","228,637","181,677","146,804",""],
443
+ ["314,866","257,929","210,922","183,975","151,362",""],
444
+ ["3,805,306","3,239,428","2,640,111","2,214,487","1,952,578","0"]
445
+ ].transpose,
446
+ order: ["2001","2000","1999","1998","1997","1996"],
447
+ index: ["January","February","March","April","May","June","July","August","September","October","November","December","Total"],
448
+ name: "Year-wise Passengers Figure"
449
+ )
450
+ )
451
+ }
452
+ end
453
+ end
454
+
455
+ context "in share market data" do
456
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/moneycontrol.html" }
457
+ let(:match) { "Sun Pharma" }
458
+ let(:index) { ["Alpha", "Beta", "Gamma", "Delta", "Misc"] }
459
+ let(:name) { "Share Market Analysis" }
460
+
461
+ context "returns matching dataframes" do
462
+ subject { Daru::DataFrame.from_html(path, match: match) }
463
+
464
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
465
+ its(:first) { is_expected.to eq(Daru::DataFrame.new(
466
+ [
467
+ ["Sun Pharma","502.60","-65.05","2,117.87"],
468
+ ["Reliance","1356.90","19.60","745.10"],
469
+ ["Tech Mahindra","379.45","-49.70","650.22"],
470
+ ["ITC","315.85","6.75","621.12"],
471
+ ["HDFC","1598.85","50.95","553.91"]
472
+ ].transpose,
473
+ order: ["Company","Price","Change","Value (Rs Cr.)"]
474
+ )
475
+ )
476
+ }
477
+ end
478
+
479
+ context "returns user-modified matching dataframes" do
480
+ subject { Daru::DataFrame.from_html(path, match: match, index: index, name: name) }
481
+
482
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
483
+ its(:last) { is_expected.to eq(Daru::DataFrame.new(
484
+ [
485
+ ["Sun Pharma","502.60","-65.05","2,117.87"],
486
+ ["Reliance","1356.90","19.60","745.10"],
487
+ ["Tech Mahindra","379.45","-49.70","650.22"],
488
+ ["ITC","315.85","6.75","621.12"],
489
+ ["HDFC","1598.85","50.95","553.91"]
490
+ ].transpose,
491
+ order: ["Company","Price","Change","Value (Rs Cr.)"],
492
+ index: ["Alpha", "Beta", "Gamma", "Delta", "Misc"],
493
+ name: "Share Market Analysis"
494
+ )
495
+ )
496
+ }
497
+ end
498
+
499
+ end
500
+
501
+ context "in election results data" do
502
+ let(:path) { "file://#{Dir.pwd}/spec/fixtures/eciresults.html" }
503
+
504
+ context "returns default dataframes" do
505
+ subject { Daru::DataFrame.from_html(path) }
506
+
507
+ it { is_expected.to be_an(Array).and all be_a(Daru::DataFrame) }
508
+ its('first.vectors') { is_expected.to eq(Daru::Index.new(
509
+ ["PartyName", "Votes Wise(%)"]
510
+ )
511
+ )
512
+ }
513
+ end
514
+ end
515
+ end
272
516
  end
273
517
 
274
518
  describe Daru::Vector do