daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,137 @@
1
+ describe DaruLite::Formatters::Table do # Specs for DaruLite::Formatters::Table.format, called with data plus :headers and :row_headers options
2
+ let(:options) { {} } # extra options merged into the format call; individual contexts override this
3
+ subject { # the value returned by Table.format for the data/headers/row_headers/options currently in effect
4
+ DaruLite::Formatters::Table
5
+ .format(data, options.merge(headers: headers, row_headers: row_headers))
6
+ }
7
+
8
+ let(:data) { [[1,2,3], [4,5,6], [7,8,9]] } # default 3x3 data, used by every context that does not override it
9
+ let(:headers) { [:col1, :col2, :col3] } # default column headers
10
+ let(:row_headers) { [:row1, :row2, :row3] } # default row headers
11
+
12
+ context 'simple table' do # NOTE(review): column alignment inside the expected strings may have been lost in this rendering - confirm against the real file
13
+ it { is_expected.to eq %Q{
14
+ | col1 col2 col3
15
+ | row1 1 2 3
16
+ | row2 4 5 6
17
+ | row3 7 8 9
18
+ }.unindent} # unindent is not defined in this file - presumably a String helper from the spec support code; confirm
19
+ end
20
+
21
+ context 'with nils' do # nil appears in the data, in the column headers and in the row headers
22
+ let(:data) { [[1,nil,3], [4,5,nil], [7,8,9]] }
23
+ let(:headers) { [:col1, :col2, nil] }
24
+ let(:row_headers) { [:row1, nil, :row3] }
25
+
26
+ it { is_expected.to eq %Q{
27
+ | col1 col2 |
28
+ | row1 1 nil 3|
29
+ | 4 5 nil|
30
+ | row3 7 8 9|
31
+ }.unindent} # expected text shows nil data cells as "nil", while the nil headers show no text
32
+ end
33
+
34
+ context 'with multivalue row headers' do
35
+ let(:row_headers) { [[:row,1], [:row,2], [:row,3]] } # each row header is a two-element array
36
+ it { is_expected.to eq %Q{
37
+ | col1 col2 col3
38
+ | row 1 1 2 3
39
+ | row 2 4 5 6
40
+ | row 3 7 8 9
41
+ }.unindent}
42
+ end
43
+
44
+ context 'with multivalue column headers' do # NOTE(review): this context defines no examples, so multivalue column headers are not exercised here
45
+ let(:headers) { [[:col,1], [:col,2], [:col,3]] }
46
+ end
47
+
48
+ context 'rows only' do
49
+ let(:data) { [] } # no data cells and no column headers; only the row headers are expected in the output
50
+ let(:headers) { nil }
51
+ it { is_expected.to eq %Q{
52
+ | row1
53
+ | row2
54
+ | row3
55
+ }.unindent}
56
+ end
57
+
58
+ context 'columns only' do
59
+ let(:data) { [] } # no data cells and no row headers; only the header line is expected in the output
60
+ let(:row_headers) { nil }
61
+ it { is_expected.to eq %Q{
62
+ | col1 col2 col3
63
+ }.unindent}
64
+ end
65
+
66
+ context 'wide values' do
67
+ let(:options) { {spacing: 2} } # with spacing: 2 the expected output shows headers cut to two characters ("co", "ro")
68
+
69
+ it { is_expected.to eq %Q{
70
+ | co co co
71
+ | ro 1 2 3
72
+ | ro 4 5 6
73
+ | ro 7 8 9
74
+ }.unindent}
75
+ end
76
+
77
+ context 'with empty data' do
78
+ let(:data) { [] }
79
+ let(:headers) { [] }
80
+ let(:row_headers) { [] }
81
+
82
+ it { is_expected.to eq '' } # empty data and empty header arrays are expected to give an empty string
83
+ end
84
+
85
+
86
+ context '<more> threshold' do
87
+ let(:options) { {threshold: threshold} } # threshold is supplied by each nested context below
88
+ context 'lower than data size' do
89
+ let(:threshold) { 2 } # below the 3 data rows: two rows are expected, followed by a row of "..."
90
+ it { is_expected.to eq %Q{
91
+ | col1 col2 col3
92
+ | row1 1 2 3
93
+ | row2 4 5 6
94
+ | ... ... ... ...
95
+ }.unindent}
96
+ end
97
+
98
+ context 'greater than data size' do
99
+ let(:threshold) { 5 } # above the 3 data rows: all rows are expected and no "..." row
100
+ it { is_expected.to eq %Q{
101
+ | col1 col2 col3
102
+ | row1 1 2 3
103
+ | row2 4 5 6
104
+ | row3 7 8 9
105
+ }.unindent}
106
+ end
107
+ end
108
+
109
+ context 'no row and column headers' do # both header options are nil; only the data cells are expected
110
+ let(:headers) { nil }
111
+ let(:row_headers) { nil }
112
+ it { is_expected.to eq %Q{
113
+ | 1 2 3
114
+ | 4 5 6
115
+ | 7 8 9
116
+ }.unindent }
117
+ end
118
+
119
+ context 'row headers only' do # default data and row headers, no header line expected
120
+ let(:headers) { nil }
121
+ it { is_expected.to eq %Q{
122
+ | row1 1 2 3
123
+ | row2 4 5 6
124
+ | row3 7 8 9
125
+ }.unindent }
126
+ end
127
+
128
+ context 'column headers only' do # default data and column headers, no row labels expected
129
+ let(:row_headers) { nil }
130
+ it { is_expected.to eq %Q{
131
+ | col1 col2 col3
132
+ | 1 2 3
133
+ | 4 5 6
134
+ | 7 8 9
135
+ }.unindent }
136
+ end
137
+ end
@@ -0,0 +1,8 @@
1
+ describe DaruLite::ArrayHelper do # Spec for DaruLite::ArrayHelper.recode_repeated
2
+ context '#recode_repeated' do
3
+ let(:source) { [1,'a',1,'a','b',:c,2] } # 1 and 'a' each occur twice; 'b', :c and 2 occur once
4
+ subject { described_class.recode_repeated(source) } # called on the described module itself
5
+
6
+ it { is_expected.to eq ['1_1','a_1', '1_2','a_2','b',:c,2] } # repeated values become "<value>_<n>" strings numbered by occurrence; values that occur once are left unchanged
7
+ end
8
+ end
@@ -0,0 +1,170 @@
1
+ require 'spec_helper.rb'
2
+
3
+ describe DaruLite::CategoricalIndex do # Specs for the pos, subset, at, add and valid? methods of DaruLite::CategoricalIndex
4
+ context "#pos" do
5
+ context "when the category is non-numeric" do
6
+ let(:idx) { described_class.new [:a, :b, :a, :a, :c] } # :a sits at positions 0, 2 and 3; :b at 1; :c at 4
7
+
8
+ context "single category" do
9
+ subject { idx.pos :a }
10
+
11
+ it { is_expected.to eq [0, 2, 3] }
12
+ end
13
+
14
+ context "multiple categories" do
15
+ subject { idx.pos :a, :c }
16
+
17
+ it { is_expected.to eq [0, 2, 3, 4] }
18
+ end
19
+
20
+ context "invalid category" do
21
+ it { expect { idx.pos :e }.to raise_error IndexError } # :e is not among the categories of idx
22
+ end
23
+
24
+ context "positional index" do
25
+ it { expect(idx.pos 0).to eq 0 } # 0 is not a category of this index, so the expectation treats it as a position
26
+ end
27
+
28
+ context "invalid positional index" do
29
+ it { expect { idx.pos 5 }.to raise_error IndexError } # the index has 5 entries, so position 5 is out of range
30
+ end
31
+
32
+ context "multiple positional indexes" do
33
+ subject { idx.pos 0, 1, 2 }
34
+
35
+ it { is_expected.to be_a Array }
36
+ its(:size) { is_expected.to eq 3 }
37
+ it { is_expected.to eq [0, 1, 2] }
38
+ end
39
+ end
40
+
41
+ context "when the category is numeric" do
42
+ let(:idx) { described_class.new [0, 1, 0, 0, 2] } # the categories are the integers 0, 1 and 2
43
+
44
+ context "first preference to category" do
45
+ subject { idx.pos 0 } # 0 is a category here, so all positions holding 0 are expected
46
+
47
+ it { is_expected.to be_a Array }
48
+ its(:size) { is_expected.to eq 3 }
49
+ it { is_expected.to eq [0, 2, 3] }
50
+ end
51
+
52
+ context "second preference to positional index" do
53
+ subject { idx.pos 3 } # 3 is not a category here, so the position itself is expected
54
+
55
+ it { is_expected.to eq 3 }
56
+ end
57
+ end
58
+ end
59
+
60
+ context "#subset" do
61
+ let(:idx) { described_class.new [:a, 1, :a, 1, :c] } # mixes symbol and integer categories
62
+
63
+ context "single index" do
64
+ context "multiple instances" do
65
+ subject { idx.subset :a }
66
+
67
+ it { is_expected.to be_a described_class }
68
+ its(:size) { is_expected.to eq 2 }
69
+ its(:to_a) { is_expected.to eq [:a, :a] }
70
+ end
71
+ end
72
+
73
+ context "multiple indexes" do
74
+ subject { idx.subset :a, 1 } # 1 is a category of idx, so every :a and every 1 is expected
75
+
76
+ it { is_expected.to be_a described_class }
77
+ its(:size) { is_expected.to eq 4 }
78
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1] }
79
+ end
80
+
81
+ context "multiple positional indexes" do
82
+ subject { idx.subset 0, 2 } # 0 and 2 are not categories of idx; the entries at those positions are expected
83
+
84
+ it { is_expected.to be_a described_class }
85
+ its(:size) { is_expected.to eq 2 }
86
+ its(:to_a) { is_expected.to eq [:a, :a] }
87
+ end
88
+ end
89
+
90
+ context "#at" do
91
+ let(:idx) { described_class.new [:a, :a, :a, 1, :c] }
92
+
93
+ context "single position" do
94
+ it { expect(idx.at 1).to eq :a } # a single position is expected to give the bare entry, not an index object
95
+ end
96
+
97
+ context "multiple positions" do
98
+ subject { idx.at 0, 2, 3 }
99
+
100
+ it { is_expected.to be_a described_class }
101
+ its(:size) { is_expected.to eq 3 }
102
+ its(:to_a) { is_expected.to eq [:a, :a, 1] }
103
+ end
104
+
105
+ context "range" do
106
+ subject { idx.at 2..3 }
107
+
108
+ it { is_expected.to be_a described_class }
109
+ its(:size) { is_expected.to eq 2 }
110
+ its(:to_a) { is_expected.to eq [:a, 1] }
111
+ end
112
+
113
+ context "range with negative integers" do
114
+ subject { idx.at 2..-2 } # expected to select the same entries as 2..3 on this 5-entry index
115
+
116
+ it { is_expected.to be_a described_class }
117
+ its(:size) { is_expected.to eq 2 }
118
+ its(:to_a) { is_expected.to eq [:a, 1] }
119
+ end
120
+
121
+ context "rangle with single element" do # NOTE(review): "rangle" looks like a typo for "range" in this description
122
+ subject { idx.at 2..2 }
123
+
124
+ it { is_expected.to be_a described_class }
125
+ its(:size) { is_expected.to eq 1 }
126
+ its(:to_a) { is_expected.to eq [:a] }
127
+ end
128
+
129
+ context "invalid position" do
130
+ it { expect { idx.at 5 }.to raise_error IndexError } # the index has 5 entries, so position 5 is out of range
131
+ end
132
+
133
+ context "invalid positions" do
134
+ it { expect { idx.at 2, 5 }.to raise_error IndexError } # one out-of-range position among valid ones is still expected to raise
135
+ end
136
+ end
137
+
138
+ context "#add" do
139
+ let(:idx) { described_class.new [:a, 1, :a, 1] }
140
+
141
+ context "single index" do
142
+ subject { idx.add :c } # expectations read the object returned by add; whether idx itself changes is not checked here
143
+
144
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c] }
145
+ its(:categories) { is_expected.to eq [:a, 1, :c] }
146
+ end
147
+
148
+ context "multiple indexes" do
149
+ subject { idx.add :c, :d }
150
+
151
+ its(:to_a) { is_expected.to eq [:a, 1, :a, 1, :c, :d] }
152
+ its(:categories) { is_expected.to eq [:a, 1, :c, :d] }
153
+ end
154
+ end
155
+
156
+ context "#valid?" do
157
+ let(:idx) { described_class.new [:a, 1, :a, 1] } # four entries with categories :a and 1
158
+
159
+ context "single index" do
160
+ it { expect(idx.valid? :a).to eq true }
161
+ it { expect(idx.valid? 2).to eq true } # 2 is not a category but is an in-range position of the 4-entry index
162
+ it { expect(idx.valid? 4).to eq false } # 4 is neither a category nor an in-range position
163
+ end
164
+
165
+ context "multiple indexes" do
166
+ it { expect(idx.valid? :a, 1).to eq true }
167
+ it { expect(idx.valid? :a, 1, 5).to eq false } # a single invalid argument (5) makes the whole call false
168
+ end
169
+ end
170
+ end