daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,42 @@
1
+ describe "Monkeys" do # specs for the helper methods called on Matrix, Array, Range and Hash
2
+ context Matrix do
3
+ it "performs elementwise division" do
4
+ left = Matrix[[3,6,9],[4,8,12],[2,4,6]]
5
+ right = Matrix[[3,6,9],[4,8,12],[2,4,6]] # identical to left, so every quotient is 1
6
+
7
+ expect(left.elementwise_division(right)).to eq(Matrix[[1,1,1],[1,1,1],[1,1,1]])
8
+ end
9
+ end
10
+
11
+ describe '#daru_lite_vector' do
12
+ it 'converts Array' do
13
+ expect([1,2,3].daru_lite_vector).to eq DaruLite::Vector.new [1,2,3]
14
+ expect([1,2,3].daru_lite_vector('test', [:a, :b, :c])).to eq \
15
+ DaruLite::Vector.new [1,2,3], name: 'test', index: [:a, :b, :c]
16
+ end
17
+
18
+ it 'converts Range' do
19
+ expect((1..3).daru_lite_vector).to eq DaruLite::Vector.new [1,2,3]
20
+ expect((1..3).daru_lite_vector('test', [:a, :b, :c])).to eq \
21
+ DaruLite::Vector.new [1,2,3], name: 'test', index: [:a, :b, :c]
22
+ end
23
+
24
+ it 'converts Hash' do
25
+ # FIXME: is this the most useful way of converting hashes?
26
+ # An alternative would be to treat keys as the index, e.g.
27
+ # expect({a: 1, b: 2, c: 3}.daru_lite_vector('test')).to eq DaruLite::Vector.new [1,2,3], name: 'test', index: [:a, :b, :c]
28
+ #
29
+ expect({test: [1, 2, 3]}.daru_lite_vector).to eq DaruLite::Vector.new [1,2,3], name: :test # current behaviour: the key becomes the name, its value the data
30
+ end
31
+ end
32
+
33
+ describe '#to_index' do
34
+ it 'converts Array' do
35
+ expect([1,2,3].to_index).to eq DaruLite::Index.new [1,2,3]
36
+ end
37
+
38
+ it 'converts Range' do
39
+ expect((1..3).to_index).to eq DaruLite::Index.new [1,2,3]
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,213 @@
1
+ describe DaruLite::Vector do # display specs: #inspect text output and #to_html markup
2
+ # TODO: Add inspect specs for category
3
+ context '#inspect' do
4
+ context 'simple' do
5
+ subject(:vector) { DaruLite::Vector.new [1,2,3],
6
+ index: [:a, :b, :c], name: 'test'}
7
+ its(:inspect) { is_expected.to eq %Q{
8
+ |#<DaruLite::Vector(3)>
9
+ | test
10
+ | a 1
11
+ | b 2
12
+ | c 3
13
+ }.unindent }
14
+ end
15
+
16
+ context 'no name' do
17
+ subject(:vector) { DaruLite::Vector.new [1,2,3], index: [:a, :b, :c]}
18
+ its(:inspect) { is_expected.to eq %Q{
19
+ |#<DaruLite::Vector(3)>
20
+ | a 1
21
+ | b 2
22
+ | c 3
23
+ }.unindent }
24
+ end
25
+
26
+ context 'with nils' do
27
+ subject(:vector) { DaruLite::Vector.new [1,nil,3],
28
+ index: [:a, :b, :c], name: 'test'}
29
+ its(:inspect) { is_expected.to eq %Q{
30
+ |#<DaruLite::Vector(3)>
31
+ | test
32
+ | a 1
33
+ | b nil
34
+ | c 3
35
+ }.unindent }
36
+ end
37
+
38
+ context 'very large amount of data' do
39
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test'}
40
+ its(:inspect) { is_expected.to eq %Q{
41
+ |#<DaruLite::Vector(300)>
42
+ | test
43
+ | 0 1
44
+ | 1 2
45
+ | 2 3
46
+ | 3 1
47
+ | 4 2
48
+ | 5 3
49
+ | 6 1
50
+ | 7 2
51
+ | 8 3
52
+ | 9 1
53
+ | 10 2
54
+ | 11 3
55
+ | 12 1
56
+ | 13 2
57
+ | 14 3
58
+ | ... ...
59
+ }.unindent }
60
+ end
61
+
62
+ context 'really long name or data' do
63
+ subject(:vector) { DaruLite::Vector.new [1,2,'this is ridiculously long'],
64
+ index: [:a, :b, :c], name: 'and this is not much better faithfully'}
65
+ its(:inspect) { is_expected.to eq %Q{
66
+ |#<DaruLite::Vector(3)>
67
+ | and this is not much
68
+ | a 1
69
+ | b 2
70
+ | c this is ridiculously
71
+ }.unindent }
72
+ end
73
+
74
+ context 'with multiindex' do
75
+ subject(:vector) {
76
+ DaruLite::Vector.new(
77
+ [1,2,3,4,5,6,7],
78
+ name: 'test',
79
+ index: DaruLite::MultiIndex.from_tuples([
80
+ %w[foo one],
81
+ %w[foo two],
82
+ %w[foo three],
83
+ %w[bar one],
84
+ %w[bar two],
85
+ %w[bar three],
86
+ %w[baz one],
87
+ ]),
88
+ )
89
+ }
90
+
91
+ its(:inspect) { is_expected.to eq %Q{
92
+ |#<DaruLite::Vector(7)>
93
+ | test
94
+ | foo one 1
95
+ | two 2
96
+ | three 3
97
+ | bar one 4
98
+ | two 5
99
+ | three 6
100
+ | baz one 7
101
+ }.unindent}
102
+ end
103
+
104
+ context 'threshold and spacing settings' do # placeholder: no examples written yet
105
+ end
106
+ end
107
+
108
+ [nil, :category].each do |type| # define the #to_html examples once per vector type
109
+ context '#to_html' do
110
+ let(:doc) { Nokogiri::HTML(vector.to_html) }
111
+ subject(:table) { doc.at('table') }
112
+ let(:header) { doc.at('b') }
113
+
114
+ context 'simple' do
115
+ let(:vector) { DaruLite::Vector.new [1,nil,3],
116
+ index: [:a, :b, :c], name: 'test', type: type }
117
+ it { is_expected.not_to be_nil }
118
+
119
+ describe 'header' do
120
+ subject { header }
121
+ it { is_expected.not_to be_nil }
122
+ its(:text) { is_expected.to eq " DaruLite::Vector(3)"\
123
+ "#{":category" if type == :category} " }
124
+ end
125
+
126
+ describe 'name' do
127
+ subject(:name) { table.at('thead > tr:first-child > th:nth-child(2)') }
128
+ it { is_expected.not_to be_nil }
129
+ its(:text) { is_expected.to eq 'test' }
130
+
131
+ context 'withought name' do
132
+ let(:vector) { DaruLite::Vector.new [1,nil,3], index: [:a, :b, :c], type: type }
133
+
134
+ it { is_expected.to be_nil }
135
+ end
136
+ end
137
+
138
+ describe 'index' do
139
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
140
+ its(:count) { is_expected.to eq vector.size }
141
+ it { is_expected.to eq vector.index.to_a.map(&:to_s) }
142
+ end
143
+
144
+ describe 'values' do
145
+ subject(:indexes) { table.search('tr > td:last-child').map(&:text) }
146
+ its(:count) { is_expected.to eq vector.size }
147
+ it { is_expected.to eq vector.to_a.map(&:to_s) }
148
+ end
149
+ end
150
+
151
+ context 'large vector' do
152
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test', type: type }
153
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
154
+ expect(table.search('tr').size).to eq 33
155
+ end
156
+
157
+ describe '"skipped" row' do
158
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
159
+ its(:count) { is_expected.to eq 2 }
160
+ it { is_expected.to eq ['...', '...'] }
161
+ end
162
+
163
+ describe 'last row' do
164
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
165
+ its(:count) { is_expected.to eq 2 }
166
+ it { is_expected.to eq ['299', '3'] }
167
+ end
168
+ end
169
+
170
+ context 'multi-index' do
171
+ subject(:vector) {
172
+ DaruLite::Vector.new(
173
+ [1,2,3,4,5,6,7],
174
+ name: 'test',
175
+ type: type,
176
+ index: DaruLite::MultiIndex.from_tuples([
177
+ %w[foo one],
178
+ %w[foo two],
179
+ %w[foo three],
180
+ %w[bar one],
181
+ %w[bar two],
182
+ %w[bar three],
183
+ %w[baz one],
184
+ ]),
185
+ )
186
+ }
187
+
188
+ describe 'header' do
189
+ subject { header }
190
+ it { is_expected.not_to be_nil }
191
+ its(:text) { is_expected.to eq " DaruLite::Vector(7)"\
192
+ "#{":category" if type == :category} " }
193
+ end
194
+
195
+ describe 'name row' do
196
+ subject(:row) { table.at('thead > tr:nth-child(1)').search('th') }
197
+ its(:count) { should == 2 }
198
+ it { expect(row.first['colspan']).to eq '2' }
199
+ end
200
+
201
+ describe 'first data row' do
202
+ let(:row) { table.at('tbody > tr:first-child') }
203
+ subject { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
204
+ it { is_expected.to eq [
205
+ '<th rowspan="3">foo</th>',
206
+ '<th rowspan="1">one</th>',
207
+ '<td>1</td>'
208
+ ]}
209
+ end
210
+ end
211
+ end
212
+ end
213
+ end
@@ -0,0 +1,87 @@
1
+ require 'rspec'
2
+ require 'rspec/its'
3
+ require 'rspec/expectations'
4
+ require 'matrix'
5
+ require 'awesome_print'
6
+ require 'distribution'
7
+ require 'tempfile'
8
+ require 'pry-byebug'
9
+ require 'nokogiri'
10
+ require 'webmock/rspec'
11
+
12
+ def mri? # true when the running interpreter reports RUBY_ENGINE 'ruby' (MRI/CRuby)
13
+ RUBY_ENGINE == 'ruby'
14
+ end
15
+
16
+ def jruby? # true when the running interpreter reports RUBY_ENGINE 'jruby'
17
+ RUBY_ENGINE == 'jruby'
18
+ end
19
+
20
+ RSpec::Expectations.configuration.warn_about_potential_false_positives = false # silence RSpec's potential-false-positive warnings for the whole suite
21
+
22
+ require 'simplecov'
23
+ SimpleCov.start do # started before daru_lite is required below
24
+ add_filter 'vendor'
25
+ add_filter 'spec'
26
+ # minimum_coverage_by_file 95 -- too strict for now. Reconsider after the specs are redesigned.
27
+ end
28
+
29
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # make ../lib (relative to this file) requirable
30
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) # make this file's own directory requirable
31
+ require 'daru_lite'
32
+
33
+ if jruby? # mdarray is only loaded when running on JRuby
34
+ require 'mdarray'
35
+ end
36
+
37
+ ALL_DTYPES = [:array] # list of dtypes available to the specs; only :array is listed
39
+ # FIXME: This must go! Need to be able to use be_within
40
+ def expect_correct_vector_in_delta v1, v2, delta # asserts v1 and v2 have equal size and agree position by position within delta
41
+ expect(v1.size).to eq(v2.size)
42
+ (0...v1.size).each do |v| # v is a position, used with [] on both arguments
43
+ expect(v1[v]).to be_within(delta).of(v2[v])
44
+ end
45
+ end
46
+
47
+ def expect_correct_df_in_delta df1, df2, delta # asserts each vector of df1 is within delta of df2's vector looked up under the same index
48
+ df1.each_vector_with_index do |vector, i|
49
+ expect_correct_vector_in_delta vector, df2[i], delta # df2[i] is fetched with the index yielded for df1's vector
50
+ end
51
+ end
52
+
53
+ RSpec::Matchers.define :be_all_within do |delta|
54
+ match do |actual|
55
+ expect(@expected).to_not be_nil
56
+ expect(actual.size).to equal(actual.size)
57
+ (@act, @exp), @idx = actual.zip(@expected).each_with_index.detect { |(a, e), _| (a - e).abs > delta }
58
+ @idx.nil?
59
+ end
60
+
61
+ chain :of do |expected|
62
+ @expected = expected
63
+ end
64
+
65
+ failure_message do |actual|
66
+ return "expected value must be provided using '.of'." if @expected.nil?
67
+ return "expected.size must equal actual.size." if @expected.size != actual.size
68
+ "at index=[#{@idx}], expected '#{actual[@idx]}' to be within '#{delta}' of '#{@expected[@idx]}'."
69
+ end
70
+ end
71
+
72
+ class String
73
+ # Lets specs compare against readable multi-line strings:
74
+ # %Q{
75
+ # |test
76
+ # |me
77
+ # }.unindent # =>
78
+ # "test
79
+ # me"
80
+ def unindent
81
+ gsub(/\n\s+?\|/, "\n") # strip the "<spaces>|" margin that follows each newline
82
+ .gsub(/\|\n/, "\n") # a "|" right before a newline is dropped, so trailing spaces can be written before it and survive editors
83
+ .gsub(/^\n|\n\s+$/, '') # remove the empty lines left before and after the text
84
+ end
85
+ end
86
+
87
+ Dir[File.expand_path('../support/**/*.rb', __FILE__)].each {|f| require f } # require every .rb file under the support directory next to this file
@@ -0,0 +1,30 @@
1
+ require 'sqlite3'
2
+ require 'dbi'
3
+ require 'active_record'
4
+
5
+ module DaruLite::RSpec
6
+ class Account < ActiveRecord::Base # ActiveRecord model for the accounts table
7
+ self.table_name = 'accounts' # table name set explicitly
8
+ end
9
+ end
10
+
11
+ shared_context 'with accounts table in sqlite3 database' do
12
+ let(:db_name) do
13
+ 'daru_lite_test'
14
+ end
15
+
16
+ before do
17
+ # just in case
18
+ FileUtils.rm(db_name) if File.file?(db_name)
19
+
20
+ SQLite3::Database.new(db_name).tap do |db|
21
+ db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
22
+ db.execute "insert into accounts values(1, 'Homer', 20)"
23
+ db.execute "insert into accounts values(2, 'Marge', 30)"
24
+ end
25
+ end
26
+
27
+ after do
28
+ FileUtils.rm(db_name)
29
+ end
30
+ end
@@ -0,0 +1,5 @@
1
+ RSpec::Matchers.define :be_boolean do
2
+ match do |actual|
3
+ expect(actual.is_a?(TrueClass) || actual.is_a?(FalseClass)).to be true
4
+ end
5
+ end