daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,42 @@
1
# Specs for the core-class extensions exercised below:
# Matrix#elementwise_division, #daru_lite_vector (Array, Range, Hash)
# and #to_index (Array, Range).
describe "Monkeys" do
  context Matrix do
    it "performs elementwise division" do
      left  = Matrix[[3,6,9],[4,8,12],[2,4,6]]
      right = Matrix[[3,6,9],[4,8,12],[2,4,6]]
      all_ones = Matrix[[1,1,1],[1,1,1],[1,1,1]]

      expect(left.elementwise_division(right)).to eq(all_ones)
    end
  end

  describe '#daru_lite_vector' do
    it 'converts Array' do
      expect([1,2,3].daru_lite_vector).to eq(DaruLite::Vector.new([1,2,3]))
      expect([1,2,3].daru_lite_vector('test', [:a, :b, :c])).to eq(
        DaruLite::Vector.new([1,2,3], name: 'test', index: [:a, :b, :c])
      )
    end

    it 'converts Range' do
      expect((1..3).daru_lite_vector).to eq(DaruLite::Vector.new([1,2,3]))
      expect((1..3).daru_lite_vector('test', [:a, :b, :c])).to eq(
        DaruLite::Vector.new([1,2,3], name: 'test', index: [:a, :b, :c])
      )
    end

    it 'converts Hash' do
      # FIXME: is this the most useful way of converting hashes?
      #   Something like the following might be preferable:
      #   expect({a: 1, b: 2, c: 3}.daru_lite_vector('test')).to eq DaruLite::Vector.new [1,2,3], name: 'test', index: [:a, :b, :c]
      expect({test: [1, 2, 3]}.daru_lite_vector).to eq(
        DaruLite::Vector.new([1,2,3], name: :test)
      )
    end
  end

  describe '#to_index' do
    it 'converts Array' do
      expect([1,2,3].to_index).to eq(DaruLite::Index.new([1,2,3]))
    end

    it 'converts Range' do
      expect((1..3).to_index).to eq(DaruLite::Index.new([1,2,3]))
    end
  end
end
@@ -0,0 +1,213 @@
1
+ describe DaruLite::Vector do
2
+ # TODO: Add inspect specs for category
3
+ context '#inspect' do
4
+ context 'simple' do
5
+ subject(:vector) { DaruLite::Vector.new [1,2,3],
6
+ index: [:a, :b, :c], name: 'test'}
7
+ its(:inspect) { is_expected.to eq %Q{
8
+ |#<DaruLite::Vector(3)>
9
+ | test
10
+ | a 1
11
+ | b 2
12
+ | c 3
13
+ }.unindent }
14
+ end
15
+
16
+ context 'no name' do
17
+ subject(:vector) { DaruLite::Vector.new [1,2,3], index: [:a, :b, :c]}
18
+ its(:inspect) { is_expected.to eq %Q{
19
+ |#<DaruLite::Vector(3)>
20
+ | a 1
21
+ | b 2
22
+ | c 3
23
+ }.unindent }
24
+ end
25
+
26
+ context 'with nils' do
27
+ subject(:vector) { DaruLite::Vector.new [1,nil,3],
28
+ index: [:a, :b, :c], name: 'test'}
29
+ its(:inspect) { is_expected.to eq %Q{
30
+ |#<DaruLite::Vector(3)>
31
+ | test
32
+ | a 1
33
+ | b nil
34
+ | c 3
35
+ }.unindent }
36
+ end
37
+
38
+ context 'very large amount of data' do
39
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test'}
40
+ its(:inspect) { is_expected.to eq %Q{
41
+ |#<DaruLite::Vector(300)>
42
+ | test
43
+ | 0 1
44
+ | 1 2
45
+ | 2 3
46
+ | 3 1
47
+ | 4 2
48
+ | 5 3
49
+ | 6 1
50
+ | 7 2
51
+ | 8 3
52
+ | 9 1
53
+ | 10 2
54
+ | 11 3
55
+ | 12 1
56
+ | 13 2
57
+ | 14 3
58
+ | ... ...
59
+ }.unindent }
60
+ end
61
+
62
+ context 'really long name or data' do
63
+ subject(:vector) { DaruLite::Vector.new [1,2,'this is ridiculously long'],
64
+ index: [:a, :b, :c], name: 'and this is not much better faithfully'}
65
+ its(:inspect) { is_expected.to eq %Q{
66
+ |#<DaruLite::Vector(3)>
67
+ | and this is not much
68
+ | a 1
69
+ | b 2
70
+ | c this is ridiculously
71
+ }.unindent }
72
+ end
73
+
74
+ context 'with multiindex' do
75
+ subject(:vector) {
76
+ DaruLite::Vector.new(
77
+ [1,2,3,4,5,6,7],
78
+ name: 'test',
79
+ index: DaruLite::MultiIndex.from_tuples([
80
+ %w[foo one],
81
+ %w[foo two],
82
+ %w[foo three],
83
+ %w[bar one],
84
+ %w[bar two],
85
+ %w[bar three],
86
+ %w[baz one],
87
+ ]),
88
+ )
89
+ }
90
+
91
+ its(:inspect) { is_expected.to eq %Q{
92
+ |#<DaruLite::Vector(7)>
93
+ | test
94
+ | foo one 1
95
+ | two 2
96
+ | three 3
97
+ | bar one 4
98
+ | two 5
99
+ | three 6
100
+ | baz one 7
101
+ }.unindent}
102
+ end
103
+
104
+ context 'threshold and spacing settings' do
105
+ end
106
+ end
107
+
108
+ [nil, :category].each do |type|
109
+ context '#to_html' do
110
+ let(:doc) { Nokogiri::HTML(vector.to_html) }
111
+ subject(:table) { doc.at('table') }
112
+ let(:header) { doc.at('b') }
113
+
114
+ context 'simple' do
115
+ let(:vector) { DaruLite::Vector.new [1,nil,3],
116
+ index: [:a, :b, :c], name: 'test', type: type }
117
+ it { is_expected.not_to be_nil }
118
+
119
+ describe 'header' do
120
+ subject { header }
121
+ it { is_expected.not_to be_nil }
122
+ its(:text) { is_expected.to eq " DaruLite::Vector(3)"\
123
+ "#{":category" if type == :category} " }
124
+ end
125
+
126
+ describe 'name' do
127
+ subject(:name) { table.at('thead > tr:first-child > th:nth-child(2)') }
128
+ it { is_expected.not_to be_nil }
129
+ its(:text) { is_expected.to eq 'test' }
130
+
131
+ context 'withought name' do
132
+ let(:vector) { DaruLite::Vector.new [1,nil,3], index: [:a, :b, :c], type: type }
133
+
134
+ it { is_expected.to be_nil }
135
+ end
136
+ end
137
+
138
+ describe 'index' do
139
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
140
+ its(:count) { is_expected.to eq vector.size }
141
+ it { is_expected.to eq vector.index.to_a.map(&:to_s) }
142
+ end
143
+
144
+ describe 'values' do
145
+ subject(:indexes) { table.search('tr > td:last-child').map(&:text) }
146
+ its(:count) { is_expected.to eq vector.size }
147
+ it { is_expected.to eq vector.to_a.map(&:to_s) }
148
+ end
149
+ end
150
+
151
+ context 'large vector' do
152
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test', type: type }
153
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
154
+ expect(table.search('tr').size).to eq 33
155
+ end
156
+
157
+ describe '"skipped" row' do
158
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
159
+ its(:count) { is_expected.to eq 2 }
160
+ it { is_expected.to eq ['...', '...'] }
161
+ end
162
+
163
+ describe 'last row' do
164
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
165
+ its(:count) { is_expected.to eq 2 }
166
+ it { is_expected.to eq ['299', '3'] }
167
+ end
168
+ end
169
+
170
+ context 'multi-index' do
171
+ subject(:vector) {
172
+ DaruLite::Vector.new(
173
+ [1,2,3,4,5,6,7],
174
+ name: 'test',
175
+ type: type,
176
+ index: DaruLite::MultiIndex.from_tuples([
177
+ %w[foo one],
178
+ %w[foo two],
179
+ %w[foo three],
180
+ %w[bar one],
181
+ %w[bar two],
182
+ %w[bar three],
183
+ %w[baz one],
184
+ ]),
185
+ )
186
+ }
187
+
188
+ describe 'header' do
189
+ subject { header }
190
+ it { is_expected.not_to be_nil }
191
+ its(:text) { is_expected.to eq " DaruLite::Vector(7)"\
192
+ "#{":category" if type == :category} " }
193
+ end
194
+
195
+ describe 'name row' do
196
+ subject(:row) { table.at('thead > tr:nth-child(1)').search('th') }
197
+ its(:count) { should == 2 }
198
+ it { expect(row.first['colspan']).to eq '2' }
199
+ end
200
+
201
+ describe 'first data row' do
202
+ let(:row) { table.at('tbody > tr:first-child') }
203
+ subject { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
204
+ it { is_expected.to eq [
205
+ '<th rowspan="3">foo</th>',
206
+ '<th rowspan="1">one</th>',
207
+ '<td>1</td>'
208
+ ]}
209
+ end
210
+ end
211
+ end
212
+ end
213
+ end
@@ -0,0 +1,87 @@
1
+ require 'rspec'
2
+ require 'rspec/its'
3
+ require 'rspec/expectations'
4
+ require 'matrix'
5
+ require 'awesome_print'
6
+ require 'distribution'
7
+ require 'tempfile'
8
+ require 'pry-byebug'
9
+ require 'nokogiri'
10
+ require 'webmock/rspec'
11
+
12
# True when the specs run on the reference interpreter (RUBY_ENGINE is 'ruby').
def mri?
  RUBY_ENGINE.eql?('ruby')
end

# True when the specs run on JRuby (RUBY_ENGINE is 'jruby').
def jruby?
  RUBY_ENGINE.eql?('jruby')
end
19
+
20
+ RSpec::Expectations.configuration.warn_about_potential_false_positives = false
21
+
22
+ require 'simplecov'
23
+ SimpleCov.start do
24
+ add_filter 'vendor'
25
+ add_filter 'spec'
26
+ # minimum_coverage_by_file 95 -- too strict for now. Reconsider after specs redesign.
27
+ end
28
+
29
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
30
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
31
+ require 'daru_lite'
32
+
33
# mdarray is only loaded when running on JRuby.
require 'mdarray' if jruby?
36
+
37
# Dtypes listed for use by the specs (only :array here). Frozen so that no
# individual spec can mutate the shared list and affect the others.
ALL_DTYPES = [:array].freeze
38
+
39
# FIXME: This must go! Need to be able to use be_within
#
# Expects +v1+ and +v2+ to have the same size and, for every position
# 0...v1.size, expects v1[position] to be within +delta+ of v2[position].
def expect_correct_vector_in_delta(v1, v2, delta)
  expect(v1.size).to eq(v2.size)

  positions = 0...v1.size
  positions.each do |position|
    expect(v1[position]).to be_within(delta).of(v2[position])
  end
end
46
+
47
# Walks +df1+ with each_vector_with_index and checks each yielded vector
# against df2[index] using expect_correct_vector_in_delta and +delta+.
def expect_correct_df_in_delta(df1, df2, delta)
  df1.each_vector_with_index do |column, key|
    expect_correct_vector_in_delta(column, df2[key], delta)
  end
end
52
+
53
# Custom matcher: passes when every element of +actual+ is within +delta+ of
# the element at the same position in the collection supplied through `.of`.
#
#   expect([1.0, 2.0]).to be_all_within(0.01).of([1.001, 2.001])
#
# It does not pass when `.of` was omitted or when the two collections differ
# in size; the failure message says which of these happened.
RSpec::Matchers.define :be_all_within do |delta|
  match do |actual|
    # A failed expectation inside a `match` block is rescued by RSpec and
    # reported as "no match", so these two lines act as guard clauses.
    expect(@expected).to_not be_nil
    # The sizes of the two collections must agree: zip would otherwise pad a
    # shorter expected collection with nil or ignore its extra elements.
    expect(actual.size).to eq(@expected.size)

    # Position of the first pair that is further apart than delta, if any.
    @idx = actual.zip(@expected).index { |a, e| (a - e).abs > delta }
    @idx.nil?
  end

  chain :of do |expected|
    @expected = expected
  end

  failure_message do |actual|
    if @expected.nil?
      "expected value must be provided using '.of'."
    elsif @expected.size != actual.size
      "expected.size must equal actual.size."
    else
      "at index=[#{@idx}], expected '#{actual[@idx]}' to be within '#{delta}' of '#{@expected[@idx]}'."
    end
  end
end
71
+
72
class String
  # Lets specs compare against multiline strings written with a "|" margin:
  #
  #   %Q{
  #     |test
  #     |me
  #   }.unindent # => "test\nme"
  #
  # A "|" placed right before a line break is dropped as well, which makes it
  # possible to keep trailing spaces that an editor would otherwise strip.
  def unindent
    # Drop the "<whitespace>|" margin that follows each line break.
    margin_removed = gsub(/\n\s+?\|/, "\n")
    # Drop a "|" sitting directly before a line break.
    guards_removed = margin_removed.gsub(/\|\n/, "\n")
    # Drop the empty leading line and the whitespace-only tail.
    guards_removed.gsub(/^\n|\n\s+$/, '')
  end
end
86
+
87
# Require every .rb file found (recursively) in the support directory that
# sits next to this file.
support_pattern = File.expand_path('../support/**/*.rb', __FILE__)
Dir[support_pattern].each { |support_file| require support_file }
@@ -0,0 +1,30 @@
1
+ require 'sqlite3'
2
+ require 'dbi'
3
+ require 'active_record'
4
+
5
# Namespace for spec-only helper classes.
module DaruLite::RSpec
  # ActiveRecord model mapped explicitly to the 'accounts' table.
  class Account < ActiveRecord::Base
    self.table_name = 'accounts'
  end
end
10
+
11
# Shared context that builds a throw-away SQLite3 database file named by
# +db_name+, containing an `accounts` table with two rows, before each
# example and removes the file afterwards.
shared_context 'with accounts table in sqlite3 database' do
  let(:db_name) do
    'daru_lite_test'
  end

  before do
    # just in case a previous run left the file behind
    FileUtils.rm(db_name) if File.file?(db_name)

    db = SQLite3::Database.new(db_name)
    begin
      db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
      db.execute "insert into accounts values(1, 'Homer', 20)"
      db.execute "insert into accounts values(2, 'Marge', 30)"
    ensure
      # Release the handle once the fixture is written so the file is not
      # kept open by this setup code.
      db.close
    end
  end

  after do
    # rm_f: do not raise (and hide the real failure) if the file was never
    # created because setup failed.
    FileUtils.rm_f(db_name)
  end
end
@@ -0,0 +1,5 @@
1
# Custom matcher: matches when the value is an instance of TrueClass or
# FalseClass.
RSpec::Matchers.define :be_boolean do
  match do |actual|
    actual.is_a?(TrueClass) || actual.is_a?(FalseClass)
  end
end
+ end