daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,373 @@
1
+ # -*- coding: utf-8 -*-
2
+ describe DaruLite::IO do
3
+ describe DaruLite::DataFrame do
4
+ context ".from_csv" do
5
+ before do
6
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
7
+ WebMock
8
+ .stub_request(:get,"http://example.com/#{file}.csv")
9
+ .to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv"))
10
+ end
11
+ end
12
+
13
+ it "loads from a CSV file" do
14
+ df = DaruLite::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
15
+ col_sep: ' ', headers: true)
16
+
17
+ df.vectors = [:image_resolution, :true_transform, :mls].to_index
18
+ expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
19
+ expect(df[:image_resolution].first).to eq(6.55779)
20
+ expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
21
+ end
22
+
23
+ it "works properly for repeated headers" do
24
+ df = DaruLite::DataFrame.from_csv('spec/fixtures/repeated_fields.csv',header_converters: :symbol)
25
+ expect(df.vectors.to_a).to eq(["id", "name_1", "age_1", "city", "a1", "name_2", "age_2"])
26
+
27
+ age = DaruLite::Vector.new([3, 4, 5, 6, nil, 8])
28
+ expect(df['age_2']).to eq(age)
29
+ end
30
+
31
+ it "accepts scientific notation as float" do
32
+ ds = DaruLite::DataFrame.from_csv('spec/fixtures/scientific_notation.csv', order: ['x', 'y'])
33
+ expect(ds.vectors.to_a).to eq(['x', 'y'])
34
+ y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
35
+ y.zip(ds['y']).each do |y_expected, y_ds|
36
+ expect(y_ds).to be_within(0.001).of(y_expected)
37
+ end
38
+ end
39
+
40
+ it "follows the order of columns given in CSV" do
41
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv'
42
+ expect(df.vectors.to_a).to eq(%W[Account Name Rep Manager Product Quantity Price Status])
43
+ end
44
+
45
+ it "handles empty rows in the CSV" do
46
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv'
47
+ expect(df.nrows).to eq(13)
48
+ end
49
+
50
+ it "uses the custom boolean converter correctly" do
51
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean]
52
+ expect(df['Domestic'].to_a).to all be_boolean
53
+ end
54
+
55
+ it "uses the custom string converter correctly" do
56
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/string_converter_test.csv', converters: [:string]
57
+ expect(df['Case Number'].to_a.all? {|x| String === x }).to be_truthy
58
+ end
59
+
60
+ it "allow symbol to converters option" do
61
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: :boolean
62
+ expect(df['Domestic'].to_a).to all be_boolean
63
+ end
64
+
65
+ it "checks for equal parsing of local CSV files and remote CSV files" do
66
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
67
+ df_local = DaruLite::DataFrame.from_csv("spec/fixtures/#{file}.csv")
68
+ df_remote = DaruLite::DataFrame.from_csv("http://example.com/#{file}.csv")
69
+ expect(df_local).to eq(df_remote)
70
+ end
71
+ end
72
+ end
73
+
74
+ context "#write_csv" do
75
+ before do
76
+ @df = DaruLite::DataFrame.new({
77
+ 'a' => [1,2,3,4,5],
78
+ 'b' => [11,22,33,44,55],
79
+ 'c' => ['a', 'g', 4, 5,'addadf'],
80
+ 'd' => [nil, 23, 4,'a','ff']})
81
+ @tempfile = Tempfile.new('data.csv')
82
+
83
+ end
84
+
85
+ it "writes DataFrame to a CSV file" do
86
+ @df.write_csv @tempfile.path
87
+ expect(DaruLite::DataFrame.from_csv(@tempfile.path)).to eq(@df)
88
+ end
89
+
90
+ it "will write headers unless headers=false" do
91
+ @df.write_csv @tempfile.path
92
+ first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
93
+ expect(first_line).to eq @df.vectors.to_a
94
+ end
95
+
96
+ it "will not write headers when headers=false" do
97
+ @df.write_csv @tempfile.path, { headers: false }
98
+ first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
99
+ expect(first_line).to eq @df.head(1).map { |v| (v.first || '').to_s }
100
+ end
101
+
102
+ end
103
+
104
+ context ".from_excel" do
105
+ before do
106
+ id = DaruLite::Vector.new([1, 2, 3, 4, 5, 6])
107
+ name = DaruLite::Vector.new(%w(Alex Claude Peter Franz George Fernand))
108
+ age = DaruLite::Vector.new( [20, 23, 25, nil, 5.5, nil])
109
+ city = DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome', nil])
110
+ a1 = DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c', nil])
111
+ @expected = DaruLite::DataFrame.new({
112
+ :id => id, :name => name, :age => age, :city => city, :a1 => a1
113
+ }, order: [:id, :name, :age, :city, :a1])
114
+ end
115
+
116
+ it "loads DataFrame from an Excel Spreadsheet" do
117
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls.xls'
118
+
119
+ expect(df.nrows).to eq(6)
120
+ expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
121
+ expect(df[:age][5]).to eq(nil)
122
+ expect(@expected).to eq(df)
123
+ end
124
+ end
125
+
126
+ context "#from_excel with row_id" do
127
+ before do
128
+ id = DaruLite::Vector.new(['id', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
129
+ name = DaruLite::Vector.new(%w(name Alex Claude Peter Franz George Fernand))
130
+ age = DaruLite::Vector.new(['age', 20.0, 23.0, 25.0, nil, 5.5, nil])
131
+ city = DaruLite::Vector.new(['city', 'New York', 'London', 'London', 'Paris', 'Tome', nil])
132
+ a1 = DaruLite::Vector.new(['a1', 'a,b', 'b,c', 'a', nil, 'a,b,c', nil])
133
+ @expected_1 = DaruLite::DataFrame.new({:id2 => id, :name2 => name, :age2 => age}, order: [:id2, :name2, :age2])
134
+ @expected_2 = DaruLite::DataFrame.new({
135
+ :id => id, :name => name, :age => age, :city => city, :a1 => a1
136
+ }, order: [:id, :name, :age, :city, :a1])
137
+ end
138
+
139
+ it "loads DataFrame from test_xls_2.xls" do
140
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls'
141
+
142
+ expect(df.nrows).to eq(7)
143
+ expect(df.vectors.to_a).to eq([:id2, :name2, :age2])
144
+ expect(df[:age2][6]).to eq(nil)
145
+ expect(@expected_1).to eq(df)
146
+ end
147
+
148
+ it "loads DataFrame from test_xls_2.xls with row_id" do
149
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls', {row_id: 1}
150
+
151
+ expect(df.nrows).to eq(7)
152
+ expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
153
+ expect(df[:age][6]).to eq(nil)
154
+ expect(@expected_2).to eq(df)
155
+ end
156
+ end
157
+
158
+ context "#write_excel" do
159
+ before do
160
+ a = DaruLite::Vector.new(100.times.map { rand(100) })
161
+ b = DaruLite::Vector.new((['b'] * 100))
162
+ @expected = DaruLite::DataFrame.new({ :b => b, :a => a })
163
+
164
+ tempfile = Tempfile.new('test_write.xls')
165
+
166
+ @expected.write_excel tempfile.path
167
+ @df = DaruLite::DataFrame.from_excel tempfile.path
168
+ end
169
+
170
+ it "correctly writes DataFrame to an Excel Spreadsheet" do
171
+ expect(@expected).to eq(@df)
172
+ end
173
+ end
174
+
175
+ context ".from_sql" do
176
+ include_context 'with accounts table in sqlite3 database'
177
+
178
+ context 'with a database handler of DBI' do
179
+ let(:db) do
180
+ DBI.connect("DBI:SQLite3:#{db_name}")
181
+ end
182
+
183
+ subject { DaruLite::DataFrame.from_sql(db, "select * from accounts") }
184
+
185
+ it "loads data from an SQL database" do
186
+ accounts = subject
187
+ expect(accounts.class).to eq DaruLite::DataFrame
188
+ expect(accounts.nrows).to eq 2
189
+ expect(accounts.row[0][:id]).to eq 1
190
+ expect(accounts.row[0][:name]).to eq "Homer"
191
+ end
192
+ end
193
+
194
+ context 'with a database connection of ActiveRecord' do
195
+ let(:connection) do
196
+ DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
197
+ DaruLite::RSpec::Account.connection
198
+ end
199
+
200
+ subject do
201
+ DaruLite::DataFrame.from_sql(connection, "select * from accounts")
202
+ end
203
+
204
+ it "loads data from an SQL database" do
205
+ accounts = subject
206
+ expect(accounts.class).to eq DaruLite::DataFrame
207
+ expect(accounts.nrows).to eq 2
208
+ expect(accounts.row[0][:id]).to eq 1
209
+ expect(accounts.row[0][:name]).to eq "Homer"
210
+ end
211
+ end
212
+ end
213
+
214
+ context "#write_sql" do
215
+ let(:df) { DaruLite::DataFrame.new({
216
+ 'a' => [1,2,3,4,5],
217
+ 'b' => [11,22,33,44,55],
218
+ 'c' => ['a', 'g', 4, 5,'addadf'],
219
+ 'd' => [nil, 23, 4,'a','ff']})
220
+ }
221
+
222
+ let(:dbh) { double }
223
+ let(:prepared_query) { double }
224
+
225
+ it "writes the DataFrame to an SQL database" do
226
+ expect(dbh).to receive(:prepare)
227
+ .with('INSERT INTO tbl (a,b,c,d) VALUES (?,?,?,?)')
228
+ .and_return(prepared_query)
229
+ df.each_row { |r| expect(prepared_query).to receive(:execute).with(*r.to_a).ordered }
230
+
231
+ df.write_sql dbh, 'tbl'
232
+ end
233
+ end
234
+
235
+ context '.from_activerecord' do
236
+ include_context 'with accounts table in sqlite3 database'
237
+
238
+ context 'with ActiveRecord::Relation' do
239
+ before do
240
+ DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
241
+ end
242
+
243
+ let(:relation) do
244
+ DaruLite::RSpec::Account.all
245
+ end
246
+
247
+ context 'without specifying field names' do
248
+ subject do
249
+ DaruLite::DataFrame.from_activerecord(relation)
250
+ end
251
+
252
+ it 'loads data from an AR::Relation object' do
253
+ accounts = subject
254
+ expect(accounts.class).to eq DaruLite::DataFrame
255
+ expect(accounts.nrows).to eq 2
256
+ expect(accounts.vectors.to_a).to eq [:id, :name, :age]
257
+ expect(accounts.row[0][:id]).to eq 1
258
+ expect(accounts.row[0][:name]).to eq 'Homer'
259
+ expect(accounts.row[0][:age]).to eq 20
260
+ end
261
+ end
262
+
263
+ context 'with specifying field names in parameters' do
264
+ subject do
265
+ DaruLite::DataFrame.from_activerecord(relation, :name, :age)
266
+ end
267
+
268
+ it 'loads data from an AR::Relation object' do
269
+ accounts = subject
270
+ expect(accounts.class).to eq DaruLite::DataFrame
271
+ expect(accounts.nrows).to eq 2
272
+ expect(accounts.vectors.to_a).to eq [:name, :age]
273
+ expect(accounts.row[0][:name]).to eq 'Homer'
274
+ expect(accounts.row[0][:age]).to eq 20
275
+ end
276
+ end
277
+ end
278
+ end
279
+
280
+ context ".from_plaintext" do
281
+ it "reads data from plain text files" do
282
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
283
+
284
+ expect(df.vectors.to_a).to eq([:v1,:v2,:v3,:v4,:v5,:v6])
285
+ end
286
+
287
+ xit "understands empty fields" do
288
+ pending 'See FIXME note in io.rb'
289
+
290
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/empties.dat', [:v1,:v2,:v3]
291
+
292
+ expect(df.row[1].to_a).to eq [4, nil, 6]
293
+ end
294
+
295
+ it "understands non-numeric fields" do
296
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/strings.dat', [:v1,:v2,:v3]
297
+
298
+ expect(df[:v1].to_a).to eq ['test', 'foo']
299
+ end
300
+ end
301
+
302
+ context "JSON" do
303
+ it "loads parsed JSON" do
304
+ require 'json'
305
+
306
+ json = File.read 'spec/fixtures/countries.json'
307
+ df = DaruLite::DataFrame.new JSON.parse(json)
308
+
309
+ expect(df.vectors).to eq([
310
+ 'name', 'nativeName', 'tld', 'cca2', 'ccn3', 'cca3', 'currency', 'callingCode',
311
+ 'capital', 'altSpellings', 'relevance', 'region', 'subregion', 'language',
312
+ 'languageCodes', 'translations', 'latlng', 'demonym', 'borders', 'area'].to_index)
313
+
314
+ expect(df.row[0]['name']).to eq("Afghanistan")
315
+ end
316
+ end
317
+
318
+ context "Marshalling" do
319
+ it "" do
320
+ vector = DaruLite::Vector.new (0..100).collect { |_n| rand(100) }
321
+ dataframe = DaruLite::Vector.new({a: vector, b: vector, c: vector})
322
+ expect(Marshal.load(Marshal.dump(dataframe))).to eq(dataframe)
323
+ end
324
+ end
325
+
326
+ context "#save" do
327
+ before do
328
+ @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
329
+ c: [11,22,33,44,55]},
330
+ order: [:a, :b, :c],
331
+ index: [:one, :two, :three, :four, :five])
332
+ end
333
+
334
+ it "saves df to a file" do
335
+ outfile = Tempfile.new('dataframe.df')
336
+ @data_frame.save(outfile.path)
337
+ a = DaruLite::IO.load(outfile.path)
338
+ expect(a).to eq(@data_frame)
339
+ end
340
+ end
341
+ end
342
+
343
+ describe DaruLite::Vector do
344
+ context "Marshalling" do
345
+ it "" do
346
+ vector = DaruLite::Vector.new (0..100).collect { |_n| rand(100) }
347
+ expect(Marshal.load(Marshal.dump(vector))).to eq(vector)
348
+ end
349
+ end
350
+
351
+ context "#save" do
352
+ ALL_DTYPES.each do |dtype|
353
+ it "saves to a file and returns the same Vector of type #{dtype}" do
354
+ vector = DaruLite::Vector.new(
355
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
356
+ dtype: dtype)
357
+ outfile = Tempfile.new('vector.vec')
358
+ vector.save(outfile.path)
359
+ expect(DaruLite::IO.load(outfile.path)).to eq(vector)
360
+ end
361
+ end
362
+ end
363
+ end
364
+
365
+ describe DaruLite::Index do
366
+ context "Marshalling" do
367
+ it "" do
368
+ i = DaruLite::Index.new([:a, :b, :c, :d, :e])
369
+ expect(Marshal.load(Marshal.dump(i))).to eq(i)
370
+ end
371
+ end
372
+ end
373
+ end
@@ -0,0 +1,56 @@
1
+ require 'daru_lite/io/sql_data_source'
2
+ require 'sqlite3'
3
+ require 'dbi'
4
+ require 'active_record'
5
+
6
+ RSpec.describe DaruLite::IO::SqlDataSource do
7
+ include_context 'with accounts table in sqlite3 database'
8
+
9
+ let(:query) do
10
+ 'select * from accounts'
11
+ end
12
+
13
+ let(:source) do
14
+ ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
15
+ ActiveRecord::Base.connection
16
+ end
17
+
18
+ describe '.make_dataframe' do
19
+ subject(:df) { DaruLite::IO::SqlDataSource.make_dataframe(source, query) }
20
+
21
+ context 'with DBI::DatabaseHandle' do
22
+ let(:source) { DBI.connect("DBI:SQLite3:#{db_name}") }
23
+ it { is_expected.to be_a(DaruLite::DataFrame) }
24
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
25
+ its(:nrows) { is_expected.to eq 2 }
26
+ end
27
+
28
+ context 'with ActiveRecord::Connection' do
29
+ it { is_expected.to be_a(DaruLite::DataFrame) }
30
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
31
+ its(:nrows) { is_expected.to eq 2 }
32
+ end
33
+
34
+ context 'with path to sqlite3 file' do
35
+ let(:source) { db_name }
36
+ it { is_expected.to be_a(DaruLite::DataFrame) }
37
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
38
+ its(:nrows) { is_expected.to eq 2 }
39
+ end
40
+
41
+ context 'with an object not a string as a query' do
42
+ let(:query) { Object.new }
43
+ it { expect { df }.to raise_error(ArgumentError) }
44
+ end
45
+
46
+ context 'with an object not a database connection' do
47
+ let(:source) { Object.new }
48
+ it { expect { df }.to raise_error(ArgumentError) }
49
+ end
50
+
51
+ context 'with path to unsupported db file' do
52
+ let(:source) { 'spec/fixtures/bank2.dat' }
53
+ it { expect { df }.to raise_error(ArgumentError) }
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,170 @@
1
+ describe DaruLite::DataFrame, '#to_html' do
2
+ let(:doc) { Nokogiri::HTML(df.to_html) }
3
+ subject(:table) { doc.at('table') }
4
+ let(:header) { doc.at('b')}
5
+ let(:name) { 'test' }
6
+
7
+ let(:splitted_row) { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
8
+
9
+ context 'simple' do
10
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: name)}
11
+
12
+ describe 'header' do
13
+ subject { header }
14
+
15
+ it { is_expected.not_to be_nil }
16
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (3x3) " }
17
+
18
+ context 'without name' do
19
+ let(:name) { nil }
20
+
21
+ its(:text) { is_expected.to eq " DaruLite::DataFrame(3x3) " }
22
+ end
23
+ end
24
+
25
+ describe 'column headers' do
26
+ subject(:columns) { table.search('tr:nth-child(1) th').map(&:text) }
27
+ its(:size) { is_expected.to eq df.ncols + 1 }
28
+ it { is_expected.to eq ['', 'a', 'b', 'c'] }
29
+ end
30
+
31
+ context 'with multi-index columns' do
32
+ before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz], [:b, :foo]] }
33
+
34
+ subject { splitted_row }
35
+ describe 'first row' do
36
+ let(:row) { table.search('thead > tr:nth-child(1)') }
37
+
38
+ it { is_expected.to eq [
39
+ '<th rowspan="2"></th>',
40
+ '<th colspan="2">a</th>',
41
+ '<th colspan="1">b</th>'
42
+ ] }
43
+ end
44
+
45
+ describe 'next row' do
46
+ let(:row) { table.search('thead > tr:nth-child(2)') }
47
+
48
+ it { is_expected.to eq [
49
+ '<th colspan="1">foo</th>',
50
+ '<th colspan="1">baz</th>',
51
+ '<th colspan="1">foo</th>'
52
+ ] }
53
+ end
54
+ end
55
+
56
+ describe 'index' do
57
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
58
+ its(:count) { is_expected.to eq df.nrows }
59
+ it { is_expected.to eq df.index.to_a.map(&:to_s) }
60
+ end
61
+
62
+ describe 'values' do
63
+ subject(:values) {
64
+ table.search('tr')[1..-1]
65
+ .map { |tr| tr.search('td')[1..-1].map(&:text) }
66
+ }
67
+ its(:count) { is_expected.to eq df.nrows }
68
+ it { is_expected.to eq df.map_rows{|r| r.map(&:to_s)} }
69
+ end
70
+ end
71
+
72
+ context 'large dataframe' do
73
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3]*100, b: [3,4,5]*100, c: [6,7,8]*100}, name: 'test') }
74
+
75
+ describe 'header' do
76
+ subject { header }
77
+
78
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (300x3) " }
79
+ end
80
+
81
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
82
+ expect(table.search('tr').size).to eq 33
83
+ end
84
+
85
+ describe '"skipped" row' do
86
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
87
+ its(:count) { is_expected.to eq df.ncols + 1 }
88
+ it { is_expected.to all eq '...' }
89
+ end
90
+
91
+ describe 'last row' do
92
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
93
+ its(:count) { is_expected.to eq df.ncols + 1 }
94
+ it { is_expected.to eq ['299', *df.row[-1].map(&:to_s)] }
95
+ end
96
+ end
97
+
98
+ context 'with multi-index' do
99
+ let(:df) {
100
+ DaruLite::DataFrame.new(
101
+ {
102
+ a: [1,2,3,4,5,6,7],
103
+ b: %w[a b c d e f g]
104
+ }, index: DaruLite::MultiIndex.from_tuples([
105
+ %w[foo one],
106
+ %w[foo two],
107
+ %w[foo three],
108
+ %w[bar one],
109
+ %w[bar two],
110
+ %w[bar three],
111
+ %w[baz one],
112
+ ]),
113
+ name: 'test'
114
+ )
115
+ }
116
+
117
+ describe 'header' do
118
+ subject { header }
119
+
120
+ it { is_expected.not_to be_nil }
121
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (7x2) " }
122
+ end
123
+
124
+ describe 'column headers' do
125
+ let(:row) { table.search('thead > tr:nth-child(1)') }
126
+ subject { splitted_row }
127
+
128
+ it { is_expected.to eq [
129
+ '<th colspan="2"></th>',
130
+ '<th>a</th>',
131
+ '<th>b</th>'
132
+ ]}
133
+ end
134
+
135
+ context 'with multi-index columns' do
136
+ before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz]] }
137
+
138
+ subject { splitted_row }
139
+ describe 'first row' do
140
+ let(:row) { table.search('thead > tr:nth-child(1)') }
141
+
142
+ it { is_expected.to eq [
143
+ '<th colspan="2" rowspan="2"></th>',
144
+ '<th colspan="2">a</th>',
145
+ ] }
146
+ end
147
+
148
+ describe 'next row' do
149
+ let(:row) { table.search('thead > tr:nth-child(2)') }
150
+
151
+ it { is_expected.to eq [
152
+ '<th colspan="1">foo</th>',
153
+ '<th colspan="1">baz</th>',
154
+ ] }
155
+ end
156
+ end
157
+
158
+ describe 'first row' do
159
+ let(:row) { table.search('tbody > tr:nth-child(1)') }
160
+ subject { splitted_row }
161
+
162
+ it { is_expected.to eq [
163
+ '<th rowspan="3">foo</th>',
164
+ '<th rowspan="1">one</th>',
165
+ '<td>1</td>',
166
+ '<td>a</td>'
167
+ ]}
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,49 @@
1
+ describe DaruLite::IRuby::Helpers do
2
+ context 'MultiIndex' do
3
+ let(:index) {
4
+ DaruLite::MultiIndex.from_tuples [
5
+ [:a,:one,:bar],
6
+ [:a,:one,:baz],
7
+ [:a,:two,:bar],
8
+ [:a,:two,:baz],
9
+ [:b,:one,:bar],
10
+ [:b,:two,:bar],
11
+ [:b,:two,:baz],
12
+ [:b,:one,:foo],
13
+ [:c,:one,:bar],
14
+ [:c,:one,:baz],
15
+ [:c,:two,:foo],
16
+ [:c,:two,:bar]
17
+ ]
18
+ }
19
+
20
+ context '#tuples_with_rowspans' do
21
+ subject { described_class.tuples_with_rowspans(index) }
22
+
23
+ it { is_expected.to eq [
24
+ [[:a,4],[:one,2],[:bar,1]],
25
+ [ [:baz,1]],
26
+ [ [:two,2],[:bar,1]],
27
+ [ [:baz,1]],
28
+ [[:b,4],[:one,1],[:bar,1]],
29
+ [ [:two,2],[:bar,1]],
30
+ [ [:baz,1]],
31
+ [ [:one,1],[:foo,1]],
32
+ [[:c,4],[:one,2],[:bar,1]],
33
+ [ [:baz,1]],
34
+ [ [:two,2],[:foo,1]],
35
+ [ [:bar,1]]
36
+ ]}
37
+ end
38
+
39
+ context '#tuples_with_colspans' do
40
+ subject { described_class.tuples_with_colspans(index) }
41
+
42
+ it { is_expected.to eq [
43
+ [[:a, 4], [:b, 4], [:c, 4]],
44
+ [[:one, 2], [:two, 2], [:one, 1], [:two, 2], [:one, 1], [:one, 2], [:two, 2]],
45
+ [[:bar, 1], [:baz, 1], [:bar, 1], [:baz, 1], [:bar, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1]]
46
+ ]}
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,37 @@
1
+ describe DaruLite::MultiIndex, '#to_html' do
2
+ let(:index) {
3
+ DaruLite::MultiIndex.from_tuples [
4
+ [:a,:one,:bar],
5
+ [:a,:one,:baz],
6
+ [:a,:two,:bar],
7
+ [:a,:two,:baz],
8
+ [:b,:one,:bar],
9
+ [:b,:two,:bar],
10
+ [:b,:two,:baz],
11
+ [:b,:one,:foo],
12
+ [:c,:one,:bar],
13
+ [:c,:one,:baz],
14
+ [:c,:two,:foo],
15
+ [:c,:two,:bar]
16
+ ]
17
+ }
18
+
19
+ let(:table) { Nokogiri::HTML(index.to_html) }
20
+
21
+ describe 'first row' do
22
+ subject { table.at('tr:first-child > th') }
23
+ its(['colspan']) { is_expected.to eq '3' }
24
+ its(:text) { is_expected.to eq 'DaruLite::MultiIndex(12x3)' }
25
+ end
26
+
27
+ describe 'next row' do
28
+ let(:row) { table.at('tr:nth-child(2)') }
29
+ subject { row.inner_html.scan(/<th.+?<\/th>/) }
30
+
31
+ it { is_expected.to eq [
32
+ '<th rowspan="4">a</th>',
33
+ '<th rowspan="2">one</th>',
34
+ '<th rowspan="1">bar</th>'
35
+ ]}
36
+ end
37
+ end