daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,373 @@
1
+ # -*- coding: utf-8 -*-
2
+ describe DaruLite::IO do
3
+ describe DaruLite::DataFrame do
4
+ context ".from_csv" do
5
+ before do
6
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
7
+ WebMock
8
+ .stub_request(:get,"http://example.com/#{file}.csv")
9
+ .to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv"))
10
+ end
11
+ end
12
+
13
+ it "loads from a CSV file" do
14
+ df = DaruLite::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
15
+ col_sep: ' ', headers: true)
16
+
17
+ df.vectors = [:image_resolution, :true_transform, :mls].to_index
18
+ expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
19
+ expect(df[:image_resolution].first).to eq(6.55779)
20
+ expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
21
+ end
22
+
23
+ it "works properly for repeated headers" do
24
+ df = DaruLite::DataFrame.from_csv('spec/fixtures/repeated_fields.csv',header_converters: :symbol)
25
+ expect(df.vectors.to_a).to eq(["id", "name_1", "age_1", "city", "a1", "name_2", "age_2"])
26
+
27
+ age = DaruLite::Vector.new([3, 4, 5, 6, nil, 8])
28
+ expect(df['age_2']).to eq(age)
29
+ end
30
+
31
+ it "accepts scientific notation as float" do
32
+ ds = DaruLite::DataFrame.from_csv('spec/fixtures/scientific_notation.csv', order: ['x', 'y'])
33
+ expect(ds.vectors.to_a).to eq(['x', 'y'])
34
+ y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
35
+ y.zip(ds['y']).each do |y_expected, y_ds|
36
+ expect(y_ds).to be_within(0.001).of(y_expected)
37
+ end
38
+ end
39
+
40
+ it "follows the order of columns given in CSV" do
41
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv'
42
+ expect(df.vectors.to_a).to eq(%W[Account Name Rep Manager Product Quantity Price Status])
43
+ end
44
+
45
+ it "handles empty rows in the CSV" do
46
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv'
47
+ expect(df.nrows).to eq(13)
48
+ end
49
+
50
+ it "uses the custom boolean converter correctly" do
51
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean]
52
+ expect(df['Domestic'].to_a).to all be_boolean
53
+ end
54
+
55
+ it "uses the custom string converter correctly" do
56
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/string_converter_test.csv', converters: [:string]
57
+ expect(df['Case Number'].to_a.all? {|x| String === x }).to be_truthy
58
+ end
59
+
60
+ it "allow symbol to converters option" do
61
+ df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: :boolean
62
+ expect(df['Domestic'].to_a).to all be_boolean
63
+ end
64
+
65
+ it "checks for equal parsing of local CSV files and remote CSV files" do
66
+ %w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
67
+ df_local = DaruLite::DataFrame.from_csv("spec/fixtures/#{file}.csv")
68
+ df_remote = DaruLite::DataFrame.from_csv("http://example.com/#{file}.csv")
69
+ expect(df_local).to eq(df_remote)
70
+ end
71
+ end
72
+ end
73
+
74
+ context "#write_csv" do
75
+ before do
76
+ @df = DaruLite::DataFrame.new({
77
+ 'a' => [1,2,3,4,5],
78
+ 'b' => [11,22,33,44,55],
79
+ 'c' => ['a', 'g', 4, 5,'addadf'],
80
+ 'd' => [nil, 23, 4,'a','ff']})
81
+ @tempfile = Tempfile.new('data.csv')
82
+
83
+ end
84
+
85
+ it "writes DataFrame to a CSV file" do
86
+ @df.write_csv @tempfile.path
87
+ expect(DaruLite::DataFrame.from_csv(@tempfile.path)).to eq(@df)
88
+ end
89
+
90
+ it "will write headers unless headers=false" do
91
+ @df.write_csv @tempfile.path
92
+ first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
93
+ expect(first_line).to eq @df.vectors.to_a
94
+ end
95
+
96
+ it "will not write headers when headers=false" do
97
+ @df.write_csv @tempfile.path, { headers: false }
98
+ first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
99
+ expect(first_line).to eq @df.head(1).map { |v| (v.first || '').to_s }
100
+ end
101
+
102
+ end
103
+
104
+ context ".from_excel" do
105
+ before do
106
+ id = DaruLite::Vector.new([1, 2, 3, 4, 5, 6])
107
+ name = DaruLite::Vector.new(%w(Alex Claude Peter Franz George Fernand))
108
+ age = DaruLite::Vector.new( [20, 23, 25, nil, 5.5, nil])
109
+ city = DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome', nil])
110
+ a1 = DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c', nil])
111
+ @expected = DaruLite::DataFrame.new({
112
+ :id => id, :name => name, :age => age, :city => city, :a1 => a1
113
+ }, order: [:id, :name, :age, :city, :a1])
114
+ end
115
+
116
+ it "loads DataFrame from an Excel Spreadsheet" do
117
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls.xls'
118
+
119
+ expect(df.nrows).to eq(6)
120
+ expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
121
+ expect(df[:age][5]).to eq(nil)
122
+ expect(@expected).to eq(df)
123
+ end
124
+ end
125
+
126
+ context "#from_excel with row_id" do
127
+ before do
128
+ id = DaruLite::Vector.new(['id', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
129
+ name = DaruLite::Vector.new(%w(name Alex Claude Peter Franz George Fernand))
130
+ age = DaruLite::Vector.new(['age', 20.0, 23.0, 25.0, nil, 5.5, nil])
131
+ city = DaruLite::Vector.new(['city', 'New York', 'London', 'London', 'Paris', 'Tome', nil])
132
+ a1 = DaruLite::Vector.new(['a1', 'a,b', 'b,c', 'a', nil, 'a,b,c', nil])
133
+ @expected_1 = DaruLite::DataFrame.new({:id2 => id, :name2 => name, :age2 => age}, order: [:id2, :name2, :age2])
134
+ @expected_2 = DaruLite::DataFrame.new({
135
+ :id => id, :name => name, :age => age, :city => city, :a1 => a1
136
+ }, order: [:id, :name, :age, :city, :a1])
137
+ end
138
+
139
+ it "loads DataFrame from test_xls_2.xls" do
140
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls'
141
+
142
+ expect(df.nrows).to eq(7)
143
+ expect(df.vectors.to_a).to eq([:id2, :name2, :age2])
144
+ expect(df[:age2][6]).to eq(nil)
145
+ expect(@expected_1).to eq(df)
146
+ end
147
+
148
+ it "loads DataFrame from test_xls_2.xls with row_id" do
149
+ df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls', {row_id: 1}
150
+
151
+ expect(df.nrows).to eq(7)
152
+ expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
153
+ expect(df[:age][6]).to eq(nil)
154
+ expect(@expected_2).to eq(df)
155
+ end
156
+ end
157
+
158
+ context "#write_excel" do
159
+ before do
160
+ a = DaruLite::Vector.new(100.times.map { rand(100) })
161
+ b = DaruLite::Vector.new((['b'] * 100))
162
+ @expected = DaruLite::DataFrame.new({ :b => b, :a => a })
163
+
164
+ tempfile = Tempfile.new('test_write.xls')
165
+
166
+ @expected.write_excel tempfile.path
167
+ @df = DaruLite::DataFrame.from_excel tempfile.path
168
+ end
169
+
170
+ it "correctly writes DataFrame to an Excel Spreadsheet" do
171
+ expect(@expected).to eq(@df)
172
+ end
173
+ end
174
+
175
+ context ".from_sql" do
176
+ include_context 'with accounts table in sqlite3 database'
177
+
178
+ context 'with a database handler of DBI' do
179
+ let(:db) do
180
+ DBI.connect("DBI:SQLite3:#{db_name}")
181
+ end
182
+
183
+ subject { DaruLite::DataFrame.from_sql(db, "select * from accounts") }
184
+
185
+ it "loads data from an SQL database" do
186
+ accounts = subject
187
+ expect(accounts.class).to eq DaruLite::DataFrame
188
+ expect(accounts.nrows).to eq 2
189
+ expect(accounts.row[0][:id]).to eq 1
190
+ expect(accounts.row[0][:name]).to eq "Homer"
191
+ end
192
+ end
193
+
194
+ context 'with a database connection of ActiveRecord' do
195
+ let(:connection) do
196
+ DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
197
+ DaruLite::RSpec::Account.connection
198
+ end
199
+
200
+ subject do
201
+ DaruLite::DataFrame.from_sql(connection, "select * from accounts")
202
+ end
203
+
204
+ it "loads data from an SQL database" do
205
+ accounts = subject
206
+ expect(accounts.class).to eq DaruLite::DataFrame
207
+ expect(accounts.nrows).to eq 2
208
+ expect(accounts.row[0][:id]).to eq 1
209
+ expect(accounts.row[0][:name]).to eq "Homer"
210
+ end
211
+ end
212
+ end
213
+
214
+ context "#write_sql" do
215
+ let(:df) { DaruLite::DataFrame.new({
216
+ 'a' => [1,2,3,4,5],
217
+ 'b' => [11,22,33,44,55],
218
+ 'c' => ['a', 'g', 4, 5,'addadf'],
219
+ 'd' => [nil, 23, 4,'a','ff']})
220
+ }
221
+
222
+ let(:dbh) { double }
223
+ let(:prepared_query) { double }
224
+
225
+ it "writes the DataFrame to an SQL database" do
226
+ expect(dbh).to receive(:prepare)
227
+ .with('INSERT INTO tbl (a,b,c,d) VALUES (?,?,?,?)')
228
+ .and_return(prepared_query)
229
+ df.each_row { |r| expect(prepared_query).to receive(:execute).with(*r.to_a).ordered }
230
+
231
+ df.write_sql dbh, 'tbl'
232
+ end
233
+ end
234
+
235
+ context '.from_activerecord' do
236
+ include_context 'with accounts table in sqlite3 database'
237
+
238
+ context 'with ActiveRecord::Relation' do
239
+ before do
240
+ DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
241
+ end
242
+
243
+ let(:relation) do
244
+ DaruLite::RSpec::Account.all
245
+ end
246
+
247
+ context 'without specifying field names' do
248
+ subject do
249
+ DaruLite::DataFrame.from_activerecord(relation)
250
+ end
251
+
252
+ it 'loads data from an AR::Relation object' do
253
+ accounts = subject
254
+ expect(accounts.class).to eq DaruLite::DataFrame
255
+ expect(accounts.nrows).to eq 2
256
+ expect(accounts.vectors.to_a).to eq [:id, :name, :age]
257
+ expect(accounts.row[0][:id]).to eq 1
258
+ expect(accounts.row[0][:name]).to eq 'Homer'
259
+ expect(accounts.row[0][:age]).to eq 20
260
+ end
261
+ end
262
+
263
+ context 'with specifying field names in parameters' do
264
+ subject do
265
+ DaruLite::DataFrame.from_activerecord(relation, :name, :age)
266
+ end
267
+
268
+ it 'loads data from an AR::Relation object' do
269
+ accounts = subject
270
+ expect(accounts.class).to eq DaruLite::DataFrame
271
+ expect(accounts.nrows).to eq 2
272
+ expect(accounts.vectors.to_a).to eq [:name, :age]
273
+ expect(accounts.row[0][:name]).to eq 'Homer'
274
+ expect(accounts.row[0][:age]).to eq 20
275
+ end
276
+ end
277
+ end
278
+ end
279
+
280
+ context ".from_plaintext" do
281
+ it "reads data from plain text files" do
282
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
283
+
284
+ expect(df.vectors.to_a).to eq([:v1,:v2,:v3,:v4,:v5,:v6])
285
+ end
286
+
287
+ xit "understands empty fields" do
288
+ pending 'See FIXME note in io.rb'
289
+
290
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/empties.dat', [:v1,:v2,:v3]
291
+
292
+ expect(df.row[1].to_a).to eq [4, nil, 6]
293
+ end
294
+
295
+ it "understands non-numeric fields" do
296
+ df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/strings.dat', [:v1,:v2,:v3]
297
+
298
+ expect(df[:v1].to_a).to eq ['test', 'foo']
299
+ end
300
+ end
301
+
302
+ context "JSON" do
303
+ it "loads parsed JSON" do
304
+ require 'json'
305
+
306
+ json = File.read 'spec/fixtures/countries.json'
307
+ df = DaruLite::DataFrame.new JSON.parse(json)
308
+
309
+ expect(df.vectors).to eq([
310
+ 'name', 'nativeName', 'tld', 'cca2', 'ccn3', 'cca3', 'currency', 'callingCode',
311
+ 'capital', 'altSpellings', 'relevance', 'region', 'subregion', 'language',
312
+ 'languageCodes', 'translations', 'latlng', 'demonym', 'borders', 'area'].to_index)
313
+
314
+ expect(df.row[0]['name']).to eq("Afghanistan")
315
+ end
316
+ end
317
+
318
+ context "Marshalling" do
319
+ it "" do
320
+ vector = DaruLite::Vector.new (0..100).collect { |_n| rand(100) }
321
+ dataframe = DaruLite::Vector.new({a: vector, b: vector, c: vector})
322
+ expect(Marshal.load(Marshal.dump(dataframe))).to eq(dataframe)
323
+ end
324
+ end
325
+
326
+ context "#save" do
327
+ before do
328
+ @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
329
+ c: [11,22,33,44,55]},
330
+ order: [:a, :b, :c],
331
+ index: [:one, :two, :three, :four, :five])
332
+ end
333
+
334
+ it "saves df to a file" do
335
+ outfile = Tempfile.new('dataframe.df')
336
+ @data_frame.save(outfile.path)
337
+ a = DaruLite::IO.load(outfile.path)
338
+ expect(a).to eq(@data_frame)
339
+ end
340
+ end
341
+ end
342
+
343
+ describe DaruLite::Vector do
344
+ context "Marshalling" do
345
+ it "" do
346
+ vector = DaruLite::Vector.new (0..100).collect { |_n| rand(100) }
347
+ expect(Marshal.load(Marshal.dump(vector))).to eq(vector)
348
+ end
349
+ end
350
+
351
+ context "#save" do
352
+ ALL_DTYPES.each do |dtype|
353
+ it "saves to a file and returns the same Vector of type #{dtype}" do
354
+ vector = DaruLite::Vector.new(
355
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
356
+ dtype: dtype)
357
+ outfile = Tempfile.new('vector.vec')
358
+ vector.save(outfile.path)
359
+ expect(DaruLite::IO.load(outfile.path)).to eq(vector)
360
+ end
361
+ end
362
+ end
363
+ end
364
+
365
+ describe DaruLite::Index do
366
+ context "Marshalling" do
367
+ it "" do
368
+ i = DaruLite::Index.new([:a, :b, :c, :d, :e])
369
+ expect(Marshal.load(Marshal.dump(i))).to eq(i)
370
+ end
371
+ end
372
+ end
373
+ end
@@ -0,0 +1,56 @@
1
+ require 'daru_lite/io/sql_data_source'
2
+ require 'sqlite3'
3
+ require 'dbi'
4
+ require 'active_record'
5
+
6
+ RSpec.describe DaruLite::IO::SqlDataSource do
7
+ include_context 'with accounts table in sqlite3 database'
8
+
9
+ let(:query) do
10
+ 'select * from accounts'
11
+ end
12
+
13
+ let(:source) do
14
+ ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
15
+ ActiveRecord::Base.connection
16
+ end
17
+
18
+ describe '.make_dataframe' do
19
+ subject(:df) { DaruLite::IO::SqlDataSource.make_dataframe(source, query) }
20
+
21
+ context 'with DBI::DatabaseHandle' do
22
+ let(:source) { DBI.connect("DBI:SQLite3:#{db_name}") }
23
+ it { is_expected.to be_a(DaruLite::DataFrame) }
24
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
25
+ its(:nrows) { is_expected.to eq 2 }
26
+ end
27
+
28
+ context 'with ActiveRecord::Connection' do
29
+ it { is_expected.to be_a(DaruLite::DataFrame) }
30
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
31
+ its(:nrows) { is_expected.to eq 2 }
32
+ end
33
+
34
+ context 'with path to sqlite3 file' do
35
+ let(:source) { db_name }
36
+ it { is_expected.to be_a(DaruLite::DataFrame) }
37
+ it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
38
+ its(:nrows) { is_expected.to eq 2 }
39
+ end
40
+
41
+ context 'with an object not a string as a query' do
42
+ let(:query) { Object.new }
43
+ it { expect { df }.to raise_error(ArgumentError) }
44
+ end
45
+
46
+ context 'with an object not a database connection' do
47
+ let(:source) { Object.new }
48
+ it { expect { df }.to raise_error(ArgumentError) }
49
+ end
50
+
51
+ context 'with path to unsupported db file' do
52
+ let(:source) { 'spec/fixtures/bank2.dat' }
53
+ it { expect { df }.to raise_error(ArgumentError) }
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,170 @@
1
+ describe DaruLite::DataFrame, '#to_html' do
2
+ let(:doc) { Nokogiri::HTML(df.to_html) }
3
+ subject(:table) { doc.at('table') }
4
+ let(:header) { doc.at('b')}
5
+ let(:name) { 'test' }
6
+
7
+ let(:splitted_row) { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
8
+
9
+ context 'simple' do
10
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: name)}
11
+
12
+ describe 'header' do
13
+ subject { header }
14
+
15
+ it { is_expected.not_to be_nil }
16
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (3x3) " }
17
+
18
+ context 'without name' do
19
+ let(:name) { nil }
20
+
21
+ its(:text) { is_expected.to eq " DaruLite::DataFrame(3x3) " }
22
+ end
23
+ end
24
+
25
+ describe 'column headers' do
26
+ subject(:columns) { table.search('tr:nth-child(1) th').map(&:text) }
27
+ its(:size) { is_expected.to eq df.ncols + 1 }
28
+ it { is_expected.to eq ['', 'a', 'b', 'c'] }
29
+ end
30
+
31
+ context 'with multi-index columns' do
32
+ before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz], [:b, :foo]] }
33
+
34
+ subject { splitted_row }
35
+ describe 'first row' do
36
+ let(:row) { table.search('thead > tr:nth-child(1)') }
37
+
38
+ it { is_expected.to eq [
39
+ '<th rowspan="2"></th>',
40
+ '<th colspan="2">a</th>',
41
+ '<th colspan="1">b</th>'
42
+ ] }
43
+ end
44
+
45
+ describe 'next row' do
46
+ let(:row) { table.search('thead > tr:nth-child(2)') }
47
+
48
+ it { is_expected.to eq [
49
+ '<th colspan="1">foo</th>',
50
+ '<th colspan="1">baz</th>',
51
+ '<th colspan="1">foo</th>'
52
+ ] }
53
+ end
54
+ end
55
+
56
+ describe 'index' do
57
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
58
+ its(:count) { is_expected.to eq df.nrows }
59
+ it { is_expected.to eq df.index.to_a.map(&:to_s) }
60
+ end
61
+
62
+ describe 'values' do
63
+ subject(:values) {
64
+ table.search('tr')[1..-1]
65
+ .map { |tr| tr.search('td')[1..-1].map(&:text) }
66
+ }
67
+ its(:count) { is_expected.to eq df.nrows }
68
+ it { is_expected.to eq df.map_rows{|r| r.map(&:to_s)} }
69
+ end
70
+ end
71
+
72
+ context 'large dataframe' do
73
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3]*100, b: [3,4,5]*100, c: [6,7,8]*100}, name: 'test') }
74
+
75
+ describe 'header' do
76
+ subject { header }
77
+
78
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (300x3) " }
79
+ end
80
+
81
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
82
+ expect(table.search('tr').size).to eq 33
83
+ end
84
+
85
+ describe '"skipped" row' do
86
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
87
+ its(:count) { is_expected.to eq df.ncols + 1 }
88
+ it { is_expected.to all eq '...' }
89
+ end
90
+
91
+ describe 'last row' do
92
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
93
+ its(:count) { is_expected.to eq df.ncols + 1 }
94
+ it { is_expected.to eq ['299', *df.row[-1].map(&:to_s)] }
95
+ end
96
+ end
97
+
98
+ context 'with multi-index' do
99
+ let(:df) {
100
+ DaruLite::DataFrame.new(
101
+ {
102
+ a: [1,2,3,4,5,6,7],
103
+ b: %w[a b c d e f g]
104
+ }, index: DaruLite::MultiIndex.from_tuples([
105
+ %w[foo one],
106
+ %w[foo two],
107
+ %w[foo three],
108
+ %w[bar one],
109
+ %w[bar two],
110
+ %w[bar three],
111
+ %w[baz one],
112
+ ]),
113
+ name: 'test'
114
+ )
115
+ }
116
+
117
+ describe 'header' do
118
+ subject { header }
119
+
120
+ it { is_expected.not_to be_nil }
121
+ its(:text) { is_expected.to eq " DaruLite::DataFrame: test (7x2) " }
122
+ end
123
+
124
+ describe 'column headers' do
125
+ let(:row) { table.search('thead > tr:nth-child(1)') }
126
+ subject { splitted_row }
127
+
128
+ it { is_expected.to eq [
129
+ '<th colspan="2"></th>',
130
+ '<th>a</th>',
131
+ '<th>b</th>'
132
+ ]}
133
+ end
134
+
135
+ context 'with multi-index columns' do
136
+ before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz]] }
137
+
138
+ subject { splitted_row }
139
+ describe 'first row' do
140
+ let(:row) { table.search('thead > tr:nth-child(1)') }
141
+
142
+ it { is_expected.to eq [
143
+ '<th colspan="2" rowspan="2"></th>',
144
+ '<th colspan="2">a</th>',
145
+ ] }
146
+ end
147
+
148
+ describe 'next row' do
149
+ let(:row) { table.search('thead > tr:nth-child(2)') }
150
+
151
+ it { is_expected.to eq [
152
+ '<th colspan="1">foo</th>',
153
+ '<th colspan="1">baz</th>',
154
+ ] }
155
+ end
156
+ end
157
+
158
+ describe 'first row' do
159
+ let(:row) { table.search('tbody > tr:nth-child(1)') }
160
+ subject { splitted_row }
161
+
162
+ it { is_expected.to eq [
163
+ '<th rowspan="3">foo</th>',
164
+ '<th rowspan="1">one</th>',
165
+ '<td>1</td>',
166
+ '<td>a</td>'
167
+ ]}
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,49 @@
1
+ describe DaruLite::IRuby::Helpers do
2
+ context 'MultiIndex' do
3
+ let(:index) {
4
+ DaruLite::MultiIndex.from_tuples [
5
+ [:a,:one,:bar],
6
+ [:a,:one,:baz],
7
+ [:a,:two,:bar],
8
+ [:a,:two,:baz],
9
+ [:b,:one,:bar],
10
+ [:b,:two,:bar],
11
+ [:b,:two,:baz],
12
+ [:b,:one,:foo],
13
+ [:c,:one,:bar],
14
+ [:c,:one,:baz],
15
+ [:c,:two,:foo],
16
+ [:c,:two,:bar]
17
+ ]
18
+ }
19
+
20
+ context '#tuples_with_rowspans' do
21
+ subject { described_class.tuples_with_rowspans(index) }
22
+
23
+ it { is_expected.to eq [
24
+ [[:a,4],[:one,2],[:bar,1]],
25
+ [ [:baz,1]],
26
+ [ [:two,2],[:bar,1]],
27
+ [ [:baz,1]],
28
+ [[:b,4],[:one,1],[:bar,1]],
29
+ [ [:two,2],[:bar,1]],
30
+ [ [:baz,1]],
31
+ [ [:one,1],[:foo,1]],
32
+ [[:c,4],[:one,2],[:bar,1]],
33
+ [ [:baz,1]],
34
+ [ [:two,2],[:foo,1]],
35
+ [ [:bar,1]]
36
+ ]}
37
+ end
38
+
39
+ context '#tuples_with_colspans' do
40
+ subject { described_class.tuples_with_colspans(index) }
41
+
42
+ it { is_expected.to eq [
43
+ [[:a, 4], [:b, 4], [:c, 4]],
44
+ [[:one, 2], [:two, 2], [:one, 1], [:two, 2], [:one, 1], [:one, 2], [:two, 2]],
45
+ [[:bar, 1], [:baz, 1], [:bar, 1], [:baz, 1], [:bar, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1]]
46
+ ]}
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,37 @@
1
+ describe DaruLite::MultiIndex, '#to_html' do
2
+ let(:index) {
3
+ DaruLite::MultiIndex.from_tuples [
4
+ [:a,:one,:bar],
5
+ [:a,:one,:baz],
6
+ [:a,:two,:bar],
7
+ [:a,:two,:baz],
8
+ [:b,:one,:bar],
9
+ [:b,:two,:bar],
10
+ [:b,:two,:baz],
11
+ [:b,:one,:foo],
12
+ [:c,:one,:bar],
13
+ [:c,:one,:baz],
14
+ [:c,:two,:foo],
15
+ [:c,:two,:bar]
16
+ ]
17
+ }
18
+
19
+ let(:table) { Nokogiri::HTML(index.to_html) }
20
+
21
+ describe 'first row' do
22
+ subject { table.at('tr:first-child > th') }
23
+ its(['colspan']) { is_expected.to eq '3' }
24
+ its(:text) { is_expected.to eq 'DaruLite::MultiIndex(12x3)' }
25
+ end
26
+
27
+ describe 'next row' do
28
+ let(:row) { table.at('tr:nth-child(2)') }
29
+ subject { row.inner_html.scan(/<th.+?<\/th>/) }
30
+
31
+ it { is_expected.to eq [
32
+ '<th rowspan="4">a</th>',
33
+ '<th rowspan="2">one</th>',
34
+ '<th rowspan="1">bar</th>'
35
+ ]}
36
+ end
37
+ end