daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
data/spec/io/io_spec.rb
ADDED
@@ -0,0 +1,373 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
describe DaruLite::IO do
|
3
|
+
describe DaruLite::DataFrame do
|
4
|
+
context ".from_csv" do
|
5
|
+
before do
|
6
|
+
%w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
|
7
|
+
WebMock
|
8
|
+
.stub_request(:get,"http://example.com/#{file}.csv")
|
9
|
+
.to_return(status: 200, body: File.read("spec/fixtures/#{file}.csv"))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
it "loads from a CSV file" do
|
14
|
+
df = DaruLite::DataFrame.from_csv('spec/fixtures/matrix_test.csv',
|
15
|
+
col_sep: ' ', headers: true)
|
16
|
+
|
17
|
+
df.vectors = [:image_resolution, :true_transform, :mls].to_index
|
18
|
+
expect(df.vectors).to eq([:image_resolution, :true_transform, :mls].to_index)
|
19
|
+
expect(df[:image_resolution].first).to eq(6.55779)
|
20
|
+
expect(df[:true_transform].first).to eq("-0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1")
|
21
|
+
end
|
22
|
+
|
23
|
+
it "works properly for repeated headers" do
|
24
|
+
df = DaruLite::DataFrame.from_csv('spec/fixtures/repeated_fields.csv',header_converters: :symbol)
|
25
|
+
expect(df.vectors.to_a).to eq(["id", "name_1", "age_1", "city", "a1", "name_2", "age_2"])
|
26
|
+
|
27
|
+
age = DaruLite::Vector.new([3, 4, 5, 6, nil, 8])
|
28
|
+
expect(df['age_2']).to eq(age)
|
29
|
+
end
|
30
|
+
|
31
|
+
it "accepts scientific notation as float" do
|
32
|
+
ds = DaruLite::DataFrame.from_csv('spec/fixtures/scientific_notation.csv', order: ['x', 'y'])
|
33
|
+
expect(ds.vectors.to_a).to eq(['x', 'y'])
|
34
|
+
y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
|
35
|
+
y.zip(ds['y']).each do |y_expected, y_ds|
|
36
|
+
expect(y_ds).to be_within(0.001).of(y_expected)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Loads sales-funnel.csv and checks the resulting vector (column) order.
it "follows the order of columns given in CSV" do
  df = DaruLite::DataFrame.from_csv 'spec/fixtures/sales-funnel.csv'
  # Plain %w: none of these names need interpolation.
  expect(df.vectors.to_a).to eq(%w[Account Name Rep Manager Product Quantity Price Status])
end
|
44
|
+
|
45
|
+
it "handles empty rows in the CSV" do
|
46
|
+
df = DaruLite::DataFrame.from_csv 'spec/fixtures/empty_rows_test.csv'
|
47
|
+
expect(df.nrows).to eq(13)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "uses the custom boolean converter correctly" do
|
51
|
+
df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: [:boolean]
|
52
|
+
expect(df['Domestic'].to_a).to all be_boolean
|
53
|
+
end
|
54
|
+
|
55
|
+
# Reads the fixture with the :string converter and checks that every value
# in the 'Case Number' column is a String.
it "uses the custom string converter correctly" do
  df = DaruLite::DataFrame.from_csv 'spec/fixtures/string_converter_test.csv', converters: [:string]
  # `all be_a(String)` reports the offending element on failure, unlike
  # a bare truthiness check on `all?`.
  expect(df['Case Number'].to_a).to all be_a(String)
end
|
59
|
+
|
60
|
+
it "allow symbol to converters option" do
|
61
|
+
df = DaruLite::DataFrame.from_csv 'spec/fixtures/boolean_converter_test.csv', converters: :boolean
|
62
|
+
expect(df['Domestic'].to_a).to all be_boolean
|
63
|
+
end
|
64
|
+
|
65
|
+
it "checks for equal parsing of local CSV files and remote CSV files" do
|
66
|
+
%w[matrix_test repeated_fields scientific_notation sales-funnel].each do |file|
|
67
|
+
df_local = DaruLite::DataFrame.from_csv("spec/fixtures/#{file}.csv")
|
68
|
+
df_remote = DaruLite::DataFrame.from_csv("http://example.com/#{file}.csv")
|
69
|
+
expect(df_local).to eq(df_remote)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Exercises DataFrame#write_csv against a temporary file, with and without
# the header row.
context "#write_csv" do
  before do
    @df = DaruLite::DataFrame.new({
      'a' => [1,2,3,4,5],
      'b' => [11,22,33,44,55],
      'c' => ['a', 'g', 4, 5,'addadf'],
      'd' => [nil, 23, 4,'a','ff']})
    @tempfile = Tempfile.new('data.csv')
  end

  # Close and unlink the temporary file so examples do not leak file
  # handles or leave files behind.
  after { @tempfile&.close! }

  it "writes DataFrame to a CSV file" do
    @df.write_csv @tempfile.path
    expect(DaruLite::DataFrame.from_csv(@tempfile.path)).to eq(@df)
  end

  it "will write headers unless headers=false" do
    @df.write_csv @tempfile.path
    # split with limit -1 keeps trailing empty fields.
    first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
    expect(first_line).to eq @df.vectors.to_a
  end

  it "will not write headers when headers=false" do
    @df.write_csv @tempfile.path, { headers: false }
    first_line = File.open(@tempfile.path, &:readline).chomp.split(',', -1)
    # Without headers the first line is the first data row; nil is written
    # as an empty field.
    expect(first_line).to eq @df.head(1).map { |v| (v.first || '').to_s }
  end
end
|
103
|
+
|
104
|
+
# Loads spec/fixtures/test_xls.xls and compares it with a hand-built frame.
context ".from_excel" do
  before do
    id   = DaruLite::Vector.new([1, 2, 3, 4, 5, 6])
    name = DaruLite::Vector.new(%w(Alex Claude Peter Franz George Fernand))
    age  = DaruLite::Vector.new([20, 23, 25, nil, 5.5, nil])
    city = DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome', nil])
    a1   = DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c', nil])
    @expected = DaruLite::DataFrame.new({
      :id => id, :name => name, :age => age, :city => city, :a1 => a1
    }, order: [:id, :name, :age, :city, :a1])
  end

  it "loads DataFrame from an Excel Spreadsheet" do
    df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls.xls'

    expect(df.nrows).to eq(6)
    expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
    expect(df[:age][5]).to be_nil
    # The loaded frame is the value under test, so it goes on the actual side.
    expect(df).to eq(@expected)
  end
end
|
125
|
+
|
126
|
+
# Loads spec/fixtures/test_xls_2.xls with and without the :row_id option.
# `from_excel` is called on the class, hence the "." prefix in the description.
context ".from_excel with row_id" do
  before do
    id   = DaruLite::Vector.new(['id', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    name = DaruLite::Vector.new(%w(name Alex Claude Peter Franz George Fernand))
    age  = DaruLite::Vector.new(['age', 20.0, 23.0, 25.0, nil, 5.5, nil])
    city = DaruLite::Vector.new(['city', 'New York', 'London', 'London', 'Paris', 'Tome', nil])
    a1   = DaruLite::Vector.new(['a1', 'a,b', 'b,c', 'a', nil, 'a,b,c', nil])
    @expected_1 = DaruLite::DataFrame.new({:id2 => id, :name2 => name, :age2 => age}, order: [:id2, :name2, :age2])
    @expected_2 = DaruLite::DataFrame.new({
      :id => id, :name => name, :age => age, :city => city, :a1 => a1
    }, order: [:id, :name, :age, :city, :a1])
  end

  it "loads DataFrame from test_xls_2.xls" do
    df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls'

    expect(df.nrows).to eq(7)
    expect(df.vectors.to_a).to eq([:id2, :name2, :age2])
    expect(df[:age2][6]).to be_nil
    expect(df).to eq(@expected_1)
  end

  it "loads DataFrame from test_xls_2.xls with row_id" do
    df = DaruLite::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls', {row_id: 1}

    expect(df.nrows).to eq(7)
    expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
    expect(df[:age][6]).to be_nil
    expect(df).to eq(@expected_2)
  end
end
|
157
|
+
|
158
|
+
# Writes a DataFrame to a temporary spreadsheet and reads it back.
context "#write_excel" do
  before do
    a = DaruLite::Vector.new(Array.new(100) { rand(100) })
    b = DaruLite::Vector.new(['b'] * 100)
    @expected = DaruLite::DataFrame.new({ :b => b, :a => a })

    # Held in an instance variable so the after hook can clean it up.
    @tempfile = Tempfile.new('test_write.xls')

    @expected.write_excel @tempfile.path
    @df = DaruLite::DataFrame.from_excel @tempfile.path
  end

  # Close and unlink the temporary spreadsheet after each example.
  after { @tempfile&.close! }

  it "correctly writes DataFrame to an Excel Spreadsheet" do
    # The frame read back from disk is the value under test.
    expect(@df).to eq(@expected)
  end
end
|
174
|
+
|
175
|
+
context ".from_sql" do
|
176
|
+
include_context 'with accounts table in sqlite3 database'
|
177
|
+
|
178
|
+
context 'with a database handler of DBI' do
|
179
|
+
let(:db) do
|
180
|
+
DBI.connect("DBI:SQLite3:#{db_name}")
|
181
|
+
end
|
182
|
+
|
183
|
+
subject { DaruLite::DataFrame.from_sql(db, "select * from accounts") }
|
184
|
+
|
185
|
+
it "loads data from an SQL database" do
|
186
|
+
accounts = subject
|
187
|
+
expect(accounts.class).to eq DaruLite::DataFrame
|
188
|
+
expect(accounts.nrows).to eq 2
|
189
|
+
expect(accounts.row[0][:id]).to eq 1
|
190
|
+
expect(accounts.row[0][:name]).to eq "Homer"
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
context 'with a database connection of ActiveRecord' do
|
195
|
+
let(:connection) do
|
196
|
+
DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
|
197
|
+
DaruLite::RSpec::Account.connection
|
198
|
+
end
|
199
|
+
|
200
|
+
subject do
|
201
|
+
DaruLite::DataFrame.from_sql(connection, "select * from accounts")
|
202
|
+
end
|
203
|
+
|
204
|
+
it "loads data from an SQL database" do
|
205
|
+
accounts = subject
|
206
|
+
expect(accounts.class).to eq DaruLite::DataFrame
|
207
|
+
expect(accounts.nrows).to eq 2
|
208
|
+
expect(accounts.row[0][:id]).to eq 1
|
209
|
+
expect(accounts.row[0][:name]).to eq "Homer"
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
context "#write_sql" do
|
215
|
+
let(:df) { DaruLite::DataFrame.new({
|
216
|
+
'a' => [1,2,3,4,5],
|
217
|
+
'b' => [11,22,33,44,55],
|
218
|
+
'c' => ['a', 'g', 4, 5,'addadf'],
|
219
|
+
'd' => [nil, 23, 4,'a','ff']})
|
220
|
+
}
|
221
|
+
|
222
|
+
let(:dbh) { double }
|
223
|
+
let(:prepared_query) { double }
|
224
|
+
|
225
|
+
it "writes the DataFrame to an SQL database" do
|
226
|
+
expect(dbh).to receive(:prepare)
|
227
|
+
.with('INSERT INTO tbl (a,b,c,d) VALUES (?,?,?,?)')
|
228
|
+
.and_return(prepared_query)
|
229
|
+
df.each_row { |r| expect(prepared_query).to receive(:execute).with(*r.to_a).ordered }
|
230
|
+
|
231
|
+
df.write_sql dbh, 'tbl'
|
232
|
+
end
|
233
|
+
end
|
234
|
+
|
235
|
+
context '.from_activerecord' do
|
236
|
+
include_context 'with accounts table in sqlite3 database'
|
237
|
+
|
238
|
+
context 'with ActiveRecord::Relation' do
|
239
|
+
before do
|
240
|
+
DaruLite::RSpec::Account.establish_connection "sqlite3:#{db_name}"
|
241
|
+
end
|
242
|
+
|
243
|
+
let(:relation) do
|
244
|
+
DaruLite::RSpec::Account.all
|
245
|
+
end
|
246
|
+
|
247
|
+
context 'without specifying field names' do
|
248
|
+
subject do
|
249
|
+
DaruLite::DataFrame.from_activerecord(relation)
|
250
|
+
end
|
251
|
+
|
252
|
+
it 'loads data from an AR::Relation object' do
|
253
|
+
accounts = subject
|
254
|
+
expect(accounts.class).to eq DaruLite::DataFrame
|
255
|
+
expect(accounts.nrows).to eq 2
|
256
|
+
expect(accounts.vectors.to_a).to eq [:id, :name, :age]
|
257
|
+
expect(accounts.row[0][:id]).to eq 1
|
258
|
+
expect(accounts.row[0][:name]).to eq 'Homer'
|
259
|
+
expect(accounts.row[0][:age]).to eq 20
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
context 'with specifying field names in parameters' do
|
264
|
+
subject do
|
265
|
+
DaruLite::DataFrame.from_activerecord(relation, :name, :age)
|
266
|
+
end
|
267
|
+
|
268
|
+
it 'loads data from an AR::Relation object' do
|
269
|
+
accounts = subject
|
270
|
+
expect(accounts.class).to eq DaruLite::DataFrame
|
271
|
+
expect(accounts.nrows).to eq 2
|
272
|
+
expect(accounts.vectors.to_a).to eq [:name, :age]
|
273
|
+
expect(accounts.row[0][:name]).to eq 'Homer'
|
274
|
+
expect(accounts.row[0][:age]).to eq 20
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
context ".from_plaintext" do
|
281
|
+
it "reads data from plain text files" do
|
282
|
+
df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6]
|
283
|
+
|
284
|
+
expect(df.vectors.to_a).to eq([:v1,:v2,:v3,:v4,:v5,:v6])
|
285
|
+
end
|
286
|
+
|
287
|
+
# Skipped example; the reason is attached as metadata so it appears in the
# RSpec report (a `pending` call inside a skipped body is never reached).
it "understands empty fields", skip: 'See FIXME note in io.rb' do
  df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/empties.dat', [:v1,:v2,:v3]

  expect(df.row[1].to_a).to eq [4, nil, 6]
end
|
294
|
+
|
295
|
+
it "understands non-numeric fields" do
|
296
|
+
df = DaruLite::DataFrame.from_plaintext 'spec/fixtures/strings.dat', [:v1,:v2,:v3]
|
297
|
+
|
298
|
+
expect(df[:v1].to_a).to eq ['test', 'foo']
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
context "JSON" do
|
303
|
+
it "loads parsed JSON" do
|
304
|
+
require 'json'
|
305
|
+
|
306
|
+
json = File.read 'spec/fixtures/countries.json'
|
307
|
+
df = DaruLite::DataFrame.new JSON.parse(json)
|
308
|
+
|
309
|
+
expect(df.vectors).to eq([
|
310
|
+
'name', 'nativeName', 'tld', 'cca2', 'ccn3', 'cca3', 'currency', 'callingCode',
|
311
|
+
'capital', 'altSpellings', 'relevance', 'region', 'subregion', 'language',
|
312
|
+
'languageCodes', 'translations', 'latlng', 'demonym', 'borders', 'area'].to_index)
|
313
|
+
|
314
|
+
expect(df.row[0]['name']).to eq("Afghanistan")
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
# Round-trips a DataFrame through Marshal.dump / Marshal.load.
context "Marshalling" do
  it "dumps and loads a DataFrame without changing it" do
    # 101 random integers, matching the original 0..100 range.
    vector = DaruLite::Vector.new(Array.new(101) { rand(100) })
    # Must be a DataFrame (not a Vector built from a hash) so that
    # DataFrame marshalling is what gets exercised here.
    dataframe = DaruLite::DataFrame.new({a: vector, b: vector, c: vector})
    expect(Marshal.load(Marshal.dump(dataframe))).to eq(dataframe)
  end
end
|
325
|
+
|
326
|
+
# Saves a DataFrame to a temporary file and loads it back via DaruLite::IO.load.
context "#save" do
  before do
    @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
      c: [11,22,33,44,55]},
      order: [:a, :b, :c],
      index: [:one, :two, :three, :four, :five])
  end

  it "saves df to a file" do
    outfile = Tempfile.new('dataframe.df')
    begin
      @data_frame.save(outfile.path)
      a = DaruLite::IO.load(outfile.path)
      expect(a).to eq(@data_frame)
    ensure
      # Close and unlink the temporary file even when the expectation fails.
      outfile.close!
    end
  end
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# IO-related behaviour of DaruLite::Vector: Marshal round-trip and #save.
describe DaruLite::Vector do
  context "Marshalling" do
    it "dumps and loads a Vector without changing it" do
      # 101 random integers, matching the original 0..100 range.
      vector = DaruLite::Vector.new(Array.new(101) { rand(100) })
      expect(Marshal.load(Marshal.dump(vector))).to eq(vector)
    end
  end

  context "#save" do
    # One example per dtype listed in ALL_DTYPES (defined outside this file section).
    ALL_DTYPES.each do |dtype|
      it "saves to a file and returns the same Vector of type #{dtype}" do
        vector = DaruLite::Vector.new(
          [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
          dtype: dtype)
        outfile = Tempfile.new('vector.vec')
        begin
          vector.save(outfile.path)
          expect(DaruLite::IO.load(outfile.path)).to eq(vector)
        ensure
          # Close and unlink the temporary file even when the expectation fails.
          outfile.close!
        end
      end
    end
  end
end
|
364
|
+
|
365
|
+
# Marshal round-trip for DaruLite::Index.
describe DaruLite::Index do
  context "Marshalling" do
    it "dumps and loads an Index without changing it" do
      i = DaruLite::Index.new([:a, :b, :c, :d, :e])
      expect(Marshal.load(Marshal.dump(i))).to eq(i)
    end
  end
end
|
373
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'daru_lite/io/sql_data_source'
|
2
|
+
require 'sqlite3'
|
3
|
+
require 'dbi'
|
4
|
+
require 'active_record'
|
5
|
+
|
6
|
+
RSpec.describe DaruLite::IO::SqlDataSource do
|
7
|
+
include_context 'with accounts table in sqlite3 database'
|
8
|
+
|
9
|
+
let(:query) do
|
10
|
+
'select * from accounts'
|
11
|
+
end
|
12
|
+
|
13
|
+
let(:source) do
|
14
|
+
ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
|
15
|
+
ActiveRecord::Base.connection
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '.make_dataframe' do
|
19
|
+
subject(:df) { DaruLite::IO::SqlDataSource.make_dataframe(source, query) }
|
20
|
+
|
21
|
+
context 'with DBI::DatabaseHandle' do
|
22
|
+
let(:source) { DBI.connect("DBI:SQLite3:#{db_name}") }
|
23
|
+
it { is_expected.to be_a(DaruLite::DataFrame) }
|
24
|
+
it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
|
25
|
+
its(:nrows) { is_expected.to eq 2 }
|
26
|
+
end
|
27
|
+
|
28
|
+
context 'with ActiveRecord::Connection' do
|
29
|
+
it { is_expected.to be_a(DaruLite::DataFrame) }
|
30
|
+
it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
|
31
|
+
its(:nrows) { is_expected.to eq 2 }
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'with path to sqlite3 file' do
|
35
|
+
let(:source) { db_name }
|
36
|
+
it { is_expected.to be_a(DaruLite::DataFrame) }
|
37
|
+
it { expect(df.row[0]).to have_attributes(id: 1, age: 20) }
|
38
|
+
its(:nrows) { is_expected.to eq 2 }
|
39
|
+
end
|
40
|
+
|
41
|
+
context 'with an object not a string as a query' do
|
42
|
+
let(:query) { Object.new }
|
43
|
+
it { expect { df }.to raise_error(ArgumentError) }
|
44
|
+
end
|
45
|
+
|
46
|
+
context 'with an object not a database connection' do
|
47
|
+
let(:source) { Object.new }
|
48
|
+
it { expect { df }.to raise_error(ArgumentError) }
|
49
|
+
end
|
50
|
+
|
51
|
+
context 'with path to unsupported db file' do
|
52
|
+
let(:source) { 'spec/fixtures/bank2.dat' }
|
53
|
+
it { expect { df }.to raise_error(ArgumentError) }
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
describe DaruLite::DataFrame, '#to_html' do
|
2
|
+
let(:doc) { Nokogiri::HTML(df.to_html) }
|
3
|
+
subject(:table) { doc.at('table') }
|
4
|
+
let(:header) { doc.at('b')}
|
5
|
+
let(:name) { 'test' }
|
6
|
+
|
7
|
+
let(:splitted_row) { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
|
8
|
+
|
9
|
+
context 'simple' do
|
10
|
+
let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: name)}
|
11
|
+
|
12
|
+
describe 'header' do
|
13
|
+
subject { header }
|
14
|
+
|
15
|
+
it { is_expected.not_to be_nil }
|
16
|
+
its(:text) { is_expected.to eq " DaruLite::DataFrame: test (3x3) " }
|
17
|
+
|
18
|
+
context 'without name' do
|
19
|
+
let(:name) { nil }
|
20
|
+
|
21
|
+
its(:text) { is_expected.to eq " DaruLite::DataFrame(3x3) " }
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'column headers' do
|
26
|
+
subject(:columns) { table.search('tr:nth-child(1) th').map(&:text) }
|
27
|
+
its(:size) { is_expected.to eq df.ncols + 1 }
|
28
|
+
it { is_expected.to eq ['', 'a', 'b', 'c'] }
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'with multi-index columns' do
|
32
|
+
before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz], [:b, :foo]] }
|
33
|
+
|
34
|
+
subject { splitted_row }
|
35
|
+
describe 'first row' do
|
36
|
+
let(:row) { table.search('thead > tr:nth-child(1)') }
|
37
|
+
|
38
|
+
it { is_expected.to eq [
|
39
|
+
'<th rowspan="2"></th>',
|
40
|
+
'<th colspan="2">a</th>',
|
41
|
+
'<th colspan="1">b</th>'
|
42
|
+
] }
|
43
|
+
end
|
44
|
+
|
45
|
+
describe 'next row' do
|
46
|
+
let(:row) { table.search('thead > tr:nth-child(2)') }
|
47
|
+
|
48
|
+
it { is_expected.to eq [
|
49
|
+
'<th colspan="1">foo</th>',
|
50
|
+
'<th colspan="1">baz</th>',
|
51
|
+
'<th colspan="1">foo</th>'
|
52
|
+
] }
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe 'index' do
|
57
|
+
subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
|
58
|
+
its(:count) { is_expected.to eq df.nrows }
|
59
|
+
it { is_expected.to eq df.index.to_a.map(&:to_s) }
|
60
|
+
end
|
61
|
+
|
62
|
+
describe 'values' do
|
63
|
+
subject(:values) {
|
64
|
+
table.search('tr')[1..-1]
|
65
|
+
.map { |tr| tr.search('td')[1..-1].map(&:text) }
|
66
|
+
}
|
67
|
+
its(:count) { is_expected.to eq df.nrows }
|
68
|
+
it { is_expected.to eq df.map_rows{|r| r.map(&:to_s)} }
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
context 'large dataframe' do
|
73
|
+
let(:df) { DaruLite::DataFrame.new({a: [1,2,3]*100, b: [3,4,5]*100, c: [6,7,8]*100}, name: 'test') }
|
74
|
+
|
75
|
+
describe 'header' do
|
76
|
+
subject { header }
|
77
|
+
|
78
|
+
its(:text) { is_expected.to eq " DaruLite::DataFrame: test (300x3) " }
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
|
82
|
+
expect(table.search('tr').size).to eq 33
|
83
|
+
end
|
84
|
+
|
85
|
+
describe '"skipped" row' do
|
86
|
+
subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
|
87
|
+
its(:count) { is_expected.to eq df.ncols + 1 }
|
88
|
+
it { is_expected.to all eq '...' }
|
89
|
+
end
|
90
|
+
|
91
|
+
describe 'last row' do
|
92
|
+
subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
|
93
|
+
its(:count) { is_expected.to eq df.ncols + 1 }
|
94
|
+
it { is_expected.to eq ['299', *df.row[-1].map(&:to_s)] }
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
context 'with multi-index' do
|
99
|
+
let(:df) {
|
100
|
+
DaruLite::DataFrame.new(
|
101
|
+
{
|
102
|
+
a: [1,2,3,4,5,6,7],
|
103
|
+
b: %w[a b c d e f g]
|
104
|
+
}, index: DaruLite::MultiIndex.from_tuples([
|
105
|
+
%w[foo one],
|
106
|
+
%w[foo two],
|
107
|
+
%w[foo three],
|
108
|
+
%w[bar one],
|
109
|
+
%w[bar two],
|
110
|
+
%w[bar three],
|
111
|
+
%w[baz one],
|
112
|
+
]),
|
113
|
+
name: 'test'
|
114
|
+
)
|
115
|
+
}
|
116
|
+
|
117
|
+
describe 'header' do
|
118
|
+
subject { header }
|
119
|
+
|
120
|
+
it { is_expected.not_to be_nil }
|
121
|
+
its(:text) { is_expected.to eq " DaruLite::DataFrame: test (7x2) " }
|
122
|
+
end
|
123
|
+
|
124
|
+
describe 'column headers' do
|
125
|
+
let(:row) { table.search('thead > tr:nth-child(1)') }
|
126
|
+
subject { splitted_row }
|
127
|
+
|
128
|
+
it { is_expected.to eq [
|
129
|
+
'<th colspan="2"></th>',
|
130
|
+
'<th>a</th>',
|
131
|
+
'<th>b</th>'
|
132
|
+
]}
|
133
|
+
end
|
134
|
+
|
135
|
+
context 'with multi-index columns' do
|
136
|
+
before { df.vectors = DaruLite::MultiIndex.from_tuples [[:a, :foo], [:a, :baz]] }
|
137
|
+
|
138
|
+
subject { splitted_row }
|
139
|
+
describe 'first row' do
|
140
|
+
let(:row) { table.search('thead > tr:nth-child(1)') }
|
141
|
+
|
142
|
+
it { is_expected.to eq [
|
143
|
+
'<th colspan="2" rowspan="2"></th>',
|
144
|
+
'<th colspan="2">a</th>',
|
145
|
+
] }
|
146
|
+
end
|
147
|
+
|
148
|
+
describe 'next row' do
|
149
|
+
let(:row) { table.search('thead > tr:nth-child(2)') }
|
150
|
+
|
151
|
+
it { is_expected.to eq [
|
152
|
+
'<th colspan="1">foo</th>',
|
153
|
+
'<th colspan="1">baz</th>',
|
154
|
+
] }
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe 'first row' do
|
159
|
+
let(:row) { table.search('tbody > tr:nth-child(1)') }
|
160
|
+
subject { splitted_row }
|
161
|
+
|
162
|
+
it { is_expected.to eq [
|
163
|
+
'<th rowspan="3">foo</th>',
|
164
|
+
'<th rowspan="1">one</th>',
|
165
|
+
'<td>1</td>',
|
166
|
+
'<td>a</td>'
|
167
|
+
]}
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
describe DaruLite::IRuby::Helpers do
|
2
|
+
context 'MultiIndex' do
|
3
|
+
let(:index) {
|
4
|
+
DaruLite::MultiIndex.from_tuples [
|
5
|
+
[:a,:one,:bar],
|
6
|
+
[:a,:one,:baz],
|
7
|
+
[:a,:two,:bar],
|
8
|
+
[:a,:two,:baz],
|
9
|
+
[:b,:one,:bar],
|
10
|
+
[:b,:two,:bar],
|
11
|
+
[:b,:two,:baz],
|
12
|
+
[:b,:one,:foo],
|
13
|
+
[:c,:one,:bar],
|
14
|
+
[:c,:one,:baz],
|
15
|
+
[:c,:two,:foo],
|
16
|
+
[:c,:two,:bar]
|
17
|
+
]
|
18
|
+
}
|
19
|
+
|
20
|
+
context '#tuples_with_rowspans' do
|
21
|
+
subject { described_class.tuples_with_rowspans(index) }
|
22
|
+
|
23
|
+
it { is_expected.to eq [
|
24
|
+
[[:a,4],[:one,2],[:bar,1]],
|
25
|
+
[ [:baz,1]],
|
26
|
+
[ [:two,2],[:bar,1]],
|
27
|
+
[ [:baz,1]],
|
28
|
+
[[:b,4],[:one,1],[:bar,1]],
|
29
|
+
[ [:two,2],[:bar,1]],
|
30
|
+
[ [:baz,1]],
|
31
|
+
[ [:one,1],[:foo,1]],
|
32
|
+
[[:c,4],[:one,2],[:bar,1]],
|
33
|
+
[ [:baz,1]],
|
34
|
+
[ [:two,2],[:foo,1]],
|
35
|
+
[ [:bar,1]]
|
36
|
+
]}
|
37
|
+
end
|
38
|
+
|
39
|
+
context '#tuples_with_colspans' do
|
40
|
+
subject { described_class.tuples_with_colspans(index) }
|
41
|
+
|
42
|
+
it { is_expected.to eq [
|
43
|
+
[[:a, 4], [:b, 4], [:c, 4]],
|
44
|
+
[[:one, 2], [:two, 2], [:one, 1], [:two, 2], [:one, 1], [:one, 2], [:two, 2]],
|
45
|
+
[[:bar, 1], [:baz, 1], [:bar, 1], [:baz, 1], [:bar, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1], [:baz, 1], [:foo, 1], [:bar, 1]]
|
46
|
+
]}
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
describe DaruLite::MultiIndex, '#to_html' do
|
2
|
+
let(:index) {
|
3
|
+
DaruLite::MultiIndex.from_tuples [
|
4
|
+
[:a,:one,:bar],
|
5
|
+
[:a,:one,:baz],
|
6
|
+
[:a,:two,:bar],
|
7
|
+
[:a,:two,:baz],
|
8
|
+
[:b,:one,:bar],
|
9
|
+
[:b,:two,:bar],
|
10
|
+
[:b,:two,:baz],
|
11
|
+
[:b,:one,:foo],
|
12
|
+
[:c,:one,:bar],
|
13
|
+
[:c,:one,:baz],
|
14
|
+
[:c,:two,:foo],
|
15
|
+
[:c,:two,:bar]
|
16
|
+
]
|
17
|
+
}
|
18
|
+
|
19
|
+
let(:table) { Nokogiri::HTML(index.to_html) }
|
20
|
+
|
21
|
+
describe 'first row' do
|
22
|
+
subject { table.at('tr:first-child > th') }
|
23
|
+
its(['colspan']) { is_expected.to eq '3' }
|
24
|
+
its(:text) { is_expected.to eq 'DaruLite::MultiIndex(12x3)' }
|
25
|
+
end
|
26
|
+
|
27
|
+
describe 'next row' do
|
28
|
+
let(:row) { table.at('tr:nth-child(2)') }
|
29
|
+
subject { row.inner_html.scan(/<th.+?<\/th>/) }
|
30
|
+
|
31
|
+
it { is_expected.to eq [
|
32
|
+
'<th rowspan="4">a</th>',
|
33
|
+
'<th rowspan="2">one</th>',
|
34
|
+
'<th rowspan="1">bar</th>'
|
35
|
+
]}
|
36
|
+
end
|
37
|
+
end
|