daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
"Group","Treatment","Replicate","RelativeFitness"
|
2
|
+
"BKB","Tube",1,0.869962555792838
|
3
|
+
"BKB","Tube",2,1.00036299125423
|
4
|
+
"BKB","Tube",3,0.982935090384188
|
5
|
+
"BAC","Tube",1,0.810391635206191
|
6
|
+
"BAC","Tube",2,0.795106571577928
|
7
|
+
"JDK","Tube",1,0.849203581734814
|
8
|
+
"JDK","Tube",2,0.917636977577209
|
9
|
+
"JDK","Tube",3,0.905323024195181
|
10
|
+
"ETH","Tube",1,0.930820887284085
|
11
|
+
"ETH","Tube",2,0.958183317853959
|
12
|
+
"ETH","Tube",3,0.91428823286218
|
13
|
+
"SWI","Tube",1,0.918513532826773
|
14
|
+
"SWI","Tube",2,0.909023164202865
|
15
|
+
"SWI","Tube",3,0.928404818223812
|
16
|
+
"PPP","Tube",1,0.990500457882474
|
17
|
+
"PPP","Tube",2,0.97156464183624
|
18
|
+
"PPP","Tube",3,0.948766905642272
|
19
|
+
"ECO","Tube",1,0.996870763277447
|
20
|
+
"ECO","Tube",2,0.976750424056972
|
21
|
+
"ECO","Tube",3,0.939479247244218
|
22
|
+
"DOS","Tube",1,0.9461642789306
|
23
|
+
"DOS","Tube",2,0.951056617451754
|
24
|
+
"DOS","Tube",3,0.939563175900147
|
25
|
+
"FIT","Tube",1,0.942215594296048
|
26
|
+
"FIT","Tube",2,0.926698027408171
|
27
|
+
"FIT","Tube",3,1.00031750759377
|
28
|
+
"HHE","Tube",1,0.944414164259624
|
29
|
+
"HHE","Tube",2,0.95194592074032
|
30
|
+
"HHE","Tube",3,0.922485751593779
|
31
|
+
"H2W","Tube",1,0.915304498822462
|
32
|
+
"H2W","Tube",2,0.853906227834699
|
33
|
+
"H2W","Tube",3,0.938698147417512
|
34
|
+
"BKB","Dish",1,1.15254427639376
|
35
|
+
"BKB","Dish",2,1.28708514998039
|
36
|
+
"BKB","Dish",3,1.50741545998468
|
37
|
+
"BAC","Dish",2,1.65223419104399
|
38
|
+
"BAC","Dish",3,1.61502230247434
|
39
|
+
"JDK","Dish",1,1.5102682623396
|
40
|
+
"JDK","Dish",2,1.58270078151532
|
41
|
+
"JDK","Dish",3,1.54715152852448
|
42
|
+
"ETH","Dish",1,1.61281555981988
|
43
|
+
"ETH","Dish",2,1.36445572541848
|
44
|
+
"ETH","Dish",3,1.47155233724852
|
45
|
+
"SWI","Dish",1,1.36255560722945
|
46
|
+
"SWI","Dish",2,1.51569228710501
|
47
|
+
"SWI","Dish",3,1.47714121664773
|
48
|
+
"PPP","Dish",1,1.50896917413568
|
49
|
+
"PPP","Dish",2,1.58138592886817
|
50
|
+
"PPP","Dish",3,1.55356839081014
|
51
|
+
"ECO","Dish",1,1.69927564777656
|
52
|
+
"ECO","Dish",2,1.45605668065038
|
53
|
+
"ECO","Dish",3,1.52825762511041
|
54
|
+
"DOS","Dish",1,1.59453120031454
|
55
|
+
"DOS","Dish",2,1.58132511409296
|
56
|
+
"DOS","Dish",3,1.58558683941181
|
57
|
+
"FIT","Dish",1,1.01468578343171
|
58
|
+
"FIT","Dish",2,0.955220815085047
|
59
|
+
"FIT","Dish",3,1.03597298511451
|
60
|
+
"HHE","Dish",1,1.49399422139513
|
61
|
+
"HHE","Dish",2,1.32980278545457
|
62
|
+
"HHE","Dish",3,1.4505220676174
|
63
|
+
"H2W","Dish",1,1.52460143511286
|
64
|
+
"H2W","Dish",2,1.53958247554139
|
65
|
+
"H2W","Dish",3,1.51149897493835
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Date - 28 June 2016. daru version - 0.1.3.1
|
2
|
+
# Compare speed of Ruby stdlib CSV and DataFrame.from_csv.
|
3
|
+
|
4
|
+
require 'benchmark'
|
5
|
+
require 'csv'
|
6
|
+
require 'daru_lite'
|
7
|
+
|
8
|
+
Benchmark.bm do |x|
|
9
|
+
x.report("Ruby CSV") do
|
10
|
+
CSV.read("TradeoffData.csv")
|
11
|
+
end
|
12
|
+
|
13
|
+
x.report("DataFrame.from_csv") do
|
14
|
+
DaruLite::DataFrame.from_csv("TradeoffData.csv")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# FIXME: Improve this. It's 4 times slower than Ruby CSV reading!!
|
19
|
+
|
20
|
+
# user system total real
|
21
|
+
# Ruby CSV 0.010000 0.000000 0.010000 ( 0.002385)
|
22
|
+
# DataFrame.from_csv 0.000000 0.000000 0.000000 ( 0.008225)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
Benchmark.bm do |x|
|
7
|
+
x.report("Create with Arrays and clone") do
|
8
|
+
df = DaruLite::DataFrame.new({
|
9
|
+
a: 100000.times.map { rand },
|
10
|
+
b: 100000.times.map { rand },
|
11
|
+
c: 100000.times.map { rand }
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
x.report("Create with Vectors and clone") do
|
16
|
+
df = DaruLite::DataFrame.new({
|
17
|
+
a: DaruLite::Vector.new(100000.times.map { rand }),
|
18
|
+
b: DaruLite::Vector.new(100000.times.map { rand }),
|
19
|
+
c: DaruLite::Vector.new(100000.times.map { rand })
|
20
|
+
})
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Create with Vector and dont clone") do
|
24
|
+
df = DaruLite::DataFrame.new({
|
25
|
+
a: DaruLite::Vector.new(100000.times.map { rand }),
|
26
|
+
b: DaruLite::Vector.new(100000.times.map { rand }),
|
27
|
+
c: DaruLite::Vector.new(100000.times.map { rand })
|
28
|
+
}, clone: false)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Create by row from Arrays") do
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# ===== Benchmarks =====
|
36
|
+
# user system total real
|
37
|
+
# Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
|
38
|
+
# Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
|
39
|
+
# Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
require 'sqlite3'
|
6
|
+
require 'dbi'
|
7
|
+
require 'active_record'
|
8
|
+
|
9
|
+
db_name = 'daru_lite_test.sqlite'
|
10
|
+
FileUtils.rm(db_name) if File.file?(db_name)
|
11
|
+
|
12
|
+
SQLite3::Database.new(db_name).tap do |db|
|
13
|
+
db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
|
14
|
+
|
15
|
+
values = 1.upto(100_000).map { |i| %!(#{i},"name_#{i}",#{rand(100)})! }.join(",")
|
16
|
+
db.execute "insert into accounts values #{values}"
|
17
|
+
end
|
18
|
+
|
19
|
+
ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
|
20
|
+
ActiveRecord::Base.connection
|
21
|
+
|
22
|
+
class Account < ActiveRecord::Base; end
|
23
|
+
|
24
|
+
Benchmark.bm do |x|
|
25
|
+
x.report("DataFrame.from_sql") do
|
26
|
+
DaruLite::DataFrame.from_sql(ActiveRecord::Base.connection, "SELECT * FROM accounts")
|
27
|
+
end
|
28
|
+
|
29
|
+
x.report("DataFrame.from_activerecord") do
|
30
|
+
DaruLite::DataFrame.from_activerecord(Account.all)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
FileUtils.rm(db_name)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
|
11
|
+
df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
|
12
|
+
h[v] = DaruLite::Vector.new(1.upto(n).to_a)
|
13
|
+
h
|
14
|
+
end
|
15
|
+
|
16
|
+
df = DaruLite::DataFrame.new(df_h)
|
17
|
+
|
18
|
+
Benchmark.bm do |bm|
|
19
|
+
bm.report("dupe (n=#{n})") do
|
20
|
+
df.dup
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# ===== Benchmarks =====
|
26
|
+
# System: iMac Late 2013 3.5GHz Core i7
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
#dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
|
30
|
+
# user system total real
|
31
|
+
#dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
|
32
|
+
# user system total real
|
33
|
+
#dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
|
39
|
+
# Note that the n here is 100x smaller than above
|
40
|
+
# user system total real
|
41
|
+
#dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
|
42
|
+
# user system total real
|
43
|
+
#dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
|
44
|
+
# user system total real
|
45
|
+
#dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
data = DaruLite::DataFrame.from_csv 'TradeoffData.csv'
|
7
|
+
|
8
|
+
Benchmark.bm do |x|
|
9
|
+
x.report("Single column grouping") do
|
10
|
+
@single = data.group_by(['Treatment'])
|
11
|
+
end
|
12
|
+
|
13
|
+
x.report("Multi-column grouping") do
|
14
|
+
@multi = data.group_by(['Group', 'Treatment'])
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Single mean") do
|
18
|
+
@single.mean
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Multi mean") do
|
22
|
+
@multi.mean
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ===== Benchmarks =====
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000340)
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.001208)
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.004892)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
keys = (1..(n)).to_a
|
11
|
+
base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
|
12
|
+
lookup_hash = keys.map { |k| [k, k * 100]}.to_h
|
13
|
+
|
14
|
+
base_data_df = DaruLite::DataFrame.new(base_data)
|
15
|
+
lookup_df = DaruLite::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
|
16
|
+
|
17
|
+
Benchmark.bm do |bm|
|
18
|
+
bm.report("Inner join (n=#{n})") do
|
19
|
+
base_data_df.join(lookup_df, on: [:keys], how: :inner)
|
20
|
+
end
|
21
|
+
|
22
|
+
bm.report("Outer join (n=#{n})") do
|
23
|
+
base_data_df.join(lookup_df, on: [:keys], how: :outer)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# ===== Benchmarks =====
|
29
|
+
# System: MacBook Pro Mid 2014 3GHz Core i7
|
30
|
+
#
|
31
|
+
# user system total real
|
32
|
+
#Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
|
33
|
+
#Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
|
34
|
+
# user system total real
|
35
|
+
#Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
|
36
|
+
#Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
|
37
|
+
# user system total real
|
38
|
+
#Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
|
39
|
+
#Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
|
40
|
+
|
41
|
+
|
42
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
|
43
|
+
# Note that the n here is 10x smaller than above
|
44
|
+
# user system total real
|
45
|
+
#Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
|
46
|
+
#Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
|
47
|
+
# user system total real
|
48
|
+
#Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
|
49
|
+
#Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
|
50
|
+
# user system total real
|
51
|
+
#Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
|
52
|
+
#Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Access single row") do
|
14
|
+
df.row[50]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access rows by comma") do
|
18
|
+
df.row[*(5..40000).to_a.shuffle]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Individual rows") do
|
22
|
+
rows = []
|
23
|
+
index = (5..40000).to_a.shuffle
|
24
|
+
index.each do |a|
|
25
|
+
rows << df.row[a].to_a
|
26
|
+
end
|
27
|
+
|
28
|
+
DaruLite::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Access rows by range") do
|
32
|
+
df.row[5..40000]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ==== Benchmarks ====
|
37
|
+
# user system total real
|
38
|
+
# Access single row 0.000000 0.000000 0.000000 ( 0.000059)
|
39
|
+
# Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
|
40
|
+
# Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
|
41
|
+
# Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Set a single row with Array") do
|
14
|
+
df.row[5] = [55,22,65]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Set a single row with DaruLite::Vector") do
|
18
|
+
df.row[3456] = DaruLite::Vector.new([3,54,11], index: [:b,:e,:a])
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Create a new row with Array") do
|
22
|
+
df.row[100001] = [34,66,11]
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Create a new row with DaruLite::Vector") do
|
26
|
+
df.row[100005] = DaruLite::Vector.new([34,66,11], index: [:a,:b,:t])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# ==== Benchmarks ====
|
31
|
+
#
|
32
|
+
# user system total real
|
33
|
+
# Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
|
34
|
+
# Set a single row with DaruLite::Vector 0.600000 0.000000 0.600000 ( 0.598599)
|
35
|
+
# Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
|
36
|
+
# Create a new row with DaruLite::Vector 0.950000 0.000000 0.950000 ( 0.950725)
|
@@ -0,0 +1,51 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
vector = DaruLite::Vector.new(10000.times.map.to_a.shuffle)
|
7
|
+
df = DaruLite::DataFrame.new({
|
8
|
+
a: vector,
|
9
|
+
b: vector,
|
10
|
+
c: vector
|
11
|
+
})
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Sort a Vector without any args") do
|
14
|
+
vector.sort
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Sort vector in descending order with custom <=> operator") do
|
18
|
+
vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Sort single column of DataFrame") do
|
22
|
+
df.sort([:a])
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Sort two columns of DataFrame") do
|
26
|
+
df.sort([:c,:a])
|
27
|
+
end
|
28
|
+
|
29
|
+
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
30
|
+
df.sort([:c,:a], ascending: [true, false],
|
31
|
+
by: { c: lambda { |a| a.to_s },
|
32
|
+
a: lambda { |a| a+1 } })
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# FIXME: MASSIVE SPEEDUP NECESSARY!
|
37
|
+
|
38
|
+
# ===== Benchmarks =====
|
39
|
+
# user system total real
|
40
|
+
# Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
|
41
|
+
# Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
|
42
|
+
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
43
|
+
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
44
|
+
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|
45
|
+
|
46
|
+
# ===== Current Benchmarks =====
|
47
|
+
# Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
|
48
|
+
# Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
|
49
|
+
# Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
|
50
|
+
# Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
|
51
|
+
# Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'daru_lite'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
vector = DaruLite::Vector.new(
|
5
|
+
(10**6).times.map.to_a.shuffle,
|
6
|
+
missing_values: 100.times.map.to_a.shuffle
|
7
|
+
)
|
8
|
+
|
9
|
+
vector = DaruLite::Vector.new(
|
10
|
+
10000.times.map.to_a.shuffle,
|
11
|
+
missing_values: 100.times.map.to_a.shuffle,
|
12
|
+
)
|
13
|
+
|
14
|
+
Benchmark.bm do |x|
|
15
|
+
x.report("Mean of a vector") do
|
16
|
+
vector.mean
|
17
|
+
end
|
18
|
+
|
19
|
+
x.report("Minimum of a vector") do
|
20
|
+
vector.min
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# ===== Benchmarks =====
|
25
|
+
#
|
26
|
+
# user system total real
|
27
|
+
# Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
|
28
|
+
# Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: 10000.times.map { rand },
|
8
|
+
b: 10000.times.map { rand },
|
9
|
+
c: 10000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Single Vector access") do
|
14
|
+
df[:a]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access as range") do
|
18
|
+
df[:a..:c]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Access with commas") do
|
22
|
+
df[:a, :c]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ======== Benchmarks =======
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
|
30
|
+
# Access as range 0.090000 0.000000 0.090000 ( 0.084584)
|
31
|
+
# Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
index = DaruLite::Index.new((0...100000).to_a.shuffle)
|
13
|
+
|
14
|
+
Benchmark.bm do |x|
|
15
|
+
x.report("Assign new vector as Array") do
|
16
|
+
df[:d] = 100000.times.map { rand }
|
17
|
+
end
|
18
|
+
|
19
|
+
x.report("Reassign same vector as Array") do
|
20
|
+
df[:a] = 100000.times.map { rand }
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Assign new Vector as DaruLite::Vector") do
|
24
|
+
df[:e] = DaruLite::Vector.new(100000.times.map { rand })
|
25
|
+
end
|
26
|
+
|
27
|
+
x.report("Reassign same Vector as DaruLite::Vector") do
|
28
|
+
df[:b] = DaruLite::Vector.new(100000.times.map { rand })
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Reassgin differently indexed DaruLite::Vector") do
|
32
|
+
df[:b] = DaruLite::Vector.new(100000.times.map { rand }, index: index)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ===== Benchmarks =====
|
37
|
+
# user system total real
|
38
|
+
# Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
|
39
|
+
# Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
|
40
|
+
# Assign new Vector as DaruLite::Vector 0.940000 0.000000 0.940000 (0.947879)
|
41
|
+
# Reassign same Vector as DaruLite::Vector 0.760000 0.020000 0.780000 (0.769969)
|
42
|
+
# Reassgin differently indexed DaruLite::Vector <Too embarrassingly slow.>
|
@@ -0,0 +1,48 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: 100000.times.map { |i| i },
|
8
|
+
b: 100000.times.map { |i| i },
|
9
|
+
c: 100000.times.map { |i| i }
|
10
|
+
}, index: DaruLite::Index.new(100000.times.map.to_a.shuffle))
|
11
|
+
|
12
|
+
puts "Benchmarking DataFrame#where\n"
|
13
|
+
Benchmark.bm do |x|
|
14
|
+
x.report("Basic one liner") do
|
15
|
+
df.where(df[:a].mt(2341))
|
16
|
+
end
|
17
|
+
|
18
|
+
x.report("Little complex statement") do
|
19
|
+
df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
puts "Benchmarking Vector#where\n"
|
24
|
+
v = DaruLite::Vector.new(
|
25
|
+
100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
|
26
|
+
|
27
|
+
Benchmark.bm do |x|
|
28
|
+
x.report("Basic one liner") do
|
29
|
+
v.where(v.mteq(1000))
|
30
|
+
end
|
31
|
+
|
32
|
+
x.report("Little complex statement") do
|
33
|
+
v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# ====== Benchmarks ======
|
38
|
+
#
|
39
|
+
# Benchmarking DataFrame#where
|
40
|
+
#
|
41
|
+
# user system total real
|
42
|
+
# Basic one liner 0.700000 0.000000 0.700000 (0.703532)
|
43
|
+
# Little complex statement 0.120000 0.000000 0.120000 (0.121765)
|
44
|
+
#
|
45
|
+
# Benchmarking Vector#where
|
46
|
+
# user system total real
|
47
|
+
# Basic one liner 0.240000 0.000000 0.240000 (0.245787)
|
48
|
+
# Little complex statement 0.100000 0.000000 0.100000 (0.094423)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru_lite'
|
5
|
+
|
6
|
+
df = DaruLite::DataFrame.new({
|
7
|
+
a: [1,2,3,4,5,6]*100,
|
8
|
+
b: ['a','b','c','d','e','f']*100,
|
9
|
+
c: [11,22,33,44,55,66]*100
|
10
|
+
}, index: (1..600).to_a.shuffle)
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("where") do
|
14
|
+
df.where(df[:a].eq(2) | df[:c].eq(55))
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("filter_rows") do
|
18
|
+
df.filter(:row) do |r|
|
19
|
+
r[:a] == 2 or r[:c] == 55
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# ===== Benchmarks =====
|
25
|
+
#
|
26
|
+
# user system total real
|
27
|
+
# where 0.000000 0.000000 0.000000 ( 0.002575)
|
28
|
+
# filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
|
data/daru_lite.gemspec
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
$:.unshift File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
require 'daru_lite/version.rb'
|
5
|
+
|
6
|
+
DaruLite::DESCRIPTION = <<MSG
|
7
|
+
Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
|
8
|
+
of data. Daru works seamlessly across interpreters and leverages interpreter-specific
|
9
|
+
optimizations whenever they are available.
|
10
|
+
|
11
|
+
It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
|
12
|
+
and can be used with many others like mixed_models, gnuplotrb and iruby.
|
13
|
+
|
14
|
+
Daru Lite is a fork of Daru that aims to focus on data manipulation and stability.
|
15
|
+
MSG
|
16
|
+
|
17
|
+
Gem::Specification.new do |spec|
|
18
|
+
spec.name = 'daru_lite'
|
19
|
+
spec.version = DaruLite::VERSION
|
20
|
+
spec.authors = ['Thomas Naude-Filonnière', 'Maxime Lasserre', 'Julie Thomas', 'Amar Slaoua', 'Mourtada Belhantri']
|
21
|
+
spec.summary = %q{Data Analysis in RUby, stripped down}
|
22
|
+
spec.description = DaruLite::DESCRIPTION
|
23
|
+
spec.homepage = "https://github.com/pollandroll/daru"
|
24
|
+
spec.license = 'BSD-2-Clause'
|
25
|
+
|
26
|
+
spec.files = `git ls-files -z`.split("\x0")
|
27
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
28
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
29
|
+
spec.require_paths = ["lib"]
|
30
|
+
|
31
|
+
spec.add_development_dependency 'activerecord', '~> 6.0'
|
32
|
+
spec.add_development_dependency 'awesome_print', '~> 1.9.2'
|
33
|
+
spec.add_development_dependency 'bundler', '~> 2.1', '>= 2.1.4'
|
34
|
+
spec.add_development_dependency 'dbd-sqlite3', '~> 1.2.5'
|
35
|
+
spec.add_development_dependency 'dbi', '~> 0.4.5'
|
36
|
+
spec.add_development_dependency 'distribution', '~> 0.8'
|
37
|
+
spec.add_development_dependency 'matrix', '~> 0.4.2'
|
38
|
+
spec.add_development_dependency 'nokogiri', '~> 1.16.2'
|
39
|
+
spec.add_development_dependency 'prime', '~> 0.1.2'
|
40
|
+
spec.add_development_dependency 'pry', '~> 0.14'
|
41
|
+
spec.add_development_dependency 'pry-byebug', '~> 3.10.1'
|
42
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
43
|
+
spec.add_development_dependency 'rspec', '~> 3.11'
|
44
|
+
spec.add_development_dependency 'rspec-its', '~> 1.3.0'
|
45
|
+
spec.add_development_dependency 'rubocop', '~> 1.60'
|
46
|
+
spec.add_development_dependency 'rubocop-performance', '~> 1.20.2'
|
47
|
+
spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
|
48
|
+
spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
|
49
|
+
spec.add_development_dependency 'simplecov', '~> 0.22.0'
|
50
|
+
spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
|
51
|
+
spec.add_development_dependency 'sqlite3', '~> 1.7.2'
|
52
|
+
# issue : https://github.com/SciRuby/daru/issues/493 occurred
|
53
|
+
# with latest version of sqlite3
|
54
|
+
spec.add_development_dependency 'webmock', '~> 3.20.0'
|
55
|
+
end
|
data/images/README.md
ADDED
data/images/con0.png
ADDED
Binary file
|
data/images/con1.png
ADDED
Binary file
|
data/images/init0.png
ADDED
Binary file
|