daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"Group","Treatment","Replicate","RelativeFitness"
|
|
2
|
+
"BKB","Tube",1,0.869962555792838
|
|
3
|
+
"BKB","Tube",2,1.00036299125423
|
|
4
|
+
"BKB","Tube",3,0.982935090384188
|
|
5
|
+
"BAC","Tube",1,0.810391635206191
|
|
6
|
+
"BAC","Tube",2,0.795106571577928
|
|
7
|
+
"JDK","Tube",1,0.849203581734814
|
|
8
|
+
"JDK","Tube",2,0.917636977577209
|
|
9
|
+
"JDK","Tube",3,0.905323024195181
|
|
10
|
+
"ETH","Tube",1,0.930820887284085
|
|
11
|
+
"ETH","Tube",2,0.958183317853959
|
|
12
|
+
"ETH","Tube",3,0.91428823286218
|
|
13
|
+
"SWI","Tube",1,0.918513532826773
|
|
14
|
+
"SWI","Tube",2,0.909023164202865
|
|
15
|
+
"SWI","Tube",3,0.928404818223812
|
|
16
|
+
"PPP","Tube",1,0.990500457882474
|
|
17
|
+
"PPP","Tube",2,0.97156464183624
|
|
18
|
+
"PPP","Tube",3,0.948766905642272
|
|
19
|
+
"ECO","Tube",1,0.996870763277447
|
|
20
|
+
"ECO","Tube",2,0.976750424056972
|
|
21
|
+
"ECO","Tube",3,0.939479247244218
|
|
22
|
+
"DOS","Tube",1,0.9461642789306
|
|
23
|
+
"DOS","Tube",2,0.951056617451754
|
|
24
|
+
"DOS","Tube",3,0.939563175900147
|
|
25
|
+
"FIT","Tube",1,0.942215594296048
|
|
26
|
+
"FIT","Tube",2,0.926698027408171
|
|
27
|
+
"FIT","Tube",3,1.00031750759377
|
|
28
|
+
"HHE","Tube",1,0.944414164259624
|
|
29
|
+
"HHE","Tube",2,0.95194592074032
|
|
30
|
+
"HHE","Tube",3,0.922485751593779
|
|
31
|
+
"H2W","Tube",1,0.915304498822462
|
|
32
|
+
"H2W","Tube",2,0.853906227834699
|
|
33
|
+
"H2W","Tube",3,0.938698147417512
|
|
34
|
+
"BKB","Dish",1,1.15254427639376
|
|
35
|
+
"BKB","Dish",2,1.28708514998039
|
|
36
|
+
"BKB","Dish",3,1.50741545998468
|
|
37
|
+
"BAC","Dish",2,1.65223419104399
|
|
38
|
+
"BAC","Dish",3,1.61502230247434
|
|
39
|
+
"JDK","Dish",1,1.5102682623396
|
|
40
|
+
"JDK","Dish",2,1.58270078151532
|
|
41
|
+
"JDK","Dish",3,1.54715152852448
|
|
42
|
+
"ETH","Dish",1,1.61281555981988
|
|
43
|
+
"ETH","Dish",2,1.36445572541848
|
|
44
|
+
"ETH","Dish",3,1.47155233724852
|
|
45
|
+
"SWI","Dish",1,1.36255560722945
|
|
46
|
+
"SWI","Dish",2,1.51569228710501
|
|
47
|
+
"SWI","Dish",3,1.47714121664773
|
|
48
|
+
"PPP","Dish",1,1.50896917413568
|
|
49
|
+
"PPP","Dish",2,1.58138592886817
|
|
50
|
+
"PPP","Dish",3,1.55356839081014
|
|
51
|
+
"ECO","Dish",1,1.69927564777656
|
|
52
|
+
"ECO","Dish",2,1.45605668065038
|
|
53
|
+
"ECO","Dish",3,1.52825762511041
|
|
54
|
+
"DOS","Dish",1,1.59453120031454
|
|
55
|
+
"DOS","Dish",2,1.58132511409296
|
|
56
|
+
"DOS","Dish",3,1.58558683941181
|
|
57
|
+
"FIT","Dish",1,1.01468578343171
|
|
58
|
+
"FIT","Dish",2,0.955220815085047
|
|
59
|
+
"FIT","Dish",3,1.03597298511451
|
|
60
|
+
"HHE","Dish",1,1.49399422139513
|
|
61
|
+
"HHE","Dish",2,1.32980278545457
|
|
62
|
+
"HHE","Dish",3,1.4505220676174
|
|
63
|
+
"H2W","Dish",1,1.52460143511286
|
|
64
|
+
"H2W","Dish",2,1.53958247554139
|
|
65
|
+
"H2W","Dish",3,1.51149897493835
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Date - 28 June 2016. daru version - 0.1.3.1
|
|
2
|
+
# Compare speed of Ruby stdlib CSV and DataFrame.from_csv.
|
|
3
|
+
|
|
4
|
+
require 'benchmark'
|
|
5
|
+
require 'csv'
|
|
6
|
+
require 'daru_lite'
|
|
7
|
+
|
|
8
|
+
Benchmark.bm do |x|
|
|
9
|
+
x.report("Ruby CSV") do
|
|
10
|
+
CSV.read("TradeoffData.csv")
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
x.report("DataFrame.from_csv") do
|
|
14
|
+
DaruLite::DataFrame.from_csv("TradeoffData.csv")
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# FIXME: Improve this. It's 4 times slower than Ruby CSV reading!!
|
|
19
|
+
|
|
20
|
+
# user system total real
|
|
21
|
+
# Ruby CSV 0.010000 0.000000 0.010000 ( 0.002385)
|
|
22
|
+
# DataFrame.from_csv 0.000000 0.000000 0.000000 ( 0.008225)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
Benchmark.bm do |x|
|
|
7
|
+
x.report("Create with Arrays and clone") do
|
|
8
|
+
df = DaruLite::DataFrame.new({
|
|
9
|
+
a: 100000.times.map { rand },
|
|
10
|
+
b: 100000.times.map { rand },
|
|
11
|
+
c: 100000.times.map { rand }
|
|
12
|
+
})
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
x.report("Create with Vectors and clone") do
|
|
16
|
+
df = DaruLite::DataFrame.new({
|
|
17
|
+
a: DaruLite::Vector.new(100000.times.map { rand }),
|
|
18
|
+
b: DaruLite::Vector.new(100000.times.map { rand }),
|
|
19
|
+
c: DaruLite::Vector.new(100000.times.map { rand })
|
|
20
|
+
})
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
x.report("Create with Vector and dont clone") do
|
|
24
|
+
df = DaruLite::DataFrame.new({
|
|
25
|
+
a: DaruLite::Vector.new(100000.times.map { rand }),
|
|
26
|
+
b: DaruLite::Vector.new(100000.times.map { rand }),
|
|
27
|
+
c: DaruLite::Vector.new(100000.times.map { rand })
|
|
28
|
+
}, clone: false)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
x.report("Create by row from Arrays") do
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# ===== Benchmarks =====
|
|
36
|
+
# user system total real
|
|
37
|
+
# Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
|
|
38
|
+
# Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
|
|
39
|
+
# Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
require 'sqlite3'
|
|
6
|
+
require 'dbi'
|
|
7
|
+
require 'active_record'
|
|
8
|
+
|
|
9
|
+
db_name = 'daru_lite_test.sqlite'
|
|
10
|
+
FileUtils.rm(db_name) if File.file?(db_name)
|
|
11
|
+
|
|
12
|
+
SQLite3::Database.new(db_name).tap do |db|
|
|
13
|
+
db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
|
|
14
|
+
|
|
15
|
+
values = 1.upto(100_000).map { |i| %!(#{i},"name_#{i}",#{rand(100)})! }.join(",")
|
|
16
|
+
db.execute "insert into accounts values #{values}"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
|
|
20
|
+
ActiveRecord::Base.connection
|
|
21
|
+
|
|
22
|
+
class Account < ActiveRecord::Base; end
|
|
23
|
+
|
|
24
|
+
Benchmark.bm do |x|
|
|
25
|
+
x.report("DataFrame.from_sql") do
|
|
26
|
+
DaruLite::DataFrame.from_sql(ActiveRecord::Base.connection, "SELECT * FROM accounts")
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
x.report("DataFrame.from_activerecord") do
|
|
30
|
+
DaruLite::DataFrame.from_activerecord(Account.all)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
FileUtils.rm(db_name)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
# Check scaling
|
|
7
|
+
base_n = 10000
|
|
8
|
+
0.upto(2) do |iscale|
|
|
9
|
+
n = base_n * 2**iscale
|
|
10
|
+
|
|
11
|
+
df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
|
|
12
|
+
h[v] = DaruLite::Vector.new(1.upto(n).to_a)
|
|
13
|
+
h
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
df = DaruLite::DataFrame.new(df_h)
|
|
17
|
+
|
|
18
|
+
Benchmark.bm do |bm|
|
|
19
|
+
bm.report("dupe (n=#{n})") do
|
|
20
|
+
df.dup
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# ===== Benchmarks =====
|
|
26
|
+
# System: iMac Late 2013 3.5GHz Core i7
|
|
27
|
+
#
|
|
28
|
+
# user system total real
|
|
29
|
+
#dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
|
|
30
|
+
# user system total real
|
|
31
|
+
#dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
|
|
32
|
+
# user system total real
|
|
33
|
+
#dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
|
|
39
|
+
# Note that the n here is 100x smaller than above
|
|
40
|
+
# user system total real
|
|
41
|
+
#dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
|
|
42
|
+
# user system total real
|
|
43
|
+
#dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
|
|
44
|
+
# user system total real
|
|
45
|
+
#dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
data = DaruLite::DataFrame.from_csv 'TradeoffData.csv'
|
|
7
|
+
|
|
8
|
+
Benchmark.bm do |x|
|
|
9
|
+
x.report("Single column grouping") do
|
|
10
|
+
@single = data.group_by(['Treatment'])
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
x.report("Multi-column grouping") do
|
|
14
|
+
@multi = data.group_by(['Group', 'Treatment'])
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("Single mean") do
|
|
18
|
+
@single.mean
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
x.report("Multi mean") do
|
|
22
|
+
@multi.mean
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# ===== Benchmarks =====
|
|
27
|
+
#
|
|
28
|
+
# user system total real
|
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000340)
|
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
|
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.001208)
|
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.004892)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
# Check scaling
|
|
7
|
+
base_n = 10000
|
|
8
|
+
0.upto(2) do |iscale|
|
|
9
|
+
n = base_n * 2**iscale
|
|
10
|
+
keys = (1..(n)).to_a
|
|
11
|
+
base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
|
|
12
|
+
lookup_hash = keys.map { |k| [k, k * 100]}.to_h
|
|
13
|
+
|
|
14
|
+
base_data_df = DaruLite::DataFrame.new(base_data)
|
|
15
|
+
lookup_df = DaruLite::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
|
|
16
|
+
|
|
17
|
+
Benchmark.bm do |bm|
|
|
18
|
+
bm.report("Inner join (n=#{n})") do
|
|
19
|
+
base_data_df.join(lookup_df, on: [:keys], how: :inner)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
bm.report("Outer join (n=#{n})") do
|
|
23
|
+
base_data_df.join(lookup_df, on: [:keys], how: :outer)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# ===== Benchmarks =====
|
|
29
|
+
# System: MacBook Pro Mid 2014 3GHz Core i7
|
|
30
|
+
#
|
|
31
|
+
# user system total real
|
|
32
|
+
#Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
|
|
33
|
+
#Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
|
|
34
|
+
# user system total real
|
|
35
|
+
#Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
|
|
36
|
+
#Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
|
|
37
|
+
# user system total real
|
|
38
|
+
#Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
|
|
39
|
+
#Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
|
|
43
|
+
# Note that the n here is 10x smaller than above
|
|
44
|
+
# user system total real
|
|
45
|
+
#Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
|
|
46
|
+
#Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
|
|
47
|
+
# user system total real
|
|
48
|
+
#Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
|
|
49
|
+
#Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
|
|
50
|
+
# user system total real
|
|
51
|
+
#Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
|
|
52
|
+
#Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: 100000.times.map { rand },
|
|
8
|
+
b: 100000.times.map { rand },
|
|
9
|
+
c: 100000.times.map { rand }
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
Benchmark.bm do |x|
|
|
13
|
+
x.report("Access single row") do
|
|
14
|
+
df.row[50]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("Access rows by comma") do
|
|
18
|
+
df.row[*(5..40000).to_a.shuffle]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
x.report("Individual rows") do
|
|
22
|
+
rows = []
|
|
23
|
+
index = (5..40000).to_a.shuffle
|
|
24
|
+
index.each do |a|
|
|
25
|
+
rows << df.row[a].to_a
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
DaruLite::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
x.report("Access rows by range") do
|
|
32
|
+
df.row[5..40000]
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# ==== Benchmarks ====
|
|
37
|
+
# user system total real
|
|
38
|
+
# Access single row 0.000000 0.000000 0.000000 ( 0.000059)
|
|
39
|
+
# Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
|
|
40
|
+
# Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
|
|
41
|
+
# Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: 100000.times.map { rand },
|
|
8
|
+
b: 100000.times.map { rand },
|
|
9
|
+
c: 100000.times.map { rand }
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
Benchmark.bm do |x|
|
|
13
|
+
x.report("Set a single row with Array") do
|
|
14
|
+
df.row[5] = [55,22,65]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("Set a single row with DaruLite::Vector") do
|
|
18
|
+
df.row[3456] = DaruLite::Vector.new([3,54,11], index: [:b,:e,:a])
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
x.report("Create a new row with Array") do
|
|
22
|
+
df.row[100001] = [34,66,11]
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
x.report("Create a new row with DaruLite::Vector") do
|
|
26
|
+
df.row[100005] = DaruLite::Vector.new([34,66,11], index: [:a,:b,:t])
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# ==== Benchmarks ====
|
|
31
|
+
#
|
|
32
|
+
# user system total real
|
|
33
|
+
# Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
|
|
34
|
+
# Set a single row with DaruLite::Vector 0.600000 0.000000 0.600000 ( 0.598599)
|
|
35
|
+
# Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
|
|
36
|
+
# Create a new row with DaruLite::Vector 0.950000 0.000000 0.950000 ( 0.950725)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
vector = DaruLite::Vector.new(10000.times.map.to_a.shuffle)
|
|
7
|
+
df = DaruLite::DataFrame.new({
|
|
8
|
+
a: vector,
|
|
9
|
+
b: vector,
|
|
10
|
+
c: vector
|
|
11
|
+
})
|
|
12
|
+
Benchmark.bm do |x|
|
|
13
|
+
x.report("Sort a Vector without any args") do
|
|
14
|
+
vector.sort
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("Sort vector in descending order with custom <=> operator") do
|
|
18
|
+
vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
x.report("Sort single column of DataFrame") do
|
|
22
|
+
df.sort([:a])
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
x.report("Sort two columns of DataFrame") do
|
|
26
|
+
df.sort([:c,:a])
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
|
30
|
+
df.sort([:c,:a], ascending: [true, false],
|
|
31
|
+
by: { c: lambda { |a| a.to_s },
|
|
32
|
+
a: lambda { |a| a+1 } })
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# FIXME: MASSIVE SPEEDUP NECESSARY!
|
|
37
|
+
|
|
38
|
+
# ===== Benchmarks =====
|
|
39
|
+
# user system total real
|
|
40
|
+
# Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
|
|
41
|
+
# Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
|
|
42
|
+
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
|
43
|
+
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
|
44
|
+
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|
|
45
|
+
|
|
46
|
+
# ===== Current Benchmarks =====
|
|
47
|
+
# Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
|
|
48
|
+
# Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
|
|
49
|
+
# Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
|
|
50
|
+
# Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
|
|
51
|
+
# Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require 'daru_lite'
|
|
2
|
+
require 'benchmark'
|
|
3
|
+
|
|
4
|
+
vector = DaruLite::Vector.new(
|
|
5
|
+
(10**6).times.map.to_a.shuffle,
|
|
6
|
+
missing_values: 100.times.map.to_a.shuffle
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
vector = DaruLite::Vector.new(
|
|
10
|
+
10000.times.map.to_a.shuffle,
|
|
11
|
+
missing_values: 100.times.map.to_a.shuffle,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
Benchmark.bm do |x|
|
|
15
|
+
x.report("Mean of a vector") do
|
|
16
|
+
vector.mean
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
x.report("Minimum of a vector") do
|
|
20
|
+
vector.min
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# ===== Benchmarks =====
|
|
25
|
+
#
|
|
26
|
+
# user system total real
|
|
27
|
+
# Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
|
|
28
|
+
# Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: 10000.times.map { rand },
|
|
8
|
+
b: 10000.times.map { rand },
|
|
9
|
+
c: 10000.times.map { rand }
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
Benchmark.bm do |x|
|
|
13
|
+
x.report("Single Vector access") do
|
|
14
|
+
df[:a]
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("Access as range") do
|
|
18
|
+
df[:a..:c]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
x.report("Access with commas") do
|
|
22
|
+
df[:a, :c]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# ======== Benchmarks =======
|
|
27
|
+
#
|
|
28
|
+
# user system total real
|
|
29
|
+
# Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
|
|
30
|
+
# Access as range 0.090000 0.000000 0.090000 ( 0.084584)
|
|
31
|
+
# Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: 100000.times.map { rand },
|
|
8
|
+
b: 100000.times.map { rand },
|
|
9
|
+
c: 100000.times.map { rand }
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
index = DaruLite::Index.new((0...100000).to_a.shuffle)
|
|
13
|
+
|
|
14
|
+
Benchmark.bm do |x|
|
|
15
|
+
x.report("Assign new vector as Array") do
|
|
16
|
+
df[:d] = 100000.times.map { rand }
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
x.report("Reassign same vector as Array") do
|
|
20
|
+
df[:a] = 100000.times.map { rand }
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
x.report("Assign new Vector as DaruLite::Vector") do
|
|
24
|
+
df[:e] = DaruLite::Vector.new(100000.times.map { rand })
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
x.report("Reassign same Vector as DaruLite::Vector") do
|
|
28
|
+
df[:b] = DaruLite::Vector.new(100000.times.map { rand })
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
x.report("Reassgin differently indexed DaruLite::Vector") do
|
|
32
|
+
df[:b] = DaruLite::Vector.new(100000.times.map { rand }, index: index)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# ===== Benchmarks =====
|
|
37
|
+
# user system total real
|
|
38
|
+
# Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
|
|
39
|
+
# Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
|
|
40
|
+
# Assign new Vector as DaruLite::Vector 0.940000 0.000000 0.940000 (0.947879)
|
|
41
|
+
# Reassign same Vector as DaruLite::Vector 0.760000 0.020000 0.780000 (0.769969)
|
|
42
|
+
# Reassgin differently indexed DaruLite::Vector <Too embarrassingly slow.>
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: 100000.times.map { |i| i },
|
|
8
|
+
b: 100000.times.map { |i| i },
|
|
9
|
+
c: 100000.times.map { |i| i }
|
|
10
|
+
}, index: DaruLite::Index.new(100000.times.map.to_a.shuffle))
|
|
11
|
+
|
|
12
|
+
puts "Benchmarking DataFrame#where\n"
|
|
13
|
+
Benchmark.bm do |x|
|
|
14
|
+
x.report("Basic one liner") do
|
|
15
|
+
df.where(df[:a].mt(2341))
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
x.report("Little complex statement") do
|
|
19
|
+
df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
puts "Benchmarking Vector#where\n"
|
|
24
|
+
v = DaruLite::Vector.new(
|
|
25
|
+
100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
|
|
26
|
+
|
|
27
|
+
Benchmark.bm do |x|
|
|
28
|
+
x.report("Basic one liner") do
|
|
29
|
+
v.where(v.mteq(1000))
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
x.report("Little complex statement") do
|
|
33
|
+
v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# ====== Benchmarks ======
|
|
38
|
+
#
|
|
39
|
+
# Benchmarking DataFrame#where
|
|
40
|
+
#
|
|
41
|
+
# user system total real
|
|
42
|
+
# Basic one liner 0.700000 0.000000 0.700000 (0.703532)
|
|
43
|
+
# Little complex statement 0.120000 0.000000 0.120000 (0.121765)
|
|
44
|
+
#
|
|
45
|
+
# Benchmarking Vector#where
|
|
46
|
+
# user system total real
|
|
47
|
+
# Basic one liner 0.240000 0.000000 0.240000 (0.245787)
|
|
48
|
+
# Little complex statement 0.100000 0.000000 0.100000 (0.094423)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
|
2
|
+
|
|
3
|
+
require 'benchmark'
|
|
4
|
+
require 'daru_lite'
|
|
5
|
+
|
|
6
|
+
df = DaruLite::DataFrame.new({
|
|
7
|
+
a: [1,2,3,4,5,6]*100,
|
|
8
|
+
b: ['a','b','c','d','e','f']*100,
|
|
9
|
+
c: [11,22,33,44,55,66]*100
|
|
10
|
+
}, index: (1..600).to_a.shuffle)
|
|
11
|
+
|
|
12
|
+
Benchmark.bm do |x|
|
|
13
|
+
x.report("where") do
|
|
14
|
+
df.where(df[:a].eq(2) | df[:c].eq(55))
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
x.report("filter_rows") do
|
|
18
|
+
df.filter(:row) do |r|
|
|
19
|
+
r[:a] == 2 or r[:c] == 55
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# ===== Benchmarks =====
|
|
25
|
+
#
|
|
26
|
+
# user system total real
|
|
27
|
+
# where 0.000000 0.000000 0.000000 ( 0.002575)
|
|
28
|
+
# filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
|
data/daru_lite.gemspec
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
$:.unshift File.expand_path("../lib", __FILE__)
|
|
3
|
+
|
|
4
|
+
require 'daru_lite/version.rb'
|
|
5
|
+
|
|
6
|
+
DaruLite::DESCRIPTION = <<MSG
|
|
7
|
+
Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
|
|
8
|
+
of data. Daru works seamlessly accross interpreters and leverages interpreter-specific
|
|
9
|
+
optimizations whenever they are available.
|
|
10
|
+
|
|
11
|
+
It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
|
|
12
|
+
and can be used with many others like mixed_models, gnuplotrb and iruby.
|
|
13
|
+
|
|
14
|
+
Daru Lite is a fork of Daru that aims to focus on data manipulation and stability.
|
|
15
|
+
MSG
|
|
16
|
+
|
|
17
|
+
Gem::Specification.new do |spec|
|
|
18
|
+
spec.name = 'daru_lite'
|
|
19
|
+
spec.version = DaruLite::VERSION
|
|
20
|
+
spec.authors = ['Thomas Naude-Filonnière', 'Maxime Lasserre', 'Julie Thomas', 'Amar Slaoua', 'Mourtada Belhantri']
|
|
21
|
+
spec.summary = %q{Data Analysis in RUby, stripped down}
|
|
22
|
+
spec.description = DaruLite::DESCRIPTION
|
|
23
|
+
spec.homepage = "https://github.com/pollandroll/daru"
|
|
24
|
+
spec.license = 'BSD-2-Clause'
|
|
25
|
+
|
|
26
|
+
spec.files = `git ls-files -z`.split("\x0")
|
|
27
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
28
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
29
|
+
spec.require_paths = ["lib"]
|
|
30
|
+
|
|
31
|
+
spec.add_development_dependency 'activerecord', '~> 6.0'
|
|
32
|
+
spec.add_development_dependency 'awesome_print', '~> 1.9.2'
|
|
33
|
+
spec.add_development_dependency 'bundler', '~> 2.1', '>= 2.1.4'
|
|
34
|
+
spec.add_development_dependency 'dbd-sqlite3', '~> 1.2.5'
|
|
35
|
+
spec.add_development_dependency 'dbi', '~> 0.4.5'
|
|
36
|
+
spec.add_development_dependency 'distribution', '~> 0.8'
|
|
37
|
+
spec.add_development_dependency 'matrix', '~> 0.4.2'
|
|
38
|
+
spec.add_development_dependency 'nokogiri', '~> 1.16.2'
|
|
39
|
+
spec.add_development_dependency 'prime', '~> 0.1.2'
|
|
40
|
+
spec.add_development_dependency 'pry', '~> 0.14'
|
|
41
|
+
spec.add_development_dependency 'pry-byebug', '~> 3.10.1'
|
|
42
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
|
43
|
+
spec.add_development_dependency 'rspec', '~> 3.11'
|
|
44
|
+
spec.add_development_dependency 'rspec-its', '~> 1.3.0'
|
|
45
|
+
spec.add_development_dependency 'rubocop', '~> 1.60'
|
|
46
|
+
spec.add_development_dependency 'rubocop-performance', '~> 1.20.2'
|
|
47
|
+
spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
|
|
48
|
+
spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
|
|
49
|
+
spec.add_development_dependency 'simplecov', '~> 0.22.0'
|
|
50
|
+
spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
|
|
51
|
+
spec.add_development_dependency 'sqlite3', '~> 1.7.2'
|
|
52
|
+
# issue : https://github.com/SciRuby/daru/issues/493 occurred
|
|
53
|
+
# with latest version of sqlite3
|
|
54
|
+
spec.add_development_dependency 'webmock', '~> 3.20.0'
|
|
55
|
+
end
|
data/images/README.md
ADDED
data/images/con0.png
ADDED
|
Binary file
|
data/images/con1.png
ADDED
|
Binary file
|
data/images/init0.png
ADDED
|
Binary file
|