daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,65 @@
1
+ "Group","Treatment","Replicate","RelativeFitness"
2
+ "BKB","Tube",1,0.869962555792838
3
+ "BKB","Tube",2,1.00036299125423
4
+ "BKB","Tube",3,0.982935090384188
5
+ "BAC","Tube",1,0.810391635206191
6
+ "BAC","Tube",2,0.795106571577928
7
+ "JDK","Tube",1,0.849203581734814
8
+ "JDK","Tube",2,0.917636977577209
9
+ "JDK","Tube",3,0.905323024195181
10
+ "ETH","Tube",1,0.930820887284085
11
+ "ETH","Tube",2,0.958183317853959
12
+ "ETH","Tube",3,0.91428823286218
13
+ "SWI","Tube",1,0.918513532826773
14
+ "SWI","Tube",2,0.909023164202865
15
+ "SWI","Tube",3,0.928404818223812
16
+ "PPP","Tube",1,0.990500457882474
17
+ "PPP","Tube",2,0.97156464183624
18
+ "PPP","Tube",3,0.948766905642272
19
+ "ECO","Tube",1,0.996870763277447
20
+ "ECO","Tube",2,0.976750424056972
21
+ "ECO","Tube",3,0.939479247244218
22
+ "DOS","Tube",1,0.9461642789306
23
+ "DOS","Tube",2,0.951056617451754
24
+ "DOS","Tube",3,0.939563175900147
25
+ "FIT","Tube",1,0.942215594296048
26
+ "FIT","Tube",2,0.926698027408171
27
+ "FIT","Tube",3,1.00031750759377
28
+ "HHE","Tube",1,0.944414164259624
29
+ "HHE","Tube",2,0.95194592074032
30
+ "HHE","Tube",3,0.922485751593779
31
+ "H2W","Tube",1,0.915304498822462
32
+ "H2W","Tube",2,0.853906227834699
33
+ "H2W","Tube",3,0.938698147417512
34
+ "BKB","Dish",1,1.15254427639376
35
+ "BKB","Dish",2,1.28708514998039
36
+ "BKB","Dish",3,1.50741545998468
37
+ "BAC","Dish",2,1.65223419104399
38
+ "BAC","Dish",3,1.61502230247434
39
+ "JDK","Dish",1,1.5102682623396
40
+ "JDK","Dish",2,1.58270078151532
41
+ "JDK","Dish",3,1.54715152852448
42
+ "ETH","Dish",1,1.61281555981988
43
+ "ETH","Dish",2,1.36445572541848
44
+ "ETH","Dish",3,1.47155233724852
45
+ "SWI","Dish",1,1.36255560722945
46
+ "SWI","Dish",2,1.51569228710501
47
+ "SWI","Dish",3,1.47714121664773
48
+ "PPP","Dish",1,1.50896917413568
49
+ "PPP","Dish",2,1.58138592886817
50
+ "PPP","Dish",3,1.55356839081014
51
+ "ECO","Dish",1,1.69927564777656
52
+ "ECO","Dish",2,1.45605668065038
53
+ "ECO","Dish",3,1.52825762511041
54
+ "DOS","Dish",1,1.59453120031454
55
+ "DOS","Dish",2,1.58132511409296
56
+ "DOS","Dish",3,1.58558683941181
57
+ "FIT","Dish",1,1.01468578343171
58
+ "FIT","Dish",2,0.955220815085047
59
+ "FIT","Dish",3,1.03597298511451
60
+ "HHE","Dish",1,1.49399422139513
61
+ "HHE","Dish",2,1.32980278545457
62
+ "HHE","Dish",3,1.4505220676174
63
+ "H2W","Dish",1,1.52460143511286
64
+ "H2W","Dish",2,1.53958247554139
65
+ "H2W","Dish",3,1.51149897493835
@@ -0,0 +1,22 @@
1
+ # Date - 28 June 2016. daru version - 0.1.3.1
2
+ # Compare speed of Ruby stdlib CSV and DataFrame.from_csv.
3
+
4
+ require 'benchmark'
5
+ require 'csv'
6
+ require 'daru_lite'
7
+
8
+ Benchmark.bm do |x|
9
+ x.report("Ruby CSV") do
10
+ CSV.read("TradeoffData.csv")
11
+ end
12
+
13
+ x.report("DataFrame.from_csv") do
14
+ DaruLite::DataFrame.from_csv("TradeoffData.csv")
15
+ end
16
+ end
17
+
18
+ # FIXME: Improve this. It's 4 times slower than Ruby CSV reading!!
19
+
20
+ # user system total real
21
+ # Ruby CSV 0.010000 0.000000 0.010000 ( 0.002385)
22
+ # DataFrame.from_csv 0.000000 0.000000 0.000000 ( 0.008225)
@@ -0,0 +1,39 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ Benchmark.bm do |x|
7
+ x.report("Create with Arrays and clone") do
8
+ df = DaruLite::DataFrame.new({
9
+ a: 100000.times.map { rand },
10
+ b: 100000.times.map { rand },
11
+ c: 100000.times.map { rand }
12
+ })
13
+ end
14
+
15
+ x.report("Create with Vectors and clone") do
16
+ df = DaruLite::DataFrame.new({
17
+ a: DaruLite::Vector.new(100000.times.map { rand }),
18
+ b: DaruLite::Vector.new(100000.times.map { rand }),
19
+ c: DaruLite::Vector.new(100000.times.map { rand })
20
+ })
21
+ end
22
+
23
+ x.report("Create with Vector and dont clone") do
24
+ df = DaruLite::DataFrame.new({
25
+ a: DaruLite::Vector.new(100000.times.map { rand }),
26
+ b: DaruLite::Vector.new(100000.times.map { rand }),
27
+ c: DaruLite::Vector.new(100000.times.map { rand })
28
+ }, clone: false)
29
+ end
30
+
31
+ x.report("Create by row from Arrays") do
32
+ end
33
+ end
34
+
35
+ # ===== Benchmarks =====
36
+ # user system total real
37
+ # Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
38
+ # Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
39
+ # Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
@@ -0,0 +1,34 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+ require 'sqlite3'
6
+ require 'dbi'
7
+ require 'active_record'
8
+
9
+ db_name = 'daru_lite_test.sqlite'
10
+ FileUtils.rm(db_name) if File.file?(db_name)
11
+
12
+ SQLite3::Database.new(db_name).tap do |db|
13
+ db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
14
+
15
+ values = 1.upto(100_000).map { |i| %!(#{i},"name_#{i}",#{rand(100)})! }.join(",")
16
+ db.execute "insert into accounts values #{values}"
17
+ end
18
+
19
+ ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
20
+ ActiveRecord::Base.connection
21
+
22
+ class Account < ActiveRecord::Base; end
23
+
24
+ Benchmark.bm do |x|
25
+ x.report("DataFrame.from_sql") do
26
+ DaruLite::DataFrame.from_sql(ActiveRecord::Base.connection, "SELECT * FROM accounts")
27
+ end
28
+
29
+ x.report("DataFrame.from_activerecord") do
30
+ DaruLite::DataFrame.from_activerecord(Account.all)
31
+ end
32
+ end
33
+
34
+ FileUtils.rm(db_name)
@@ -0,0 +1,45 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+
11
+ df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
12
+ h[v] = DaruLite::Vector.new(1.upto(n).to_a)
13
+ h
14
+ end
15
+
16
+ df = DaruLite::DataFrame.new(df_h)
17
+
18
+ Benchmark.bm do |bm|
19
+ bm.report("dupe (n=#{n})") do
20
+ df.dup
21
+ end
22
+ end
23
+ end
24
+
25
+ # ===== Benchmarks =====
26
+ # System: iMac Late 2013 3.5GHz Core i7
27
+ #
28
+ # user system total real
29
+ #dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
30
+ # user system total real
31
+ #dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
32
+ # user system total real
33
+ #dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
34
+
35
+
36
+
37
+
38
+ # ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
39
+ # Note that the n here is 100x smaller than above
40
+ # user system total real
41
+ #dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
42
+ # user system total real
43
+ #dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
44
+ # user system total real
45
+ #dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
@@ -0,0 +1,32 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ data = DaruLite::DataFrame.from_csv 'TradeoffData.csv'
7
+
8
+ Benchmark.bm do |x|
9
+ x.report("Single column grouping") do
10
+ @single = data.group_by(['Treatment'])
11
+ end
12
+
13
+ x.report("Multi-column grouping") do
14
+ @multi = data.group_by(['Group', 'Treatment'])
15
+ end
16
+
17
+ x.report("Single mean") do
18
+ @single.mean
19
+ end
20
+
21
+ x.report("Multi mean") do
22
+ @multi.mean
23
+ end
24
+ end
25
+
26
+ # ===== Benchmarks =====
27
+ #
28
+ # user system total real
29
+ # Single column grouping 0.000000 0.000000 0.000000 (0.000340)
30
+ # Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
31
+ # Single mean 0.000000 0.000000 0.000000 (0.001208)
32
+ # Multi mean 0.000000 0.000000 0.000000 (0.004892)
@@ -0,0 +1,52 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+ keys = (1..(n)).to_a
11
+ base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
12
+ lookup_hash = keys.map { |k| [k, k * 100]}.to_h
13
+
14
+ base_data_df = DaruLite::DataFrame.new(base_data)
15
+ lookup_df = DaruLite::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
16
+
17
+ Benchmark.bm do |bm|
18
+ bm.report("Inner join (n=#{n})") do
19
+ base_data_df.join(lookup_df, on: [:keys], how: :inner)
20
+ end
21
+
22
+ bm.report("Outer join (n=#{n})") do
23
+ base_data_df.join(lookup_df, on: [:keys], how: :outer)
24
+ end
25
+ end
26
+ end
27
+
28
+ # ===== Benchmarks =====
29
+ # System: MacBook Pro Mid 2014 3GHz Core i7
30
+ #
31
+ # user system total real
32
+ #Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
33
+ #Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
34
+ # user system total real
35
+ #Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
36
+ #Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
37
+ # user system total real
38
+ #Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
39
+ #Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
40
+
41
+
42
+ # ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
43
+ # Note that the n here is 10x smaller than above
44
+ # user system total real
45
+ #Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
46
+ #Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
47
+ # user system total real
48
+ #Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
49
+ #Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
50
+ # user system total real
51
+ #Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
52
+ #Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
@@ -0,0 +1,41 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Access single row") do
14
+ df.row[50]
15
+ end
16
+
17
+ x.report("Access rows by comma") do
18
+ df.row[*(5..40000).to_a.shuffle]
19
+ end
20
+
21
+ x.report("Individual rows") do
22
+ rows = []
23
+ index = (5..40000).to_a.shuffle
24
+ index.each do |a|
25
+ rows << df.row[a].to_a
26
+ end
27
+
28
+ DaruLite::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
29
+ end
30
+
31
+ x.report("Access rows by range") do
32
+ df.row[5..40000]
33
+ end
34
+ end
35
+
36
+ # ==== Benchmarks ====
37
+ # user system total real
38
+ # Access single row 0.000000 0.000000 0.000000 ( 0.000059)
39
+ # Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
40
+ # Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
41
+ # Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
@@ -0,0 +1,36 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Set a single row with Array") do
14
+ df.row[5] = [55,22,65]
15
+ end
16
+
17
+ x.report("Set a single row with DaruLite::Vector") do
18
+ df.row[3456] = DaruLite::Vector.new([3,54,11], index: [:b,:e,:a])
19
+ end
20
+
21
+ x.report("Create a new row with Array") do
22
+ df.row[100001] = [34,66,11]
23
+ end
24
+
25
+ x.report("Create a new row with DaruLite::Vector") do
26
+ df.row[100005] = DaruLite::Vector.new([34,66,11], index: [:a,:b,:t])
27
+ end
28
+ end
29
+
30
+ # ==== Benchmarks ====
31
+ #
32
+ # user system total real
33
+ # Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
34
+ # Set a single row with DaruLite::Vector 0.600000 0.000000 0.600000 ( 0.598599)
35
+ # Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
36
+ # Create a new row with DaruLite::Vector 0.950000 0.000000 0.950000 ( 0.950725)
@@ -0,0 +1,51 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ vector = DaruLite::Vector.new(10000.times.map.to_a.shuffle)
7
+ df = DaruLite::DataFrame.new({
8
+ a: vector,
9
+ b: vector,
10
+ c: vector
11
+ })
12
+ Benchmark.bm do |x|
13
+ x.report("Sort a Vector without any args") do
14
+ vector.sort
15
+ end
16
+
17
+ x.report("Sort vector in descending order with custom <=> operator") do
18
+ vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
19
+ end
20
+
21
+ x.report("Sort single column of DataFrame") do
22
+ df.sort([:a])
23
+ end
24
+
25
+ x.report("Sort two columns of DataFrame") do
26
+ df.sort([:c,:a])
27
+ end
28
+
29
+ x.report("Sort two columns with custom operators in different orders of DataFrame") do
30
+ df.sort([:c,:a], ascending: [true, false],
31
+ by: { c: lambda { |a| a.to_s },
32
+ a: lambda { |a| a+1 } })
33
+ end
34
+ end
35
+
36
+ # FIXME: MASSIVE SPEEDUP NECESSARY!
37
+
38
+ # ===== Benchmarks =====
39
+ # user system total real
40
+ # Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
41
+ # Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
42
+ # Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
43
+ # Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
44
+ # Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
45
+
46
+ # ===== Current Benchmarks =====
47
+ # Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
48
+ # Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
49
+ # Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
50
+ # Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
51
+ # Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
@@ -0,0 +1,28 @@
1
+ require 'daru_lite'
2
+ require 'benchmark'
3
+
4
+ vector = DaruLite::Vector.new(
5
+ (10**6).times.map.to_a.shuffle,
6
+ missing_values: 100.times.map.to_a.shuffle
7
+ )
8
+
9
+ vector = DaruLite::Vector.new(
10
+ 10000.times.map.to_a.shuffle,
11
+ missing_values: 100.times.map.to_a.shuffle,
12
+ )
13
+
14
+ Benchmark.bm do |x|
15
+ x.report("Mean of a vector") do
16
+ vector.mean
17
+ end
18
+
19
+ x.report("Minimum of a vector") do
20
+ vector.min
21
+ end
22
+ end
23
+
24
+ # ===== Benchmarks =====
25
+ #
26
+ # user system total real
27
+ # Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
28
+ # Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
@@ -0,0 +1,31 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 10000.times.map { rand },
8
+ b: 10000.times.map { rand },
9
+ c: 10000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Single Vector access") do
14
+ df[:a]
15
+ end
16
+
17
+ x.report("Access as range") do
18
+ df[:a..:c]
19
+ end
20
+
21
+ x.report("Access with commas") do
22
+ df[:a, :c]
23
+ end
24
+ end
25
+
26
+ # ======== Benchmarks =======
27
+ #
28
+ # user system total real
29
+ # Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
30
+ # Access as range 0.090000 0.000000 0.090000 ( 0.084584)
31
+ # Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
@@ -0,0 +1,42 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ index = DaruLite::Index.new((0...100000).to_a.shuffle)
13
+
14
+ Benchmark.bm do |x|
15
+ x.report("Assign new vector as Array") do
16
+ df[:d] = 100000.times.map { rand }
17
+ end
18
+
19
+ x.report("Reassign same vector as Array") do
20
+ df[:a] = 100000.times.map { rand }
21
+ end
22
+
23
+ x.report("Assign new Vector as DaruLite::Vector") do
24
+ df[:e] = DaruLite::Vector.new(100000.times.map { rand })
25
+ end
26
+
27
+ x.report("Reassign same Vector as DaruLite::Vector") do
28
+ df[:b] = DaruLite::Vector.new(100000.times.map { rand })
29
+ end
30
+
31
+ x.report("Reassgin differently indexed DaruLite::Vector") do
32
+ df[:b] = DaruLite::Vector.new(100000.times.map { rand }, index: index)
33
+ end
34
+ end
35
+
36
+ # ===== Benchmarks =====
37
+ # user system total real
38
+ # Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
39
+ # Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
40
+ # Assign new Vector as DaruLite::Vector 0.940000 0.000000 0.940000 (0.947879)
41
+ # Reassign same Vector as DaruLite::Vector 0.760000 0.020000 0.780000 (0.769969)
42
+ # Reassgin differently indexed DaruLite::Vector <Too embarrassingly slow.>
@@ -0,0 +1,48 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { |i| i },
8
+ b: 100000.times.map { |i| i },
9
+ c: 100000.times.map { |i| i }
10
+ }, index: DaruLite::Index.new(100000.times.map.to_a.shuffle))
11
+
12
+ puts "Benchmarking DataFrame#where\n"
13
+ Benchmark.bm do |x|
14
+ x.report("Basic one liner") do
15
+ df.where(df[:a].mt(2341))
16
+ end
17
+
18
+ x.report("Little complex statement") do
19
+ df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
20
+ end
21
+ end
22
+
23
+ puts "Benchmarking Vector#where\n"
24
+ v = DaruLite::Vector.new(
25
+ 100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
26
+
27
+ Benchmark.bm do |x|
28
+ x.report("Basic one liner") do
29
+ v.where(v.mteq(1000))
30
+ end
31
+
32
+ x.report("Little complex statement") do
33
+ v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
34
+ end
35
+ end
36
+
37
+ # ====== Benchmarks ======
38
+ #
39
+ # Benchmarking DataFrame#where
40
+ #
41
+ # user system total real
42
+ # Basic one liner 0.700000 0.000000 0.700000 (0.703532)
43
+ # Little complex statement 0.120000 0.000000 0.120000 (0.121765)
44
+ #
45
+ # Benchmarking Vector#where
46
+ # user system total real
47
+ # Basic one liner 0.240000 0.000000 0.240000 (0.245787)
48
+ # Little complex statement 0.100000 0.000000 0.100000 (0.094423)
@@ -0,0 +1,28 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: [1,2,3,4,5,6]*100,
8
+ b: ['a','b','c','d','e','f']*100,
9
+ c: [11,22,33,44,55,66]*100
10
+ }, index: (1..600).to_a.shuffle)
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("where") do
14
+ df.where(df[:a].eq(2) | df[:c].eq(55))
15
+ end
16
+
17
+ x.report("filter_rows") do
18
+ df.filter(:row) do |r|
19
+ r[:a] == 2 or r[:c] == 55
20
+ end
21
+ end
22
+ end
23
+
24
+ # ===== Benchmarks =====
25
+ #
26
+ # user system total real
27
+ # where 0.000000 0.000000 0.000000 ( 0.002575)
28
+ # filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
data/daru_lite.gemspec ADDED
@@ -0,0 +1,55 @@
1
+ # coding: utf-8
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+
4
+ require 'daru_lite/version.rb'
5
+
6
+ DaruLite::DESCRIPTION = <<MSG
7
+ Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
8
+ of data. Daru works seamlessly accross interpreters and leverages interpreter-specific
9
+ optimizations whenever they are available.
10
+
11
+ It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
12
+ and can be used with many others like mixed_models, gnuplotrb and iruby.
13
+
14
+ Daru Lite is a fork of Daru that aims to focus on data manipulation and stability.
15
+ MSG
16
+
17
+ Gem::Specification.new do |spec|
18
+ spec.name = 'daru_lite'
19
+ spec.version = DaruLite::VERSION
20
+ spec.authors = ['Thomas Naude-Filonnière', 'Maxime Lasserre', 'Julie Thomas', 'Amar Slaoua', 'Mourtada Belhantri']
21
+ spec.summary = %q{Data Analysis in RUby, stripped down}
22
+ spec.description = DaruLite::DESCRIPTION
23
+ spec.homepage = "https://github.com/pollandroll/daru"
24
+ spec.license = 'BSD-2-Clause'
25
+
26
+ spec.files = `git ls-files -z`.split("\x0")
27
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
28
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
29
+ spec.require_paths = ["lib"]
30
+
31
+ spec.add_development_dependency 'activerecord', '~> 6.0'
32
+ spec.add_development_dependency 'awesome_print', '~> 1.9.2'
33
+ spec.add_development_dependency 'bundler', '~> 2.1', '>= 2.1.4'
34
+ spec.add_development_dependency 'dbd-sqlite3', '~> 1.2.5'
35
+ spec.add_development_dependency 'dbi', '~> 0.4.5'
36
+ spec.add_development_dependency 'distribution', '~> 0.8'
37
+ spec.add_development_dependency 'matrix', '~> 0.4.2'
38
+ spec.add_development_dependency 'nokogiri', '~> 1.16.2'
39
+ spec.add_development_dependency 'prime', '~> 0.1.2'
40
+ spec.add_development_dependency 'pry', '~> 0.14'
41
+ spec.add_development_dependency 'pry-byebug', '~> 3.10.1'
42
+ spec.add_development_dependency 'rake', '~> 13.0'
43
+ spec.add_development_dependency 'rspec', '~> 3.11'
44
+ spec.add_development_dependency 'rspec-its', '~> 1.3.0'
45
+ spec.add_development_dependency 'rubocop', '~> 1.60'
46
+ spec.add_development_dependency 'rubocop-performance', '~> 1.20.2'
47
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
48
+ spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
49
+ spec.add_development_dependency 'simplecov', '~> 0.22.0'
50
+ spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
51
+ spec.add_development_dependency 'sqlite3', '~> 1.7.2'
52
+ # issue : https://github.com/SciRuby/daru/issues/493 occurred
53
+ # with latest version of sqlite3
54
+ spec.add_development_dependency 'webmock', '~> 3.20.0'
55
+ end
data/images/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # Images
2
+
3
+ This folder contains images that are being used in the project README to display code examples.
4
+
5
+ Do not rename any of the files.
data/images/con0.png ADDED
Binary file
data/images/con1.png ADDED
Binary file
data/images/init0.png ADDED
Binary file