daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,65 @@
1
+ "Group","Treatment","Replicate","RelativeFitness"
2
+ "BKB","Tube",1,0.869962555792838
3
+ "BKB","Tube",2,1.00036299125423
4
+ "BKB","Tube",3,0.982935090384188
5
+ "BAC","Tube",1,0.810391635206191
6
+ "BAC","Tube",2,0.795106571577928
7
+ "JDK","Tube",1,0.849203581734814
8
+ "JDK","Tube",2,0.917636977577209
9
+ "JDK","Tube",3,0.905323024195181
10
+ "ETH","Tube",1,0.930820887284085
11
+ "ETH","Tube",2,0.958183317853959
12
+ "ETH","Tube",3,0.91428823286218
13
+ "SWI","Tube",1,0.918513532826773
14
+ "SWI","Tube",2,0.909023164202865
15
+ "SWI","Tube",3,0.928404818223812
16
+ "PPP","Tube",1,0.990500457882474
17
+ "PPP","Tube",2,0.97156464183624
18
+ "PPP","Tube",3,0.948766905642272
19
+ "ECO","Tube",1,0.996870763277447
20
+ "ECO","Tube",2,0.976750424056972
21
+ "ECO","Tube",3,0.939479247244218
22
+ "DOS","Tube",1,0.9461642789306
23
+ "DOS","Tube",2,0.951056617451754
24
+ "DOS","Tube",3,0.939563175900147
25
+ "FIT","Tube",1,0.942215594296048
26
+ "FIT","Tube",2,0.926698027408171
27
+ "FIT","Tube",3,1.00031750759377
28
+ "HHE","Tube",1,0.944414164259624
29
+ "HHE","Tube",2,0.95194592074032
30
+ "HHE","Tube",3,0.922485751593779
31
+ "H2W","Tube",1,0.915304498822462
32
+ "H2W","Tube",2,0.853906227834699
33
+ "H2W","Tube",3,0.938698147417512
34
+ "BKB","Dish",1,1.15254427639376
35
+ "BKB","Dish",2,1.28708514998039
36
+ "BKB","Dish",3,1.50741545998468
37
+ "BAC","Dish",2,1.65223419104399
38
+ "BAC","Dish",3,1.61502230247434
39
+ "JDK","Dish",1,1.5102682623396
40
+ "JDK","Dish",2,1.58270078151532
41
+ "JDK","Dish",3,1.54715152852448
42
+ "ETH","Dish",1,1.61281555981988
43
+ "ETH","Dish",2,1.36445572541848
44
+ "ETH","Dish",3,1.47155233724852
45
+ "SWI","Dish",1,1.36255560722945
46
+ "SWI","Dish",2,1.51569228710501
47
+ "SWI","Dish",3,1.47714121664773
48
+ "PPP","Dish",1,1.50896917413568
49
+ "PPP","Dish",2,1.58138592886817
50
+ "PPP","Dish",3,1.55356839081014
51
+ "ECO","Dish",1,1.69927564777656
52
+ "ECO","Dish",2,1.45605668065038
53
+ "ECO","Dish",3,1.52825762511041
54
+ "DOS","Dish",1,1.59453120031454
55
+ "DOS","Dish",2,1.58132511409296
56
+ "DOS","Dish",3,1.58558683941181
57
+ "FIT","Dish",1,1.01468578343171
58
+ "FIT","Dish",2,0.955220815085047
59
+ "FIT","Dish",3,1.03597298511451
60
+ "HHE","Dish",1,1.49399422139513
61
+ "HHE","Dish",2,1.32980278545457
62
+ "HHE","Dish",3,1.4505220676174
63
+ "H2W","Dish",1,1.52460143511286
64
+ "H2W","Dish",2,1.53958247554139
65
+ "H2W","Dish",3,1.51149897493835
@@ -0,0 +1,22 @@
1
+ # Date - 28 June 2016. daru version - 0.1.3.1
2
+ # Compare speed of Ruby stdlib CSV and DataFrame.from_csv.
3
+
4
+ require 'benchmark'
5
+ require 'csv'
6
+ require 'daru_lite'
7
+
8
+ Benchmark.bm do |x|
9
+ x.report("Ruby CSV") do
10
+ CSV.read("TradeoffData.csv")
11
+ end
12
+
13
+ x.report("DataFrame.from_csv") do
14
+ DaruLite::DataFrame.from_csv("TradeoffData.csv")
15
+ end
16
+ end
17
+
18
+ # FIXME: Improve this. It's 4 times slower than Ruby CSV reading!!
19
+
20
+ # user system total real
21
+ # Ruby CSV 0.010000 0.000000 0.010000 ( 0.002385)
22
+ # DataFrame.from_csv 0.000000 0.000000 0.000000 ( 0.008225)
@@ -0,0 +1,39 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ Benchmark.bm do |x|
7
+ x.report("Create with Arrays and clone") do
8
+ df = DaruLite::DataFrame.new({
9
+ a: 100000.times.map { rand },
10
+ b: 100000.times.map { rand },
11
+ c: 100000.times.map { rand }
12
+ })
13
+ end
14
+
15
+ x.report("Create with Vectors and clone") do
16
+ df = DaruLite::DataFrame.new({
17
+ a: DaruLite::Vector.new(100000.times.map { rand }),
18
+ b: DaruLite::Vector.new(100000.times.map { rand }),
19
+ c: DaruLite::Vector.new(100000.times.map { rand })
20
+ })
21
+ end
22
+
23
+ x.report("Create with Vector and dont clone") do
24
+ df = DaruLite::DataFrame.new({
25
+ a: DaruLite::Vector.new(100000.times.map { rand }),
26
+ b: DaruLite::Vector.new(100000.times.map { rand }),
27
+ c: DaruLite::Vector.new(100000.times.map { rand })
28
+ }, clone: false)
29
+ end
30
+
31
+ x.report("Create by row from Arrays") do
32
+ end
33
+ end
34
+
35
+ # ===== Benchmarks =====
36
+ # user system total real
37
+ # Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
38
+ # Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
39
+ # Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
@@ -0,0 +1,34 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+ require 'sqlite3'
6
+ require 'dbi'
7
+ require 'active_record'
8
+
9
+ db_name = 'daru_lite_test.sqlite'
10
+ FileUtils.rm(db_name) if File.file?(db_name)
11
+
12
+ SQLite3::Database.new(db_name).tap do |db|
13
+ db.execute "create table accounts(id integer, name varchar, age integer, primary key(id))"
14
+
15
+ values = 1.upto(100_000).map { |i| %!(#{i},"name_#{i}",#{rand(100)})! }.join(",")
16
+ db.execute "insert into accounts values #{values}"
17
+ end
18
+
19
+ ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
20
+ ActiveRecord::Base.connection
21
+
22
+ class Account < ActiveRecord::Base; end
23
+
24
+ Benchmark.bm do |x|
25
+ x.report("DataFrame.from_sql") do
26
+ DaruLite::DataFrame.from_sql(ActiveRecord::Base.connection, "SELECT * FROM accounts")
27
+ end
28
+
29
+ x.report("DataFrame.from_activerecord") do
30
+ DaruLite::DataFrame.from_activerecord(Account.all)
31
+ end
32
+ end
33
+
34
+ FileUtils.rm(db_name)
@@ -0,0 +1,45 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+
11
+ df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
12
+ h[v] = DaruLite::Vector.new(1.upto(n).to_a)
13
+ h
14
+ end
15
+
16
+ df = DaruLite::DataFrame.new(df_h)
17
+
18
+ Benchmark.bm do |bm|
19
+ bm.report("dupe (n=#{n})") do
20
+ df.dup
21
+ end
22
+ end
23
+ end
24
+
25
+ # ===== Benchmarks =====
26
+ # System: iMac Late 2013 3.5GHz Core i7
27
+ #
28
+ # user system total real
29
+ #dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
30
+ # user system total real
31
+ #dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
32
+ # user system total real
33
+ #dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
34
+
35
+
36
+
37
+
38
+ # ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
39
+ # Note that the n here is 100x smaller than above
40
+ # user system total real
41
+ #dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
42
+ # user system total real
43
+ #dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
44
+ # user system total real
45
+ #dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
@@ -0,0 +1,32 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ data = DaruLite::DataFrame.from_csv 'TradeoffData.csv'
7
+
8
+ Benchmark.bm do |x|
9
+ x.report("Single column grouping") do
10
+ @single = data.group_by(['Treatment'])
11
+ end
12
+
13
+ x.report("Multi-column grouping") do
14
+ @multi = data.group_by(['Group', 'Treatment'])
15
+ end
16
+
17
+ x.report("Single mean") do
18
+ @single.mean
19
+ end
20
+
21
+ x.report("Multi mean") do
22
+ @multi.mean
23
+ end
24
+ end
25
+
26
+ # ===== Benchmarks =====
27
+ #
28
+ # user system total real
29
+ # Single column grouping 0.000000 0.000000 0.000000 (0.000340)
30
+ # Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
31
+ # Single mean 0.000000 0.000000 0.000000 (0.001208)
32
+ # Multi mean 0.000000 0.000000 0.000000 (0.004892)
@@ -0,0 +1,52 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+ keys = (1..(n)).to_a
11
+ base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
12
+ lookup_hash = keys.map { |k| [k, k * 100]}.to_h
13
+
14
+ base_data_df = DaruLite::DataFrame.new(base_data)
15
+ lookup_df = DaruLite::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
16
+
17
+ Benchmark.bm do |bm|
18
+ bm.report("Inner join (n=#{n})") do
19
+ base_data_df.join(lookup_df, on: [:keys], how: :inner)
20
+ end
21
+
22
+ bm.report("Outer join (n=#{n})") do
23
+ base_data_df.join(lookup_df, on: [:keys], how: :outer)
24
+ end
25
+ end
26
+ end
27
+
28
+ # ===== Benchmarks =====
29
+ # System: MacBook Pro Mid 2014 3GHz Core i7
30
+ #
31
+ # user system total real
32
+ #Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
33
+ #Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
34
+ # user system total real
35
+ #Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
36
+ #Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
37
+ # user system total real
38
+ #Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
39
+ #Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
40
+
41
+
42
+ # ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
43
+ # Note that the n here is 10x smaller than above
44
+ # user system total real
45
+ #Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
46
+ #Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
47
+ # user system total real
48
+ #Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
49
+ #Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
50
+ # user system total real
51
+ #Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
52
+ #Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
@@ -0,0 +1,41 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Access single row") do
14
+ df.row[50]
15
+ end
16
+
17
+ x.report("Access rows by comma") do
18
+ df.row[*(5..40000).to_a.shuffle]
19
+ end
20
+
21
+ x.report("Individual rows") do
22
+ rows = []
23
+ index = (5..40000).to_a.shuffle
24
+ index.each do |a|
25
+ rows << df.row[a].to_a
26
+ end
27
+
28
+ DaruLite::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
29
+ end
30
+
31
+ x.report("Access rows by range") do
32
+ df.row[5..40000]
33
+ end
34
+ end
35
+
36
+ # ==== Benchmarks ====
37
+ # user system total real
38
+ # Access single row 0.000000 0.000000 0.000000 ( 0.000059)
39
+ # Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
40
+ # Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
41
+ # Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
@@ -0,0 +1,36 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Set a single row with Array") do
14
+ df.row[5] = [55,22,65]
15
+ end
16
+
17
+ x.report("Set a single row with DaruLite::Vector") do
18
+ df.row[3456] = DaruLite::Vector.new([3,54,11], index: [:b,:e,:a])
19
+ end
20
+
21
+ x.report("Create a new row with Array") do
22
+ df.row[100001] = [34,66,11]
23
+ end
24
+
25
+ x.report("Create a new row with DaruLite::Vector") do
26
+ df.row[100005] = DaruLite::Vector.new([34,66,11], index: [:a,:b,:t])
27
+ end
28
+ end
29
+
30
+ # ==== Benchmarks ====
31
+ #
32
+ # user system total real
33
+ # Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
34
+ # Set a single row with DaruLite::Vector 0.600000 0.000000 0.600000 ( 0.598599)
35
+ # Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
36
+ # Create a new row with DaruLite::Vector 0.950000 0.000000 0.950000 ( 0.950725)
@@ -0,0 +1,51 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ vector = DaruLite::Vector.new(10000.times.map.to_a.shuffle)
7
+ df = DaruLite::DataFrame.new({
8
+ a: vector,
9
+ b: vector,
10
+ c: vector
11
+ })
12
+ Benchmark.bm do |x|
13
+ x.report("Sort a Vector without any args") do
14
+ vector.sort
15
+ end
16
+
17
+ x.report("Sort vector in descending order with custom <=> operator") do
18
+ vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
19
+ end
20
+
21
+ x.report("Sort single column of DataFrame") do
22
+ df.sort([:a])
23
+ end
24
+
25
+ x.report("Sort two columns of DataFrame") do
26
+ df.sort([:c,:a])
27
+ end
28
+
29
+ x.report("Sort two columns with custom operators in different orders of DataFrame") do
30
+ df.sort([:c,:a], ascending: [true, false],
31
+ by: { c: lambda { |a| a.to_s },
32
+ a: lambda { |a| a+1 } })
33
+ end
34
+ end
35
+
36
+ # FIXME: MASSIVE SPEEDUP NECESSARY!
37
+
38
+ # ===== Benchmarks =====
39
+ # user system total real
40
+ # Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
41
+ # Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
42
+ # Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
43
+ # Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
44
+ # Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
45
+
46
+ # ===== Current Benchmarks =====
47
+ # Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
48
+ # Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
49
+ # Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
50
+ # Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
51
+ # Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
@@ -0,0 +1,28 @@
1
+ require 'daru_lite'
2
+ require 'benchmark'
3
+
4
+ vector = DaruLite::Vector.new(
5
+ (10**6).times.map.to_a.shuffle,
6
+ missing_values: 100.times.map.to_a.shuffle
7
+ )
8
+
9
+ vector = DaruLite::Vector.new(
10
+ 10000.times.map.to_a.shuffle,
11
+ missing_values: 100.times.map.to_a.shuffle,
12
+ )
13
+
14
+ Benchmark.bm do |x|
15
+ x.report("Mean of a vector") do
16
+ vector.mean
17
+ end
18
+
19
+ x.report("Minimum of a vector") do
20
+ vector.min
21
+ end
22
+ end
23
+
24
+ # ===== Benchmarks =====
25
+ #
26
+ # user system total real
27
+ # Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
28
+ # Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
@@ -0,0 +1,31 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 10000.times.map { rand },
8
+ b: 10000.times.map { rand },
9
+ c: 10000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Single Vector access") do
14
+ df[:a]
15
+ end
16
+
17
+ x.report("Access as range") do
18
+ df[:a..:c]
19
+ end
20
+
21
+ x.report("Access with commas") do
22
+ df[:a, :c]
23
+ end
24
+ end
25
+
26
+ # ======== Benchmarks =======
27
+ #
28
+ # user system total real
29
+ # Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
30
+ # Access as range 0.090000 0.000000 0.090000 ( 0.084584)
31
+ # Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
@@ -0,0 +1,42 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ index = DaruLite::Index.new((0...100000).to_a.shuffle)
13
+
14
+ Benchmark.bm do |x|
15
+ x.report("Assign new vector as Array") do
16
+ df[:d] = 100000.times.map { rand }
17
+ end
18
+
19
+ x.report("Reassign same vector as Array") do
20
+ df[:a] = 100000.times.map { rand }
21
+ end
22
+
23
+ x.report("Assign new Vector as DaruLite::Vector") do
24
+ df[:e] = DaruLite::Vector.new(100000.times.map { rand })
25
+ end
26
+
27
+ x.report("Reassign same Vector as DaruLite::Vector") do
28
+ df[:b] = DaruLite::Vector.new(100000.times.map { rand })
29
+ end
30
+
31
+ x.report("Reassgin differently indexed DaruLite::Vector") do
32
+ df[:b] = DaruLite::Vector.new(100000.times.map { rand }, index: index)
33
+ end
34
+ end
35
+
36
+ # ===== Benchmarks =====
37
+ # user system total real
38
+ # Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
39
+ # Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
40
+ # Assign new Vector as DaruLite::Vector 0.940000 0.000000 0.940000 (0.947879)
41
+ # Reassign same Vector as DaruLite::Vector 0.760000 0.020000 0.780000 (0.769969)
42
+ # Reassgin differently indexed DaruLite::Vector <Too embarassingly slow.>
@@ -0,0 +1,48 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: 100000.times.map { |i| i },
8
+ b: 100000.times.map { |i| i },
9
+ c: 100000.times.map { |i| i }
10
+ }, index: DaruLite::Index.new(100000.times.map.to_a.shuffle))
11
+
12
+ puts "Benchmarking DataFrame#where\n"
13
+ Benchmark.bm do |x|
14
+ x.report("Basic one liner") do
15
+ df.where(df[:a].mt(2341))
16
+ end
17
+
18
+ x.report("Little complex statement") do
19
+ df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
20
+ end
21
+ end
22
+
23
+ puts "Benchmarking Vector#where\n"
24
+ v = DaruLite::Vector.new(
25
+ 100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
26
+
27
+ Benchmark.bm do |x|
28
+ x.report("Basic one liner") do
29
+ v.where(v.mteq(1000))
30
+ end
31
+
32
+ x.report("Little complex statement") do
33
+ v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
34
+ end
35
+ end
36
+
37
+ # ====== Benchmarks ======
38
+ #
39
+ # Benchmarking DataFrame#where
40
+ #
41
+ # user system total real
42
+ # Basic one liner 0.700000 0.000000 0.700000 (0.703532)
43
+ # Little complex statement 0.120000 0.000000 0.120000 (0.121765)
44
+ #
45
+ # Benchmarking Vector#where
46
+ # user system total real
47
+ # Basic one liner 0.240000 0.000000 0.240000 (0.245787)
48
+ # Little complex statement 0.100000 0.000000 0.100000 (0.094423)
@@ -0,0 +1,28 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru_lite'
5
+
6
+ df = DaruLite::DataFrame.new({
7
+ a: [1,2,3,4,5,6]*100,
8
+ b: ['a','b','c','d','e','f']*100,
9
+ c: [11,22,33,44,55,66]*100
10
+ }, index: (1..600).to_a.shuffle)
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("where") do
14
+ df.where(df[:a].eq(2) | df[:c].eq(55))
15
+ end
16
+
17
+ x.report("filter_rows") do
18
+ df.filter(:row) do |r|
19
+ r[:a] == 2 or r[:c] == 55
20
+ end
21
+ end
22
+ end
23
+
24
+ # ===== Benchmarks =====
25
+ #
26
+ # user system total real
27
+ # where 0.000000 0.000000 0.000000 ( 0.002575)
28
+ # filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
data/daru_lite.gemspec ADDED
@@ -0,0 +1,55 @@
1
+ # coding: utf-8
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+
4
+ require 'daru_lite/version.rb'
5
+
6
+ DaruLite::DESCRIPTION = <<MSG
7
+ Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
8
+ of data. Daru works seamlessly accross interpreters and leverages interpreter-specific
9
+ optimizations whenever they are available.
10
+
11
+ It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
12
+ and can be used with many others like mixed_models, gnuplotrb and iruby.
13
+
14
+ Daru Lite is a fork of Daru that aims to focus on data manipulation and stability.
15
+ MSG
16
+
17
+ Gem::Specification.new do |spec|
18
+ spec.name = 'daru_lite'
19
+ spec.version = DaruLite::VERSION
20
+ spec.authors = ['Thomas Naude-Filonnière', 'Maxime Lasserre', 'Julie Thomas', 'Amar Slaoua', 'Mourtada Belhantri']
21
+ spec.summary = %q{Data Analysis in RUby, stripped down}
22
+ spec.description = DaruLite::DESCRIPTION
23
+ spec.homepage = "https://github.com/pollandroll/daru"
24
+ spec.license = 'BSD-2-Clause'
25
+
26
+ spec.files = `git ls-files -z`.split("\x0")
27
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
28
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
29
+ spec.require_paths = ["lib"]
30
+
31
+ spec.add_development_dependency 'activerecord', '~> 6.0'
32
+ spec.add_development_dependency 'awesome_print', '~> 1.9.2'
33
+ spec.add_development_dependency 'bundler', '~> 2.1', '>= 2.1.4'
34
+ spec.add_development_dependency 'dbd-sqlite3', '~> 1.2.5'
35
+ spec.add_development_dependency 'dbi', '~> 0.4.5'
36
+ spec.add_development_dependency 'distribution', '~> 0.8'
37
+ spec.add_development_dependency 'matrix', '~> 0.4.2'
38
+ spec.add_development_dependency 'nokogiri', '~> 1.16.2'
39
+ spec.add_development_dependency 'prime', '~> 0.1.2'
40
+ spec.add_development_dependency 'pry', '~> 0.14'
41
+ spec.add_development_dependency 'pry-byebug', '~> 3.10.1'
42
+ spec.add_development_dependency 'rake', '~> 13.0'
43
+ spec.add_development_dependency 'rspec', '~> 3.11'
44
+ spec.add_development_dependency 'rspec-its', '~> 1.3.0'
45
+ spec.add_development_dependency 'rubocop', '~> 1.60'
46
+ spec.add_development_dependency 'rubocop-performance', '~> 1.20.2'
47
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
48
+ spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
49
+ spec.add_development_dependency 'simplecov', '~> 0.22.0'
50
+ spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
51
+ spec.add_development_dependency 'sqlite3', '~> 1.7.2'
52
+ # issue : https://github.com/SciRuby/daru/issues/493 occurred
53
+ # with latest version of sqlite3
54
+ spec.add_development_dependency 'webmock', '~> 3.20.0'
55
+ end
data/images/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # Images
2
+
3
+ This folder contains images that are being used in the project README to display code examples.
4
+
5
+ Do not rename any of these files.
data/images/con0.png ADDED
Binary file
data/images/con1.png ADDED
Binary file
data/images/init0.png ADDED
Binary file