daru 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +6 -6
  3. data/.gitignore +2 -0
  4. data/CONTRIBUTING.md +7 -3
  5. data/History.md +36 -0
  6. data/README.md +21 -13
  7. data/Rakefile +16 -1
  8. data/benchmarks/TradeoffData.csv +65 -0
  9. data/benchmarks/dataframe_creation.rb +39 -0
  10. data/benchmarks/group_by.rb +32 -0
  11. data/benchmarks/row_access.rb +41 -0
  12. data/benchmarks/row_assign.rb +36 -0
  13. data/benchmarks/sorting.rb +44 -0
  14. data/benchmarks/vector_access.rb +31 -0
  15. data/benchmarks/vector_assign.rb +42 -0
  16. data/benchmarks/where_clause.rb +48 -0
  17. data/benchmarks/where_vs_filter.rb +28 -0
  18. data/daru.gemspec +29 -5
  19. data/lib/daru.rb +30 -1
  20. data/lib/daru/accessors/array_wrapper.rb +2 -2
  21. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
  22. data/lib/daru/core/group_by.rb +112 -31
  23. data/lib/daru/core/merge.rb +170 -0
  24. data/lib/daru/core/query.rb +95 -0
  25. data/lib/daru/dataframe.rb +335 -223
  26. data/lib/daru/date_time/index.rb +550 -0
  27. data/lib/daru/date_time/offsets.rb +397 -0
  28. data/lib/daru/index.rb +266 -54
  29. data/lib/daru/io/io.rb +1 -2
  30. data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
  31. data/lib/daru/maths/arithmetic/vector.rb +2 -2
  32. data/lib/daru/maths/statistics/dataframe.rb +58 -8
  33. data/lib/daru/maths/statistics/vector.rb +229 -0
  34. data/lib/daru/vector.rb +230 -80
  35. data/lib/daru/version.rb +1 -1
  36. data/spec/core/group_by_spec.rb +16 -16
  37. data/spec/core/merge_spec.rb +52 -0
  38. data/spec/core/query_spec.rb +171 -0
  39. data/spec/dataframe_spec.rb +278 -280
  40. data/spec/date_time/data_spec.rb +199 -0
  41. data/spec/date_time/index_spec.rb +433 -0
  42. data/spec/date_time/offsets_spec.rb +371 -0
  43. data/spec/fixtures/stock_data.csv +500 -0
  44. data/spec/index_spec.rb +317 -11
  45. data/spec/io/io_spec.rb +18 -17
  46. data/spec/math/arithmetic/dataframe_spec.rb +3 -3
  47. data/spec/math/statistics/dataframe_spec.rb +39 -1
  48. data/spec/math/statistics/vector_spec.rb +163 -1
  49. data/spec/monkeys_spec.rb +4 -0
  50. data/spec/spec_helper.rb +3 -0
  51. data/spec/vector_spec.rb +125 -60
  52. metadata +71 -14
  53. data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
  54. data/lib/daru/multi_index.rb +0 -216
  55. data/spec/multi_index_spec.rb +0 -216
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e48778067b94afc9f1060d7d6d4212029b421f2
4
- data.tar.gz: 5d0ed9cc2fcf70562e0fcf2767c593e1f8fbfa54
3
+ metadata.gz: 91fd17361715d81a4eda319e4695bc53a44979de
4
+ data.tar.gz: 8768b7c62619d4f8446e8167a95990ee59553dde
5
5
  SHA512:
6
- metadata.gz: 778ad55b592865e08388eac0001cdbce6bc01f58fa77ed8e2a2b72e44d8a54fc2d289f241352affd98b6424326416634ef729889163175f9eb64c83e471fb7e2
7
- data.tar.gz: c498252daf63597adc0255810d3eb7b60c102ef086117bf451de821faaa8e196933570b06f88e6415b51c1ef6ea2ee6f60afce18b65691483741158954c73d0b
6
+ metadata.gz: bd7d40bb2e1b7ed2f4ea5a598556e8a58f5bf35c2fcbe895150a21752b4ca223ca184b6c7c6e1c1c30bf753df478578beec1d99aff2c31d4d7cfe2520013c47d
7
+ data.tar.gz: 264425a5bcd87e2eca1261792d3adf16a497ac20ec1c9c04743cafc84e9f4335525a2c289d36be63b9f88d8fcf19f84cc70824b6ada29ce3105872bc20231d18
data/.build.sh CHANGED
@@ -6,9 +6,9 @@ gem build nmatrix.gemspec
6
6
  gem install nmatrix-0.1.0.gem
7
7
  cd ..
8
8
  rm -rf nmatrix
9
- git clone https://github.com/v0dro/gsl-nmatrix
10
- cd gsl-nmatrix
11
- gem build gsl-nmatrix.gemspec
12
- gem install gsl-nmatrix-1.17.gem
13
- cd ..
14
- rm -rf gsl-nmatrix
9
+ # git clone https://github.com/v0dro/gsl-nmatrix
10
+ # cd gsl-nmatrix
11
+ # gem build gsl-nmatrix.gemspec
12
+ # gem install gsl-nmatrix-1.17.gem
13
+ # cd ..
14
+ # rm -rf gsl-nmatrix
data/.gitignore CHANGED
@@ -1,2 +1,4 @@
1
1
  *.gem
2
2
  Gemfile.lock
3
+ doc/
4
+ .yardoc/
@@ -2,9 +2,9 @@
2
2
 
3
3
  ## Installing daru development dependencies
4
4
 
5
- If you want to run the full rspec suite, you will need the latest unreleased nmatrix and gsl-nmatrix ruby gems. They will released upstream soon but please follow this procedure for now.
5
+ If you want to run the full rspec suite, you will need the latest unreleased nmatrix gem. It will be released upstream soon, but please follow this procedure for now.
6
6
 
7
- Keep in mind that either nmatrix OR gsl-nmatrix are NOT NECESSARY for using daru. They are just required for an optional speed up.
7
+ Keep in mind that nmatrix and rb-gsl are NOT NECESSARY for using daru. They are only required for an optional speed up and for running the test suite.
8
8
 
9
9
  To install dependencies, execute the following commands:
10
10
 
@@ -16,7 +16,7 @@ To install dependencies, execute the following commands:
16
16
  `sudo apt-get install -y libgsl0-dev r-base r-base-dev`
17
17
  `sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"`
18
18
 
19
- Then execute the .build.sh script to clone and install the latest nmatrix and gsl-nmatrix on your system:
19
+ Then execute the [.build.sh script](https://github.com/v0dro/daru/blob/master/.build.sh) to clone and install the latest nmatrix on your system:
20
20
 
21
21
  `./.build.sh`
22
22
 
@@ -29,3 +29,7 @@ And run the test suite (should be all green with pending tests):
29
29
  `bundle exec rspec`
30
30
 
31
31
  If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
32
+
33
+ ## Daru internals
34
+
35
+ To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
data/History.md CHANGED
@@ -1,3 +1,39 @@
1
+ # 0.1.1
2
+
3
+ * Enhancements
4
+ - Added a new class Daru::Offsets for providing a uniform API to jump between dates.
5
+ - Added benchmarking scripts
6
+ - Added a new Arel-like querying syntax for Vector and DataFrame. This will allow faster and more intuitive lookup of data than using loops such as filter.
7
+ - Vector
8
+ - #concat now compulsorily requires a second index argument.
9
+ - Added new method #index= to change the index directly.
10
+ - Added basic functions for rolling statistics - mean, std, count, etc.
11
+ - Added cumulative sum function.
12
+ - Added #keep_if.
13
+ - Added #count_values.
14
+ - Indexing
15
+ - Changed Index so that it now accepts all sorts of data (not restricted to only Symbols as it was previously).
16
+ - Rewrote MultiIndex in levels and labels form so that it's faster and more accommodating of different kinds of index levels.
17
+ - Changed .new to return appropriate index object based on data passed.
18
+ - Added .from_tuple and .from_array methods to MultiIndex.
19
+ - Added union and intersection behaviour to Index and MultiIndex.
20
+ - Added a new index, DateTimeIndex for indexing with time-based data.
21
+ - Optimized range search for Index.
22
+ - DataFrame
23
+ - Removed the DataFrameByVector class and the #vector function. Now the only
24
+ way to access a Vector in a DF is by using the #[] operator.
25
+ - Added new method #index= and #vectors= for changing row and column indexes directly.
26
+ - Optimized Vector value setting and retrieval.
27
+ - Added inner, outer, left outer and right outer joins with the #join method.
28
+ - Added #set_index.
29
+ * Changes
30
+ - Removed the + operator overload from Index and replaced it with union.
31
+ - Removed the second 'values' argument from Daru::Index because it's redundant.
32
+ - Changed behaviour of Vector#reindex and DataFrame#reindex and #reindex_vectors to preserve indexing of original data when possible.
33
+ * Fixes
34
+ - Fixed DataFrame#delete_row and Vector#delete_if.
35
+ - Fixed Vector#rename.
36
+
1
37
  # 0.1.0
2
38
 
3
39
  * Fixes
data/README.md CHANGED
@@ -19,7 +19,8 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
19
19
  * Data structures:
20
20
  - Vector - A basic 1-D vector.
21
21
  - DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
22
- * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby) and [statsample](https://github.com/SciRuby/statsample).
22
+ * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
23
+ * Support for time series.
23
24
  * Singly and hierarchically indexed data structures.
24
25
  * Flexible and intuitive API for manipulation and analysis of data.
25
26
  * Easy plotting, statistics and arithmetic.
@@ -27,27 +28,41 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
27
28
  * Optional speed and space optimization on MRI with [NMatrix](https://github.com/SciRuby/nmatrix) and GSL.
28
29
  * Easy splitting, aggregation and grouping of data.
29
30
  * Quickly reducing data with pivot tables for quick data summary.
30
- * Import and exports dataset from and to Excel, CSV, Databases and plain text files.
31
+ * Import and export data from and to Excel, CSV, SQL Databases and plain text files.
31
32
 
32
33
  ## Notebooks
33
34
 
34
- ### Usage
35
+ #### Notebooks on most use cases
35
36
 
37
+ * [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
36
38
  * [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
37
39
  * [Detailed Usage of Daru::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
38
40
  * [Detailed Usage of Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
39
41
  * [Visualizing Data With Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
42
+ * [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
40
43
  * [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
41
44
 
45
+ #### Notebooks on Time series
46
+
47
+ * [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
48
+ * [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
49
+
42
50
  ### Case Studies
43
51
 
44
52
  * [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
45
53
  * [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
54
+ * [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
46
55
 
47
56
  ## Blog Posts
48
57
 
49
58
  * [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
50
59
  * [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
60
+ * [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
61
+
62
+ ### Time series
63
+
64
+ * [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
65
+ * [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
51
66
 
52
67
  ## Documentation
53
68
 
@@ -55,32 +70,25 @@ Docs can be found [here](https://rubygems.org/gems/daru).
55
70
 
56
71
  ## Roadmap
57
72
 
58
- * Automate testing for both MRI and JRuby.
59
73
  * Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
60
- * Completely test all functionality for MDArray.
61
74
  * Basic Data manipulation and analysis operations:
62
75
  - DF concat
63
- * Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
64
76
  * Assignment of a column to a single number should set the entire column to that number.
65
- * == between daru_vector and string/number.
66
77
  * Multiple column assignment with []=
67
78
  * Multiple value assignment for vectors with []=.
68
79
  * #find\_max function which will evaluate a block and return the row for the value of the block is max.
69
- * Function to check if a value of a row/vector is within a specified range.
70
- * Create a new vector in map_rows if any of the already present rows dont match the one assigned in the block.
71
80
  * Sort by index.
72
- * Statistics on DataFrame over rows and columns.
73
- * Cumulative sum.
74
- * Time series support.
81
+ * Statistics on DataFrame over rows.
75
82
  * Calculate percentage change.
76
83
  * Have some sample data sets for users to play around with. Should be able to load these from the code itself.
77
84
  * Sorting with missing data present.
78
- * re_index should re establish previous index values in the newly supplied index.
79
85
 
80
86
  ## Contributing
81
87
 
82
88
  Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
83
89
 
90
+ For details see [CONTRIBUTING](https://github.com/v0dro/daru/blob/master/CONTRIBUTING.md).
91
+
84
92
  ## Acknowledgements
85
93
 
86
94
  * Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
data/Rakefile CHANGED
@@ -1,6 +1,21 @@
1
1
  require 'rspec/core/rake_task'
2
2
  require 'bundler/gem_tasks'
3
3
 
4
+ lib_folder = File.expand_path("../lib", __FILE__)
4
5
  RSpec::Core::RakeTask.new(:spec)
5
6
 
6
- task :default => :spec
7
+ task :default => :spec
8
+
9
+ task :console do |task|
10
+ cmd = [ 'irb', "-r '#{lib_folder}/daru.rb'" ]
11
+ run *cmd
12
+ end
13
+
14
+ task :pry do |task|
15
+ cmd = [ 'pry', "-r '#{lib_folder}/daru.rb'" ]
16
+ run *cmd
17
+ end
18
+
19
+ def run *cmd
20
+ sh(cmd.join(" "))
21
+ end
@@ -0,0 +1,65 @@
1
+ "Group","Treatment","Replicate","RelativeFitness"
2
+ "BKB","Tube",1,0.869962555792838
3
+ "BKB","Tube",2,1.00036299125423
4
+ "BKB","Tube",3,0.982935090384188
5
+ "BAC","Tube",1,0.810391635206191
6
+ "BAC","Tube",2,0.795106571577928
7
+ "JDK","Tube",1,0.849203581734814
8
+ "JDK","Tube",2,0.917636977577209
9
+ "JDK","Tube",3,0.905323024195181
10
+ "ETH","Tube",1,0.930820887284085
11
+ "ETH","Tube",2,0.958183317853959
12
+ "ETH","Tube",3,0.91428823286218
13
+ "SWI","Tube",1,0.918513532826773
14
+ "SWI","Tube",2,0.909023164202865
15
+ "SWI","Tube",3,0.928404818223812
16
+ "PPP","Tube",1,0.990500457882474
17
+ "PPP","Tube",2,0.97156464183624
18
+ "PPP","Tube",3,0.948766905642272
19
+ "ECO","Tube",1,0.996870763277447
20
+ "ECO","Tube",2,0.976750424056972
21
+ "ECO","Tube",3,0.939479247244218
22
+ "DOS","Tube",1,0.9461642789306
23
+ "DOS","Tube",2,0.951056617451754
24
+ "DOS","Tube",3,0.939563175900147
25
+ "FIT","Tube",1,0.942215594296048
26
+ "FIT","Tube",2,0.926698027408171
27
+ "FIT","Tube",3,1.00031750759377
28
+ "HHE","Tube",1,0.944414164259624
29
+ "HHE","Tube",2,0.95194592074032
30
+ "HHE","Tube",3,0.922485751593779
31
+ "H2W","Tube",1,0.915304498822462
32
+ "H2W","Tube",2,0.853906227834699
33
+ "H2W","Tube",3,0.938698147417512
34
+ "BKB","Dish",1,1.15254427639376
35
+ "BKB","Dish",2,1.28708514998039
36
+ "BKB","Dish",3,1.50741545998468
37
+ "BAC","Dish",2,1.65223419104399
38
+ "BAC","Dish",3,1.61502230247434
39
+ "JDK","Dish",1,1.5102682623396
40
+ "JDK","Dish",2,1.58270078151532
41
+ "JDK","Dish",3,1.54715152852448
42
+ "ETH","Dish",1,1.61281555981988
43
+ "ETH","Dish",2,1.36445572541848
44
+ "ETH","Dish",3,1.47155233724852
45
+ "SWI","Dish",1,1.36255560722945
46
+ "SWI","Dish",2,1.51569228710501
47
+ "SWI","Dish",3,1.47714121664773
48
+ "PPP","Dish",1,1.50896917413568
49
+ "PPP","Dish",2,1.58138592886817
50
+ "PPP","Dish",3,1.55356839081014
51
+ "ECO","Dish",1,1.69927564777656
52
+ "ECO","Dish",2,1.45605668065038
53
+ "ECO","Dish",3,1.52825762511041
54
+ "DOS","Dish",1,1.59453120031454
55
+ "DOS","Dish",2,1.58132511409296
56
+ "DOS","Dish",3,1.58558683941181
57
+ "FIT","Dish",1,1.01468578343171
58
+ "FIT","Dish",2,0.955220815085047
59
+ "FIT","Dish",3,1.03597298511451
60
+ "HHE","Dish",1,1.49399422139513
61
+ "HHE","Dish",2,1.32980278545457
62
+ "HHE","Dish",3,1.4505220676174
63
+ "H2W","Dish",1,1.52460143511286
64
+ "H2W","Dish",2,1.53958247554139
65
+ "H2W","Dish",3,1.51149897493835
@@ -0,0 +1,39 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ Benchmark.bm do |x|
7
+ x.report("Create with Arrays and clone") do
8
+ df = Daru::DataFrame.new({
9
+ a: 100000.times.map { rand },
10
+ b: 100000.times.map { rand },
11
+ c: 100000.times.map { rand }
12
+ })
13
+ end
14
+
15
+ x.report("Create with Vectors and clone") do
16
+ df = Daru::DataFrame.new({
17
+ a: Daru::Vector.new(100000.times.map { rand }),
18
+ b: Daru::Vector.new(100000.times.map { rand }),
19
+ c: Daru::Vector.new(100000.times.map { rand })
20
+ })
21
+ end
22
+
23
+ x.report("Create with Vector and dont clone") do
24
+ df = Daru::DataFrame.new({
25
+ a: Daru::Vector.new(100000.times.map { rand }),
26
+ b: Daru::Vector.new(100000.times.map { rand }),
27
+ c: Daru::Vector.new(100000.times.map { rand })
28
+ }, clone: false)
29
+ end
30
+
31
+ x.report("Create by row from Arrays") do
32
+ end
33
+ end
34
+
35
+ # ===== Benchmarks =====
36
+ # user system total real
37
+ # Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
38
+ # Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
39
+ # Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
@@ -0,0 +1,32 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ data = Daru::DataFrame.from_csv 'TradeoffData.csv'
7
+
8
+ Benchmark.bm do |x|
9
+ x.report("Single column grouping") do
10
+ @single = data.group_by([:Treatment])
11
+ end
12
+
13
+ x.report("Multi-column grouping") do
14
+ @multi = data.group_by([:Group, :Treatment])
15
+ end
16
+
17
+ x.report("Single mean") do
18
+ @single.mean
19
+ end
20
+
21
+ x.report("Multi mean") do
22
+ @multi.mean
23
+ end
24
+ end
25
+
26
+ # ===== Benchmarks =====
27
+ #
28
+ # user system total real
29
+ # Single column grouping 0.000000 0.000000 0.000000 (0.000356)
30
+ # Multi-column grouping 0.000000 0.000000 0.000000 (0.000958)
31
+ # Single mean 0.000000 0.000000 0.000000 (0.000865)
32
+ # Multi mean 0.000000 0.000000 0.000000 (0.002748)
@@ -0,0 +1,41 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ df = Daru::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Access single row") do
14
+ df.row[50]
15
+ end
16
+
17
+ x.report("Access rows by comma") do
18
+ df.row[*(5..40000).to_a.shuffle]
19
+ end
20
+
21
+ x.report("Individual rows") do
22
+ rows = []
23
+ index = (5..40000).to_a.shuffle
24
+ index.each do |a|
25
+ rows << df.row[a].to_a
26
+ end
27
+
28
+ Daru::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
29
+ end
30
+
31
+ x.report("Access rows by range") do
32
+ df.row[5..40000]
33
+ end
34
+ end
35
+
36
+ # ==== Benchmarks ====
37
+ # user system total real
38
+ # Access single row 0.000000 0.000000 0.000000 ( 0.000059)
39
+ # Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
40
+ # Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
41
+ # Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
@@ -0,0 +1,36 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ df = Daru::DataFrame.new({
7
+ a: 100000.times.map { rand },
8
+ b: 100000.times.map { rand },
9
+ c: 100000.times.map { rand }
10
+ })
11
+
12
+ Benchmark.bm do |x|
13
+ x.report("Set a single row with Array") do
14
+ df.row[5] = [55,22,65]
15
+ end
16
+
17
+ x.report("Set a single row with Daru::Vector") do
18
+ df.row[3456] = Daru::Vector.new([3,54,11], index: [:b,:e,:a])
19
+ end
20
+
21
+ x.report("Create a new row with Array") do
22
+ df.row[100001] = [34,66,11]
23
+ end
24
+
25
+ x.report("Create a new row with Daru::Vector") do
26
+ df.row[100005] = Daru::Vector.new([34,66,11], index: [:a,:b,:t])
27
+ end
28
+ end
29
+
30
+ # ==== Benchmarks ====
31
+ #
32
+ # user system total real
33
+ # Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
34
+ # Set a single row with Daru::Vector 0.600000 0.000000 0.600000 ( 0.598599)
35
+ # Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
36
+ # Create a new row with Daru::Vector 0.950000 0.000000 0.950000 ( 0.950725)
@@ -0,0 +1,44 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ vector = Daru::Vector.new(10000.times.map.to_a.shuffle)
7
+ df = Daru::DataFrame.new({
8
+ a: vector,
9
+ b: vector,
10
+ c: vector
11
+ })
12
+ Benchmark.bm do |x|
13
+ x.report("Sort a Vector without any args") do
14
+ vector.sort
15
+ end
16
+
17
+ x.report("Sort vector in descending order with custom <=> operator") do
18
+ vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
19
+ end
20
+
21
+ x.report("Sort single column of DataFrame") do
22
+ df.sort([:a])
23
+ end
24
+
25
+ x.report("Sort two columns of DataFrame") do
26
+ df.sort([:c,:a])
27
+ end
28
+
29
+ x.report("Sort two columns with custom operators in different orders of DataFrame") do
30
+ df.sort([:c,:a], ascending: [true, false],
31
+ by: { c: lambda { |a,b| a.to_s <=> b.to_s },
32
+ a: lambda { |a,b| (a+1) <=> (b+1) } })
33
+ end
34
+ end
35
+
36
+ # FIXME: MASSIVE SPEEDUP NECESSARY!
37
+
38
+ # ===== Benchmarks =====
39
+ # user system total real
40
+ # Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
41
+ # Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
42
+ # Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
43
+ # Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
44
+ # Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)