daru 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91fd17361715d81a4eda319e4695bc53a44979de
|
4
|
+
data.tar.gz: 8768b7c62619d4f8446e8167a95990ee59553dde
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd7d40bb2e1b7ed2f4ea5a598556e8a58f5bf35c2fcbe895150a21752b4ca223ca184b6c7c6e1c1c30bf753df478578beec1d99aff2c31d4d7cfe2520013c47d
|
7
|
+
data.tar.gz: 264425a5bcd87e2eca1261792d3adf16a497ac20ec1c9c04743cafc84e9f4335525a2c289d36be63b9f88d8fcf19f84cc70824b6ada29ce3105872bc20231d18
|
data/.build.sh
CHANGED
@@ -6,9 +6,9 @@ gem build nmatrix.gemspec
|
|
6
6
|
gem install nmatrix-0.1.0.gem
|
7
7
|
cd ..
|
8
8
|
rm -rf nmatrix
|
9
|
-
git clone https://github.com/v0dro/gsl-nmatrix
|
10
|
-
cd gsl-nmatrix
|
11
|
-
gem build gsl-nmatrix.gemspec
|
12
|
-
gem install gsl-nmatrix-1.17.gem
|
13
|
-
cd ..
|
14
|
-
rm -rf gsl-nmatrix
|
9
|
+
# git clone https://github.com/v0dro/gsl-nmatrix
|
10
|
+
# cd gsl-nmatrix
|
11
|
+
# gem build gsl-nmatrix.gemspec
|
12
|
+
# gem install gsl-nmatrix-1.17.gem
|
13
|
+
# cd ..
|
14
|
+
# rm -rf gsl-nmatrix
|
data/.gitignore
CHANGED
data/CONTRIBUTING.md
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
## Installing daru development dependencies
|
4
4
|
|
5
|
-
If you want to run the full rspec suite, you will need the latest unreleased nmatrix
|
5
|
+
If you want to run the full rspec suite, you will need the latest unreleased nmatrix gem. It will be released upstream soon, but please follow this procedure for now.
|
6
6
|
|
7
|
-
Keep in mind that either nmatrix
|
7
|
+
Keep in mind that nmatrix and rb-gsl are NOT NECESSARY for using daru. They are only required for an optional speed-up and for running the test suite.
|
8
8
|
|
9
9
|
To install dependencies, execute the following commands:
|
10
10
|
|
@@ -16,7 +16,7 @@ To install dependencies, execute the following commands:
|
|
16
16
|
`sudo apt-get install -y libgsl0-dev r-base r-base-dev`
|
17
17
|
`sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"`
|
18
18
|
|
19
|
-
Then execute the .build.sh script to clone and install the latest nmatrix
|
19
|
+
Then execute the [.build.sh script](https://github.com/v0dro/daru/blob/master/.build.sh) to clone and install the latest nmatrix on your system:
|
20
20
|
|
21
21
|
`./.build.sh`
|
22
22
|
|
@@ -29,3 +29,7 @@ And run the test suite (should be all green with pending tests):
|
|
29
29
|
`bundle exec rspec`
|
30
30
|
|
31
31
|
If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
|
32
|
+
|
33
|
+
## Daru internals
|
34
|
+
|
35
|
+
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
data/History.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
# 0.1.1
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
- Added a new class Daru::Offsets for providing a uniform API to jump between dates.
|
5
|
+
- Added benchmarking scripts
|
6
|
+
- Added a new Arel-like querying syntax for Vector and DataFrame. This will allow faster and more intuitive lookup of data than using loops such as filter.
|
7
|
+
- Vector
|
8
|
+
- #concat now compulsorily requires a second index argument.
|
9
|
+
- Added new method #index= to change the index directly.
|
10
|
+
- Added basic functions for rolling statistics - mean, std, count, etc.
|
11
|
+
- Added cumulative sum function.
|
12
|
+
- Added #keep_if.
|
13
|
+
- Added #count_values.
|
14
|
+
- Indexing
|
15
|
+
- Changed Index so that it now accepts all sorts of data (not restricted to only Symbols as it was previously).
|
16
|
+
- Rewrote MultiIndex in levels-and-labels form so that it's faster and more accommodating of different kinds of index levels.
|
17
|
+
- Changed .new to return appropriate index object based on data passed.
|
18
|
+
- Added .from_tuple and .from_array methods to MultiIndex.
|
19
|
+
- Added union and intersection behaviour to Index and MultiIndex.
|
20
|
+
- Added a new index, DateTimeIndex for indexing with time-based data.
|
21
|
+
- Optimized range search for Index.
|
22
|
+
- DataFrame
|
23
|
+
- Removed the DataFrameByVector class and the #vector function. Now the only
|
24
|
+
way to access a Vector in a DF is by using the #[] operator.
|
25
|
+
- Added new method #index= and #vectors= for changing row and column indexes directly.
|
26
|
+
- Optimized Vector value setting and retrieval.
|
27
|
+
- Added inner, outer, left outer and right outer joins with the #join method.
|
28
|
+
- Added #set_index.
|
29
|
+
* Changes
|
30
|
+
- Removed the + operator overload from Index and replaced it with union.
|
31
|
+
- Removed the second 'values' argument from Daru::Index because it's redundant.
|
32
|
+
- Changed behaviour of Vector#reindex and DataFrame#reindex and #reindex_vectors to preserve indexing of original data when possible.
|
33
|
+
* Fixes
|
34
|
+
- Fixed DataFrame#delete_row and Vector#delete_if.
|
35
|
+
- Fixed Vector#rename.
|
36
|
+
|
1
37
|
# 0.1.0
|
2
38
|
|
3
39
|
* Fixes
|
data/README.md
CHANGED
@@ -19,7 +19,8 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
|
|
19
19
|
* Data structures:
|
20
20
|
- Vector - A basic 1-D vector.
|
21
21
|
- DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
|
22
|
-
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby) and [statsample](https://github.com/SciRuby/statsample).
|
22
|
+
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
|
23
|
+
* Support for time series.
|
23
24
|
* Singly and hierarchically indexed data structures.
|
24
25
|
* Flexible and intuitive API for manipulation and analysis of data.
|
25
26
|
* Easy plotting, statistics and arithmetic.
|
@@ -27,27 +28,41 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
|
|
27
28
|
* Optional speed and space optimization on MRI with [NMatrix](https://github.com/SciRuby/nmatrix) and GSL.
|
28
29
|
* Easy splitting, aggregation and grouping of data.
|
29
30
|
* Quickly reducing data with pivot tables for quick data summary.
|
30
|
-
* Import and
|
31
|
+
* Import and export data from and to Excel, CSV, SQL Databases and plain text files.
|
31
32
|
|
32
33
|
## Notebooks
|
33
34
|
|
34
|
-
|
35
|
+
#### Notebooks on most use cases
|
35
36
|
|
37
|
+
* [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
|
36
38
|
* [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
|
37
39
|
* [Detailed Usage of Daru::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
|
38
40
|
* [Detailed Usage of Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
|
39
41
|
* [Visualizing Data With Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
|
42
|
+
* [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
|
40
43
|
* [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
|
41
44
|
|
45
|
+
#### Notebooks on Time series
|
46
|
+
|
47
|
+
* [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
|
48
|
+
* [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
|
49
|
+
|
42
50
|
### Case Studies
|
43
51
|
|
44
52
|
* [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
|
45
53
|
* [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
|
54
|
+
* [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
|
46
55
|
|
47
56
|
## Blog Posts
|
48
57
|
|
49
58
|
* [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
|
50
59
|
* [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
|
60
|
+
* [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
|
61
|
+
|
62
|
+
### Time series
|
63
|
+
|
64
|
+
* [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
|
65
|
+
* [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
|
51
66
|
|
52
67
|
## Documentation
|
53
68
|
|
@@ -55,32 +70,25 @@ Docs can be found [here](https://rubygems.org/gems/daru).
|
|
55
70
|
|
56
71
|
## Roadmap
|
57
72
|
|
58
|
-
* Automate testing for both MRI and JRuby.
|
59
73
|
* Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
|
60
|
-
* Completely test all functionality for MDArray.
|
61
74
|
* Basic Data manipulation and analysis operations:
|
62
75
|
- DF concat
|
63
|
-
* Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
|
64
76
|
* Assignment of a column to a single number should set the entire column to that number.
|
65
|
-
* == between daru_vector and string/number.
|
66
77
|
* Multiple column assignment with []=
|
67
78
|
* Multiple value assignment for vectors with []=.
|
68
79
|
* #find\_max function which will evaluate a block and return the row for which the value of the block is max.
|
69
|
-
* Function to check if a value of a row/vector is within a specified range.
|
70
|
-
* Create a new vector in map_rows if any of the already present rows dont match the one assigned in the block.
|
71
80
|
* Sort by index.
|
72
|
-
* Statistics on DataFrame over rows
|
73
|
-
* Cumulative sum.
|
74
|
-
* Time series support.
|
81
|
+
* Statistics on DataFrame over rows.
|
75
82
|
* Calculate percentage change.
|
76
83
|
* Have some sample data sets for users to play around with. Should be able to load these from the code itself.
|
77
84
|
* Sorting with missing data present.
|
78
|
-
* re_index should re establish previous index values in the newly supplied index.
|
79
85
|
|
80
86
|
## Contributing
|
81
87
|
|
82
88
|
Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
|
83
89
|
|
90
|
+
For details see [CONTRIBUTING](https://github.com/v0dro/daru/blob/master/CONTRIBUTING.md).
|
91
|
+
|
84
92
|
## Acknowledgements
|
85
93
|
|
86
94
|
* Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
|
data/Rakefile
CHANGED
@@ -1,6 +1,21 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'bundler/gem_tasks'
|
3
3
|
|
4
|
+
lib_folder = File.expand_path("../lib", __FILE__)
|
4
5
|
RSpec::Core::RakeTask.new(:spec)
|
5
6
|
|
6
|
-
task :default => :spec
|
7
|
+
task :default => :spec
|
8
|
+
|
9
|
+
task :console do |task|
|
10
|
+
cmd = [ 'irb', "-r '#{lib_folder}/daru.rb'" ]
|
11
|
+
run *cmd
|
12
|
+
end
|
13
|
+
|
14
|
+
task :pry do |task|
|
15
|
+
cmd = [ 'pry', "-r '#{lib_folder}/daru.rb'" ]
|
16
|
+
run *cmd
|
17
|
+
end
|
18
|
+
|
19
|
+
def run *cmd
|
20
|
+
sh(cmd.join(" "))
|
21
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
"Group","Treatment","Replicate","RelativeFitness"
|
2
|
+
"BKB","Tube",1,0.869962555792838
|
3
|
+
"BKB","Tube",2,1.00036299125423
|
4
|
+
"BKB","Tube",3,0.982935090384188
|
5
|
+
"BAC","Tube",1,0.810391635206191
|
6
|
+
"BAC","Tube",2,0.795106571577928
|
7
|
+
"JDK","Tube",1,0.849203581734814
|
8
|
+
"JDK","Tube",2,0.917636977577209
|
9
|
+
"JDK","Tube",3,0.905323024195181
|
10
|
+
"ETH","Tube",1,0.930820887284085
|
11
|
+
"ETH","Tube",2,0.958183317853959
|
12
|
+
"ETH","Tube",3,0.91428823286218
|
13
|
+
"SWI","Tube",1,0.918513532826773
|
14
|
+
"SWI","Tube",2,0.909023164202865
|
15
|
+
"SWI","Tube",3,0.928404818223812
|
16
|
+
"PPP","Tube",1,0.990500457882474
|
17
|
+
"PPP","Tube",2,0.97156464183624
|
18
|
+
"PPP","Tube",3,0.948766905642272
|
19
|
+
"ECO","Tube",1,0.996870763277447
|
20
|
+
"ECO","Tube",2,0.976750424056972
|
21
|
+
"ECO","Tube",3,0.939479247244218
|
22
|
+
"DOS","Tube",1,0.9461642789306
|
23
|
+
"DOS","Tube",2,0.951056617451754
|
24
|
+
"DOS","Tube",3,0.939563175900147
|
25
|
+
"FIT","Tube",1,0.942215594296048
|
26
|
+
"FIT","Tube",2,0.926698027408171
|
27
|
+
"FIT","Tube",3,1.00031750759377
|
28
|
+
"HHE","Tube",1,0.944414164259624
|
29
|
+
"HHE","Tube",2,0.95194592074032
|
30
|
+
"HHE","Tube",3,0.922485751593779
|
31
|
+
"H2W","Tube",1,0.915304498822462
|
32
|
+
"H2W","Tube",2,0.853906227834699
|
33
|
+
"H2W","Tube",3,0.938698147417512
|
34
|
+
"BKB","Dish",1,1.15254427639376
|
35
|
+
"BKB","Dish",2,1.28708514998039
|
36
|
+
"BKB","Dish",3,1.50741545998468
|
37
|
+
"BAC","Dish",2,1.65223419104399
|
38
|
+
"BAC","Dish",3,1.61502230247434
|
39
|
+
"JDK","Dish",1,1.5102682623396
|
40
|
+
"JDK","Dish",2,1.58270078151532
|
41
|
+
"JDK","Dish",3,1.54715152852448
|
42
|
+
"ETH","Dish",1,1.61281555981988
|
43
|
+
"ETH","Dish",2,1.36445572541848
|
44
|
+
"ETH","Dish",3,1.47155233724852
|
45
|
+
"SWI","Dish",1,1.36255560722945
|
46
|
+
"SWI","Dish",2,1.51569228710501
|
47
|
+
"SWI","Dish",3,1.47714121664773
|
48
|
+
"PPP","Dish",1,1.50896917413568
|
49
|
+
"PPP","Dish",2,1.58138592886817
|
50
|
+
"PPP","Dish",3,1.55356839081014
|
51
|
+
"ECO","Dish",1,1.69927564777656
|
52
|
+
"ECO","Dish",2,1.45605668065038
|
53
|
+
"ECO","Dish",3,1.52825762511041
|
54
|
+
"DOS","Dish",1,1.59453120031454
|
55
|
+
"DOS","Dish",2,1.58132511409296
|
56
|
+
"DOS","Dish",3,1.58558683941181
|
57
|
+
"FIT","Dish",1,1.01468578343171
|
58
|
+
"FIT","Dish",2,0.955220815085047
|
59
|
+
"FIT","Dish",3,1.03597298511451
|
60
|
+
"HHE","Dish",1,1.49399422139513
|
61
|
+
"HHE","Dish",2,1.32980278545457
|
62
|
+
"HHE","Dish",3,1.4505220676174
|
63
|
+
"H2W","Dish",1,1.52460143511286
|
64
|
+
"H2W","Dish",2,1.53958247554139
|
65
|
+
"H2W","Dish",3,1.51149897493835
|
@@ -0,0 +1,39 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
Benchmark.bm do |x|
|
7
|
+
x.report("Create with Arrays and clone") do
|
8
|
+
df = Daru::DataFrame.new({
|
9
|
+
a: 100000.times.map { rand },
|
10
|
+
b: 100000.times.map { rand },
|
11
|
+
c: 100000.times.map { rand }
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
x.report("Create with Vectors and clone") do
|
16
|
+
df = Daru::DataFrame.new({
|
17
|
+
a: Daru::Vector.new(100000.times.map { rand }),
|
18
|
+
b: Daru::Vector.new(100000.times.map { rand }),
|
19
|
+
c: Daru::Vector.new(100000.times.map { rand })
|
20
|
+
})
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Create with Vector and dont clone") do
|
24
|
+
df = Daru::DataFrame.new({
|
25
|
+
a: Daru::Vector.new(100000.times.map { rand }),
|
26
|
+
b: Daru::Vector.new(100000.times.map { rand }),
|
27
|
+
c: Daru::Vector.new(100000.times.map { rand })
|
28
|
+
}, clone: false)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Create by row from Arrays") do
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# ===== Benchmarks =====
|
36
|
+
# user system total real
|
37
|
+
# Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
|
38
|
+
# Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
|
39
|
+
# Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
data = Daru::DataFrame.from_csv 'TradeoffData.csv'
|
7
|
+
|
8
|
+
Benchmark.bm do |x|
|
9
|
+
x.report("Single column grouping") do
|
10
|
+
@single = data.group_by([:Treatment])
|
11
|
+
end
|
12
|
+
|
13
|
+
x.report("Multi-column grouping") do
|
14
|
+
@multi = data.group_by([:Group, :Treatment])
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Single mean") do
|
18
|
+
@single.mean
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Multi mean") do
|
22
|
+
@multi.mean
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ===== Benchmarks =====
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000356)
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000958)
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.000865)
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.002748)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Access single row") do
|
14
|
+
df.row[50]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access rows by comma") do
|
18
|
+
df.row[*(5..40000).to_a.shuffle]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Individual rows") do
|
22
|
+
rows = []
|
23
|
+
index = (5..40000).to_a.shuffle
|
24
|
+
index.each do |a|
|
25
|
+
rows << df.row[a].to_a
|
26
|
+
end
|
27
|
+
|
28
|
+
Daru::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Access rows by range") do
|
32
|
+
df.row[5..40000]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ==== Benchmarks ====
|
37
|
+
# user system total real
|
38
|
+
# Access single row 0.000000 0.000000 0.000000 ( 0.000059)
|
39
|
+
# Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
|
40
|
+
# Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
|
41
|
+
# Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Set a single row with Array") do
|
14
|
+
df.row[5] = [55,22,65]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Set a single row with Daru::Vector") do
|
18
|
+
df.row[3456] = Daru::Vector.new([3,54,11], index: [:b,:e,:a])
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Create a new row with Array") do
|
22
|
+
df.row[100001] = [34,66,11]
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Create a new row with Daru::Vector") do
|
26
|
+
df.row[100005] = Daru::Vector.new([34,66,11], index: [:a,:b,:t])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# ==== Benchmarks ====
|
31
|
+
#
|
32
|
+
# user system total real
|
33
|
+
# Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
|
34
|
+
# Set a single row with Daru::Vector 0.600000 0.000000 0.600000 ( 0.598599)
|
35
|
+
# Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
|
36
|
+
# Create a new row with Daru::Vector 0.950000 0.000000 0.950000 ( 0.950725)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
vector = Daru::Vector.new(10000.times.map.to_a.shuffle)
|
7
|
+
df = Daru::DataFrame.new({
|
8
|
+
a: vector,
|
9
|
+
b: vector,
|
10
|
+
c: vector
|
11
|
+
})
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Sort a Vector without any args") do
|
14
|
+
vector.sort
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Sort vector in descending order with custom <=> operator") do
|
18
|
+
vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Sort single column of DataFrame") do
|
22
|
+
df.sort([:a])
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Sort two columns of DataFrame") do
|
26
|
+
df.sort([:c,:a])
|
27
|
+
end
|
28
|
+
|
29
|
+
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
30
|
+
df.sort([:c,:a], ascending: [true, false],
|
31
|
+
by: { c: lambda { |a,b| a.to_s <=> b.to_s },
|
32
|
+
a: lambda { |a,b| (a+1) <=> (b+1) } })
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# FIXME: MASSIVE SPEEDUP NECESSARY!
|
37
|
+
|
38
|
+
# ===== Benchmarks =====
|
39
|
+
# user system total real
|
40
|
+
# Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
|
41
|
+
# Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
|
42
|
+
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
43
|
+
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
44
|
+
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|