daru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91fd17361715d81a4eda319e4695bc53a44979de
|
4
|
+
data.tar.gz: 8768b7c62619d4f8446e8167a95990ee59553dde
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd7d40bb2e1b7ed2f4ea5a598556e8a58f5bf35c2fcbe895150a21752b4ca223ca184b6c7c6e1c1c30bf753df478578beec1d99aff2c31d4d7cfe2520013c47d
|
7
|
+
data.tar.gz: 264425a5bcd87e2eca1261792d3adf16a497ac20ec1c9c04743cafc84e9f4335525a2c289d36be63b9f88d8fcf19f84cc70824b6ada29ce3105872bc20231d18
|
data/.build.sh
CHANGED
@@ -6,9 +6,9 @@ gem build nmatrix.gemspec
|
|
6
6
|
gem install nmatrix-0.1.0.gem
|
7
7
|
cd ..
|
8
8
|
rm -rf nmatrix
|
9
|
-
git clone https://github.com/v0dro/gsl-nmatrix
|
10
|
-
cd gsl-nmatrix
|
11
|
-
gem build gsl-nmatrix.gemspec
|
12
|
-
gem install gsl-nmatrix-1.17.gem
|
13
|
-
cd ..
|
14
|
-
rm -rf gsl-nmatrix
|
9
|
+
# git clone https://github.com/v0dro/gsl-nmatrix
|
10
|
+
# cd gsl-nmatrix
|
11
|
+
# gem build gsl-nmatrix.gemspec
|
12
|
+
# gem install gsl-nmatrix-1.17.gem
|
13
|
+
# cd ..
|
14
|
+
# rm -rf gsl-nmatrix
|
data/.gitignore
CHANGED
data/CONTRIBUTING.md
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
|
3
3
|
## Installing daru development dependencies
|
4
4
|
|
5
|
-
If you want to run the full rspec suite, you will need the latest unreleased nmatrix
|
5
|
+
If you want to run the full rspec suite, you will need the latest unreleased nmatrix gem. It will be released upstream soon, but please follow this procedure for now.
|
6
6
|
|
7
|
-
Keep in mind that either nmatrix
|
7
|
+
Keep in mind that neither nmatrix nor rb-gsl is NECESSARY for using daru. They are only required for an optional speed-up and for running the test suite.
|
8
8
|
|
9
9
|
To install dependencies, execute the following commands:
|
10
10
|
|
@@ -16,7 +16,7 @@ To install dependencies, execute the following commands:
|
|
16
16
|
`sudo apt-get install -y libgsl0-dev r-base r-base-dev`
|
17
17
|
`sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"`
|
18
18
|
|
19
|
-
Then execute the .build.sh script to clone and install the latest nmatrix
|
19
|
+
Then execute the [.build.sh script](https://github.com/v0dro/daru/blob/master/.build.sh) to clone and install the latest nmatrix on your system:
|
20
20
|
|
21
21
|
`./.build.sh`
|
22
22
|
|
@@ -29,3 +29,7 @@ And run the test suite (should be all green with pending tests):
|
|
29
29
|
`bundle exec rspec`
|
30
30
|
|
31
31
|
If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
|
32
|
+
|
33
|
+
## Daru internals
|
34
|
+
|
35
|
+
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
data/History.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
# 0.1.1
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
- Added a new class Daru::Offsets for providing a uniform API to jump between dates.
|
5
|
+
- Added benchmarking scripts
|
6
|
+
- Added a new Arel-like querying syntax for Vector and DataFrame. This will allow faster and more intuitive lookup of data than using loops such as filter.
|
7
|
+
- Vector
|
8
|
+
- #concat now compulsorily requires a second index argument.
|
9
|
+
- Added new method #index= to change the index directly.
|
10
|
+
- Added basic functions for rolling statistics - mean, std, count, etc.
|
11
|
+
- Added cumulative sum function.
|
12
|
+
- Added #keep_if.
|
13
|
+
- Added #count_values.
|
14
|
+
- Indexing
|
15
|
+
- Changed Index so that it now accepts all sorts of data (not restricted to only Symbols as it was previously).
|
16
|
+
- Rewrote MultiIndex in levels-and-labels form so that it's faster and more accommodating of different kinds of index levels.
|
17
|
+
- Changed .new to return appropriate index object based on data passed.
|
18
|
+
- Added .from_tuple and .from_array methods to MultiIndex.
|
19
|
+
- Added union and intersection behaviour to Index and MultiIndex.
|
20
|
+
- Added a new index, DateTimeIndex for indexing with time-based data.
|
21
|
+
- Optimized range search for Index.
|
22
|
+
- DataFrame
|
23
|
+
- Removed the DataFrameByVector class and the #vector function. Now the only
|
24
|
+
way to access a Vector in a DF is by using the #[] operator.
|
25
|
+
- Added new method #index= and #vectors= for changing row and column indexes directly.
|
26
|
+
- Optimized Vector value setting and retrieval.
|
27
|
+
- Added inner, outer, left outer and right outer joins with the #join method.
|
28
|
+
- Added #set_index.
|
29
|
+
* Changes
|
30
|
+
- Removed the + operator overload from Index and replaced it with union.
|
31
|
+
- Removed the second 'values' argument from Daru::Index because it's redundant.
|
32
|
+
- Changed behaviour of Vector#reindex and DataFrame#reindex and #reindex_vectors to preserve indexing of original data when possible.
|
33
|
+
* Fixes
|
34
|
+
- Fixed DataFrame#delete_row and Vector#delete_if.
|
35
|
+
- Fixed Vector#rename.
|
36
|
+
|
1
37
|
# 0.1.0
|
2
38
|
|
3
39
|
* Fixes
|
data/README.md
CHANGED
@@ -19,7 +19,8 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
|
|
19
19
|
* Data structures:
|
20
20
|
- Vector - A basic 1-D vector.
|
21
21
|
- DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
|
22
|
-
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby) and [statsample](https://github.com/SciRuby/statsample).
|
22
|
+
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
|
23
|
+
* Support for time series.
|
23
24
|
* Singly and hierarchically indexed data structures.
|
24
25
|
* Flexible and intuitive API for manipulation and analysis of data.
|
25
26
|
* Easy plotting, statistics and arithmetic.
|
@@ -27,27 +28,41 @@ Written in pure Ruby so should work with all ruby implementations. Tested with M
|
|
27
28
|
* Optional speed and space optimization on MRI with [NMatrix](https://github.com/SciRuby/nmatrix) and GSL.
|
28
29
|
* Easy splitting, aggregation and grouping of data.
|
29
30
|
* Quickly reducing data with pivot tables for quick data summary.
|
30
|
-
* Import and
|
31
|
+
* Import and export data from and to Excel, CSV, SQL Databases and plain text files.
|
31
32
|
|
32
33
|
## Notebooks
|
33
34
|
|
34
|
-
|
35
|
+
#### Notebooks on most use cases
|
35
36
|
|
37
|
+
* [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
|
36
38
|
* [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
|
37
39
|
* [Detailed Usage of Daru::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
|
38
40
|
* [Detailed Usage of Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
|
39
41
|
* [Visualizing Data With Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
|
42
|
+
* [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
|
40
43
|
* [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
|
41
44
|
|
45
|
+
#### Notebooks on Time series
|
46
|
+
|
47
|
+
* [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
|
48
|
+
* [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
|
49
|
+
|
42
50
|
### Case Studies
|
43
51
|
|
44
52
|
* [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
|
45
53
|
* [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
|
54
|
+
* [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
|
46
55
|
|
47
56
|
## Blog Posts
|
48
57
|
|
49
58
|
* [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
|
50
59
|
* [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
|
60
|
+
* [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
|
61
|
+
|
62
|
+
### Time series
|
63
|
+
|
64
|
+
* [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
|
65
|
+
* [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
|
51
66
|
|
52
67
|
## Documentation
|
53
68
|
|
@@ -55,32 +70,25 @@ Docs can be found [here](https://rubygems.org/gems/daru).
|
|
55
70
|
|
56
71
|
## Roadmap
|
57
72
|
|
58
|
-
* Automate testing for both MRI and JRuby.
|
59
73
|
* Enable creation of DataFrame by only specifying an NMatrix/MDArray in initialize. Vector naming happens automatically (alphabetic) or is specified in an Array.
|
60
|
-
* Completely test all functionality for MDArray.
|
61
74
|
* Basic Data manipulation and analysis operations:
|
62
75
|
- DF concat
|
63
|
-
* Option to express a DataFrame as an NMatrix or MDArray so as to use more efficient storage techniques.
|
64
76
|
* Assignment of a column to a single number should set the entire column to that number.
|
65
|
-
* == between daru_vector and string/number.
|
66
77
|
* Multiple column assignment with []=
|
67
78
|
* Multiple value assignment for vectors with []=.
|
68
79
|
* #find\_max function which will evaluate a block and return the row for which the value of the block is max.
|
69
|
-
* Function to check if a value of a row/vector is within a specified range.
|
70
|
-
* Create a new vector in map_rows if any of the already present rows dont match the one assigned in the block.
|
71
80
|
* Sort by index.
|
72
|
-
* Statistics on DataFrame over rows
|
73
|
-
* Cumulative sum.
|
74
|
-
* Time series support.
|
81
|
+
* Statistics on DataFrame over rows.
|
75
82
|
* Calculate percentage change.
|
76
83
|
* Have some sample data sets for users to play around with. Should be able to load these from the code itself.
|
77
84
|
* Sorting with missing data present.
|
78
|
-
* re_index should re establish previous index values in the newly supplied index.
|
79
85
|
|
80
86
|
## Contributing
|
81
87
|
|
82
88
|
Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
|
83
89
|
|
90
|
+
For details see [CONTRIBUTING](https://github.com/v0dro/daru/blob/master/CONTRIBUTING.md).
|
91
|
+
|
84
92
|
## Acknowledgements
|
85
93
|
|
86
94
|
* Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
|
data/Rakefile
CHANGED
@@ -1,6 +1,21 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'bundler/gem_tasks'
|
3
3
|
|
4
|
+
lib_folder = File.expand_path("../lib", __FILE__)
|
4
5
|
RSpec::Core::RakeTask.new(:spec)
|
5
6
|
|
6
|
-
task :default => :spec
|
7
|
+
task :default => :spec
|
8
|
+
|
9
|
+
task :console do |task|
|
10
|
+
cmd = [ 'irb', "-r '#{lib_folder}/daru.rb'" ]
|
11
|
+
run *cmd
|
12
|
+
end
|
13
|
+
|
14
|
+
task :pry do |task|
|
15
|
+
cmd = [ 'pry', "-r '#{lib_folder}/daru.rb'" ]
|
16
|
+
run *cmd
|
17
|
+
end
|
18
|
+
|
19
|
+
def run *cmd
|
20
|
+
sh(cmd.join(" "))
|
21
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
"Group","Treatment","Replicate","RelativeFitness"
|
2
|
+
"BKB","Tube",1,0.869962555792838
|
3
|
+
"BKB","Tube",2,1.00036299125423
|
4
|
+
"BKB","Tube",3,0.982935090384188
|
5
|
+
"BAC","Tube",1,0.810391635206191
|
6
|
+
"BAC","Tube",2,0.795106571577928
|
7
|
+
"JDK","Tube",1,0.849203581734814
|
8
|
+
"JDK","Tube",2,0.917636977577209
|
9
|
+
"JDK","Tube",3,0.905323024195181
|
10
|
+
"ETH","Tube",1,0.930820887284085
|
11
|
+
"ETH","Tube",2,0.958183317853959
|
12
|
+
"ETH","Tube",3,0.91428823286218
|
13
|
+
"SWI","Tube",1,0.918513532826773
|
14
|
+
"SWI","Tube",2,0.909023164202865
|
15
|
+
"SWI","Tube",3,0.928404818223812
|
16
|
+
"PPP","Tube",1,0.990500457882474
|
17
|
+
"PPP","Tube",2,0.97156464183624
|
18
|
+
"PPP","Tube",3,0.948766905642272
|
19
|
+
"ECO","Tube",1,0.996870763277447
|
20
|
+
"ECO","Tube",2,0.976750424056972
|
21
|
+
"ECO","Tube",3,0.939479247244218
|
22
|
+
"DOS","Tube",1,0.9461642789306
|
23
|
+
"DOS","Tube",2,0.951056617451754
|
24
|
+
"DOS","Tube",3,0.939563175900147
|
25
|
+
"FIT","Tube",1,0.942215594296048
|
26
|
+
"FIT","Tube",2,0.926698027408171
|
27
|
+
"FIT","Tube",3,1.00031750759377
|
28
|
+
"HHE","Tube",1,0.944414164259624
|
29
|
+
"HHE","Tube",2,0.95194592074032
|
30
|
+
"HHE","Tube",3,0.922485751593779
|
31
|
+
"H2W","Tube",1,0.915304498822462
|
32
|
+
"H2W","Tube",2,0.853906227834699
|
33
|
+
"H2W","Tube",3,0.938698147417512
|
34
|
+
"BKB","Dish",1,1.15254427639376
|
35
|
+
"BKB","Dish",2,1.28708514998039
|
36
|
+
"BKB","Dish",3,1.50741545998468
|
37
|
+
"BAC","Dish",2,1.65223419104399
|
38
|
+
"BAC","Dish",3,1.61502230247434
|
39
|
+
"JDK","Dish",1,1.5102682623396
|
40
|
+
"JDK","Dish",2,1.58270078151532
|
41
|
+
"JDK","Dish",3,1.54715152852448
|
42
|
+
"ETH","Dish",1,1.61281555981988
|
43
|
+
"ETH","Dish",2,1.36445572541848
|
44
|
+
"ETH","Dish",3,1.47155233724852
|
45
|
+
"SWI","Dish",1,1.36255560722945
|
46
|
+
"SWI","Dish",2,1.51569228710501
|
47
|
+
"SWI","Dish",3,1.47714121664773
|
48
|
+
"PPP","Dish",1,1.50896917413568
|
49
|
+
"PPP","Dish",2,1.58138592886817
|
50
|
+
"PPP","Dish",3,1.55356839081014
|
51
|
+
"ECO","Dish",1,1.69927564777656
|
52
|
+
"ECO","Dish",2,1.45605668065038
|
53
|
+
"ECO","Dish",3,1.52825762511041
|
54
|
+
"DOS","Dish",1,1.59453120031454
|
55
|
+
"DOS","Dish",2,1.58132511409296
|
56
|
+
"DOS","Dish",3,1.58558683941181
|
57
|
+
"FIT","Dish",1,1.01468578343171
|
58
|
+
"FIT","Dish",2,0.955220815085047
|
59
|
+
"FIT","Dish",3,1.03597298511451
|
60
|
+
"HHE","Dish",1,1.49399422139513
|
61
|
+
"HHE","Dish",2,1.32980278545457
|
62
|
+
"HHE","Dish",3,1.4505220676174
|
63
|
+
"H2W","Dish",1,1.52460143511286
|
64
|
+
"H2W","Dish",2,1.53958247554139
|
65
|
+
"H2W","Dish",3,1.51149897493835
|
@@ -0,0 +1,39 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
Benchmark.bm do |x|
|
7
|
+
x.report("Create with Arrays and clone") do
|
8
|
+
df = Daru::DataFrame.new({
|
9
|
+
a: 100000.times.map { rand },
|
10
|
+
b: 100000.times.map { rand },
|
11
|
+
c: 100000.times.map { rand }
|
12
|
+
})
|
13
|
+
end
|
14
|
+
|
15
|
+
x.report("Create with Vectors and clone") do
|
16
|
+
df = Daru::DataFrame.new({
|
17
|
+
a: Daru::Vector.new(100000.times.map { rand }),
|
18
|
+
b: Daru::Vector.new(100000.times.map { rand }),
|
19
|
+
c: Daru::Vector.new(100000.times.map { rand })
|
20
|
+
})
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Create with Vector and dont clone") do
|
24
|
+
df = Daru::DataFrame.new({
|
25
|
+
a: Daru::Vector.new(100000.times.map { rand }),
|
26
|
+
b: Daru::Vector.new(100000.times.map { rand }),
|
27
|
+
c: Daru::Vector.new(100000.times.map { rand })
|
28
|
+
}, clone: false)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Create by row from Arrays") do
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# ===== Benchmarks =====
|
36
|
+
# user system total real
|
37
|
+
# Create with Arrays and clone 0.940000 0.010000 0.950000 ( 0.959851)
|
38
|
+
# Create with Vectors and clone 1.950000 0.020000 1.970000 ( 1.966835)
|
39
|
+
# Create with Vector and dont clone 1.170000 0.000000 1.170000 ( 1.177132)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
data = Daru::DataFrame.from_csv 'TradeoffData.csv'
|
7
|
+
|
8
|
+
Benchmark.bm do |x|
|
9
|
+
x.report("Single column grouping") do
|
10
|
+
@single = data.group_by([:Treatment])
|
11
|
+
end
|
12
|
+
|
13
|
+
x.report("Multi-column grouping") do
|
14
|
+
@multi = data.group_by([:Group, :Treatment])
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Single mean") do
|
18
|
+
@single.mean
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Multi mean") do
|
22
|
+
@multi.mean
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ===== Benchmarks =====
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000356)
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000958)
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.000865)
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.002748)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Access single row") do
|
14
|
+
df.row[50]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access rows by comma") do
|
18
|
+
df.row[*(5..40000).to_a.shuffle]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Individual rows") do
|
22
|
+
rows = []
|
23
|
+
index = (5..40000).to_a.shuffle
|
24
|
+
index.each do |a|
|
25
|
+
rows << df.row[a].to_a
|
26
|
+
end
|
27
|
+
|
28
|
+
Daru::DataFrame.rows(rows, order: [:a,:b,:c], index: index)
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Access rows by range") do
|
32
|
+
df.row[5..40000]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ==== Benchmarks ====
|
37
|
+
# user system total real
|
38
|
+
# Access single row 0.000000 0.000000 0.000000 ( 0.000059)
|
39
|
+
# Access rows by comma 1.410000 0.010000 1.420000 ( 1.420426)
|
40
|
+
# Individual rows 1.480000 0.000000 1.480000 ( 1.488531)
|
41
|
+
# Access rows by range 1.440000 0.010000 1.450000 ( 1.436750)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Set a single row with Array") do
|
14
|
+
df.row[5] = [55,22,65]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Set a single row with Daru::Vector") do
|
18
|
+
df.row[3456] = Daru::Vector.new([3,54,11], index: [:b,:e,:a])
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Create a new row with Array") do
|
22
|
+
df.row[100001] = [34,66,11]
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Create a new row with Daru::Vector") do
|
26
|
+
df.row[100005] = Daru::Vector.new([34,66,11], index: [:a,:b,:t])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# ==== Benchmarks ====
|
31
|
+
#
|
32
|
+
# user system total real
|
33
|
+
# Set a single row with Array 0.600000 0.000000 0.600000 ( 0.604718)
|
34
|
+
# Set a single row with Daru::Vector 0.600000 0.000000 0.600000 ( 0.598599)
|
35
|
+
# Create a new row with Array 0.840000 0.010000 0.850000 ( 0.858349)
|
36
|
+
# Create a new row with Daru::Vector 0.950000 0.000000 0.950000 ( 0.950725)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
vector = Daru::Vector.new(10000.times.map.to_a.shuffle)
|
7
|
+
df = Daru::DataFrame.new({
|
8
|
+
a: vector,
|
9
|
+
b: vector,
|
10
|
+
c: vector
|
11
|
+
})
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Sort a Vector without any args") do
|
14
|
+
vector.sort
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Sort vector in descending order with custom <=> operator") do
|
18
|
+
vector.sort(ascending: false) { |a,b| a.to_s <=> b.to_s }
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Sort single column of DataFrame") do
|
22
|
+
df.sort([:a])
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("Sort two columns of DataFrame") do
|
26
|
+
df.sort([:c,:a])
|
27
|
+
end
|
28
|
+
|
29
|
+
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
30
|
+
df.sort([:c,:a], ascending: [true, false],
|
31
|
+
by: { c: lambda { |a,b| a.to_s <=> b.to_s },
|
32
|
+
a: lambda { |a,b| (a+1) <=> (b+1) } })
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# FIXME: MASSIVE SPEEDUP NECESSARY!
|
37
|
+
|
38
|
+
# ===== Benchmarks =====
|
39
|
+
# user system total real
|
40
|
+
# Sort a Vector without any args 0.130000 0.000000 0.130000 ( 0.128006)
|
41
|
+
# Sort vector in descending order with custom <=> operator 0.190000 0.000000 0.190000 ( 0.184604)
|
42
|
+
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
43
|
+
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
44
|
+
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|