daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a72d4b2565e47c5c4112aac514a7191bd4f962c
|
4
|
+
data.tar.gz: 2f68b0bb56e621f36d32f6bb9ecc541a61af7323
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 950a36a9956dd37ac334bd1b657cfb8a74994b1c771846b359d8c885d8ec3c62edb75df18a951086799c77a2fe7c3425c64c0f069fd85b50470095d13ba323c0
|
7
|
+
data.tar.gz: 0b9d8815d90f947a7a2dcf5447dd46057dbe559ecd3959e88e4932afb945183a168c46fb131bdaca9c07682a28e408d3d19b2e716b57ddad0638b375371f6c3b
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
Include:
|
5
|
+
- 'lib/**/*'
|
6
|
+
Exclude:
|
7
|
+
- 'spec/*'
|
8
|
+
- 'spec/**/*'
|
9
|
+
- 'vendor/**/*'
|
10
|
+
- 'benchmarks/*'
|
11
|
+
DisplayCopNames: true
|
12
|
+
|
13
|
+
# Preferred codebase style ---------------------------------------------
|
14
|
+
Style/ExtraSpacing:
|
15
|
+
AllowForAlignment: true
|
16
|
+
|
17
|
+
Style/FormatString:
|
18
|
+
EnforcedStyle: percent
|
19
|
+
|
20
|
+
Style/AndOr:
|
21
|
+
EnforcedStyle: conditionals
|
22
|
+
|
23
|
+
Style/SpaceAroundEqualsInParameterDefault:
|
24
|
+
EnforcedStyle: no_space
|
25
|
+
|
26
|
+
Style/SpaceInsideBlockBraces:
|
27
|
+
EnforcedStyle: space
|
28
|
+
|
29
|
+
Style/SpaceInsideHashLiteralBraces:
|
30
|
+
EnforcedStyle: no_space
|
31
|
+
|
32
|
+
Style/AlignParameters:
|
33
|
+
EnforcedStyle: with_fixed_indentation
|
34
|
+
|
35
|
+
Style/EmptyElse:
|
36
|
+
EnforcedStyle: empty
|
37
|
+
|
38
|
+
Style/ParallelAssignment:
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
Style/DoubleNegation:
|
42
|
+
Enabled: false
|
43
|
+
|
44
|
+
Style/SingleLineBlockParams:
|
45
|
+
Enabled: false
|
46
|
+
|
47
|
+
Style/PerlBackrefs:
|
48
|
+
Enabled: false
|
49
|
+
|
50
|
+
Style/SpaceAfterComma:
|
51
|
+
Enabled: false
|
52
|
+
|
53
|
+
Style/SpaceAroundOperators:
|
54
|
+
Enabled: false
|
55
|
+
|
56
|
+
Style/EmptyCaseCondition:
|
57
|
+
Enabled: false
|
58
|
+
|
59
|
+
# Neither of the preferred styles is good enough :(
|
60
|
+
Style/BlockDelimiters:
|
61
|
+
Enabled: false
|
62
|
+
|
63
|
+
# TODO -----------------------------------------------------------------
|
64
|
+
|
65
|
+
Style/Documentation:
|
66
|
+
Enabled: false
|
67
|
+
|
68
|
+
# To discuss and decide ------------------------------------------------
|
69
|
+
|
70
|
+
# FIXME: in fact, rescue modifier is rarely a good choice.
|
71
|
+
# But currently I can't fully grasp the three places they are used.
|
72
|
+
# So, leaving them intact. - zverok, 2016-05-07
|
73
|
+
Style/RescueModifier:
|
74
|
+
Exclude:
|
75
|
+
- 'lib/daru/accessors/gsl_wrapper.rb'
|
76
|
+
- 'lib/daru/dataframe.rb'
|
77
|
+
- 'lib/daru/io/sql_data_source.rb'
|
78
|
+
|
79
|
+
# FIXME: at some point we should enable this and fix the offenses - zverok, 2016-05-07
|
80
|
+
Style/Alias:
|
81
|
+
Enabled: false
|
82
|
+
|
83
|
+
# FIXME: should decide about this.
|
84
|
+
# Personally I prefer (as most of Ruby community) to use parens, but
|
85
|
+
# we also can enforce style to NOT using them. Yet it definitely should
|
86
|
+
# be only one style. Current codebase uses ~400 method defs without and
|
87
|
+
# ~ 100 method defs with them. - zverok, 2016-05-07
|
88
|
+
Style/MethodDefParentheses:
|
89
|
+
Enabled: false
|
90
|
+
|
91
|
+
# Should be fixed, but require change of public API --------------------
|
92
|
+
|
93
|
+
# Bans methods like `has_missing_data?`, `is_number?` and so on - started
|
94
|
+
# with unnecessary has_ or is_.
|
95
|
+
Style/PredicateName:
|
96
|
+
Exclude:
|
97
|
+
- 'lib/daru/dataframe.rb'
|
98
|
+
- 'lib/daru/monkeys.rb'
|
99
|
+
- 'lib/daru/vector.rb'
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2016-05-06 16:48:54 +0300 using RuboCop version 0.39.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 66
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 110
|
12
|
+
|
13
|
+
# Offense count: 6
|
14
|
+
Metrics/BlockNesting:
|
15
|
+
Max: 6
|
16
|
+
|
17
|
+
# Offense count: 6
|
18
|
+
# Configuration parameters: CountComments.
|
19
|
+
Metrics/ClassLength:
|
20
|
+
Max: 1400
|
21
|
+
|
22
|
+
# Offense count: 26
|
23
|
+
Metrics/CyclomaticComplexity:
|
24
|
+
Max: 22
|
25
|
+
|
26
|
+
# Offense count: 273
|
27
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
|
28
|
+
# URISchemes: http, https
|
29
|
+
Metrics/LineLength:
|
30
|
+
Max: 164
|
31
|
+
|
32
|
+
# Offense count: 81
|
33
|
+
# Configuration parameters: CountComments.
|
34
|
+
Metrics/MethodLength:
|
35
|
+
Max: 100
|
36
|
+
|
37
|
+
# Offense count: 3
|
38
|
+
# Configuration parameters: CountComments.
|
39
|
+
Metrics/ModuleLength:
|
40
|
+
Max: 419
|
41
|
+
|
42
|
+
# Offense count: 22
|
43
|
+
Metrics/PerceivedComplexity:
|
44
|
+
Max: 28
|
data/.travis.yml
CHANGED
data/CONTRIBUTING.md
CHANGED
@@ -21,6 +21,10 @@ And run the test suite (should be all green with pending tests):
|
|
21
21
|
|
22
22
|
If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
|
23
23
|
|
24
|
+
While preparing your pull requests, don't forget to check your code with Rubocop:
|
25
|
+
|
26
|
+
`bundle exec rubocop`
|
27
|
+
|
24
28
|
## Daru internals
|
25
29
|
|
26
|
-
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
30
|
+
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
data/History.md
CHANGED
@@ -1,3 +1,46 @@
|
|
1
|
+
# 0.1.3 (May 2016)
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
- Proper error handling for case where an index specified by the user is not actually present in the DataFrame/Vector (@lokeshh).
|
5
|
+
- DataFrame CSV writer function will now suppress headers when passing headers: false (@gnilrets).
|
6
|
+
- Refactor Index and MultiIndex so that a Vector or DataFrame can access the actual index number without having to check the exact type of index every time (@lokeshh).
|
7
|
+
- Refactor `Vector#[]=` to not use conditionals (@lokeshh).
|
8
|
+
- Custom `#dup` method for `Daru::DateTimeIndex` (@Deepakkoli93).
|
9
|
+
- Massive performance boost to Vector and DataFrame sorting by using in-built Array#sort and removing previous hand-made sort (@lokeshh).
|
10
|
+
- Handle nils in sorting for Vectors and DataFrame (@lokeshh, @gnilrets).
|
11
|
+
- Add #describe function for Vectors (@shahsaurabh0605).
|
12
|
+
- Adds support for concatenating dataframes that don't share all the same vectors (@gnilrets).
|
13
|
+
- Massive performance enhancement for joins using the sorted merge method (@gnilrets).
|
14
|
+
- New statistics methods and tests for DataFrame (@shahsaurabh0605).
|
15
|
+
- Add explicit conversion to hash for DataFrame (DataFrame#to_h, Vector#to_h) and remove implicit conversion to hash (DataFrame#to_hash, Vector#to_hash) (@gnilrets).
|
16
|
+
- Add `DataFrame#rename_vectors` for simplifying renaming of vectors in DataFrame (@gnilrets).
|
17
|
+
- MultiIndex raises error on accessing an invalid index (@shreyanshd).
|
18
|
+
- Order columns as given in the CSV file when reading into a DataFrame from CSV using `DataFrame.from_csv` (@lokeshh).
|
19
|
+
- Add `Vector#percent_change` and `DataFrame#percent_change` (@shahsaurabh0605).
|
20
|
+
- Faster `DataFrame#filter_rows` (@lokeshh).
|
21
|
+
- Added `Vector#emv` for calculating exponential moving variance of Vector (@shahsaurabh0605).
|
22
|
+
- Add support for associating metadata with a Vector or DataFrame using the :metadata option (@gnilrets).
|
23
|
+
- Add `Vector#emsd` for calculating exponential moving standard deviation of Vector (@shahsaurabh0605).
|
24
|
+
- Sample and population covariance functions for Vector (@shahsaurabh0605).
|
25
|
+
- Improve `DataFrame#dup` performance (@gnilrets).
|
26
|
+
- Add `Daru::DataFrame::Core::GroupBy#reduce` for reducing groups by passing a block (@gnilrets).
|
27
|
+
- Add rubocop as development dependency and make changes suggested by it to conform to the Ruby Style Guide (@zverok).
|
28
|
+
- Allow Daru::Index to be initialized by a Range (@lokeshh).
|
29
|
+
* Fixes
|
30
|
+
- Fix conflict with narray that caused namespace clashes with nmatrix in case both narray and nmatrix were installed on the user's system (@lokeshh).
|
31
|
+
- Fix bug with dataframe concatenation that caused modifying the arrays that
|
32
|
+
compose the vectors in the original dataframes (@gnilrets).
|
33
|
+
- Fix an error where the Vectors in an empty DataFrame would not be assigned correct names (@lokeshh).
|
34
|
+
- Correct spelling mistakes and fix broken links in README (@lokeshh).
|
35
|
+
- Fix bug in Vector#mode (@sunshineyyy).
|
36
|
+
- Fix `Vector#index_of` method to handle dtype :array differently (@lokeshh).
|
37
|
+
- Fix `DateTimeIndex#include?` method since it was raising an exception when index not found. It returns false now (@Phitherek).
|
38
|
+
- Handle nils in group_by keys (@gnilrets).
|
39
|
+
- Handle nils for statistics methods in Vector and DataFrame for :array and :gsl data (@lokeshh).
|
40
|
+
- Fix `DataFrame#clone` when no arguments have been passed to it (@lokeshh).
|
41
|
+
- Fix bug when joining empty dataframes (@gnilrets).
|
42
|
+
|
43
|
+
|
1
44
|
# 0.1.2
|
2
45
|
|
3
46
|
* Enhancements
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
|
8
8
|
daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
|
9
9
|
|
10
|
-
daru makes it easy and
|
10
|
+
daru makes it easy and intuitive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby, it works with all Ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
|
11
11
|
|
12
12
|
## Features
|
13
13
|
|
@@ -16,7 +16,7 @@ daru makes it easy and intuituive to process data predominantly through 2 data s
|
|
16
16
|
- DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
|
17
17
|
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
|
18
18
|
* Support for time series.
|
19
|
-
* Singly and
|
19
|
+
* Singly and hierarchically indexed data structures.
|
20
20
|
* Flexible and intuitive API for manipulation and analysis of data.
|
21
21
|
* Easy plotting, statistics and arithmetic.
|
22
22
|
* Plentiful iterators.
|
@@ -150,7 +150,7 @@ data_frame.where(
|
|
150
150
|
|
151
151
|
*Plotting*
|
152
152
|
|
153
|
-
Daru supports plotting of interactive graphs with [nyaplot](). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
|
153
|
+
Daru supports plotting of interactive graphs with [nyaplot](https://github.com/domitry/nyaplot). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
|
154
154
|
``` ruby
|
155
155
|
data_frame.plot type: :bar, x: 'Beer', y: 'Gallons sold' do |plot, diagram|
|
156
156
|
plot.x_label "Beer"
|
@@ -179,7 +179,6 @@ Docs can be found [here](https://rubygems.org/gems/daru).
|
|
179
179
|
* Statistics on DataFrame over rows.
|
180
180
|
* Calculate percentage change.
|
181
181
|
* Have some sample data sets for users to play around with. Should be able to load these from the code itself.
|
182
|
-
* Sorting with missing data present.
|
183
182
|
|
184
183
|
## Contributing
|
185
184
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
|
11
|
+
df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
|
12
|
+
h[v] = Daru::Vector.new(1.upto(n).to_a)
|
13
|
+
h
|
14
|
+
end
|
15
|
+
|
16
|
+
df = Daru::DataFrame.new(df_h)
|
17
|
+
|
18
|
+
Benchmark.bm do |bm|
|
19
|
+
bm.report("dupe (n=#{n})") do
|
20
|
+
df.dup
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# ===== Benchmarks =====
|
26
|
+
# System: iMac Late 2013 3.5GHz Core i7
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
#dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
|
30
|
+
# user system total real
|
31
|
+
#dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
|
32
|
+
# user system total real
|
33
|
+
#dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
|
39
|
+
# Note that the n here is 100x smaller than above
|
40
|
+
# user system total real
|
41
|
+
#dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
|
42
|
+
# user system total real
|
43
|
+
#dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
|
44
|
+
# user system total real
|
45
|
+
#dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
|
data/benchmarks/group_by.rb
CHANGED
@@ -7,11 +7,11 @@ data = Daru::DataFrame.from_csv 'TradeoffData.csv'
|
|
7
7
|
|
8
8
|
Benchmark.bm do |x|
|
9
9
|
x.report("Single column grouping") do
|
10
|
-
@single = data.group_by([
|
10
|
+
@single = data.group_by(['Treatment'])
|
11
11
|
end
|
12
12
|
|
13
13
|
x.report("Multi-column grouping") do
|
14
|
-
@multi = data.group_by([
|
14
|
+
@multi = data.group_by(['Group', 'Treatment'])
|
15
15
|
end
|
16
16
|
|
17
17
|
x.report("Single mean") do
|
@@ -24,9 +24,9 @@ Benchmark.bm do |x|
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# ===== Benchmarks =====
|
27
|
-
#
|
27
|
+
#
|
28
28
|
# user system total real
|
29
|
-
# Single column grouping
|
30
|
-
# Multi-column grouping
|
31
|
-
# Single mean
|
32
|
-
# Multi mean
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000340)
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.001208)
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.004892)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
keys = (1..(n)).to_a
|
11
|
+
base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
|
12
|
+
lookup_hash = keys.map { |k| [k, k * 100]}.to_h
|
13
|
+
|
14
|
+
base_data_df = Daru::DataFrame.new(base_data)
|
15
|
+
lookup_df = Daru::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
|
16
|
+
|
17
|
+
Benchmark.bm do |bm|
|
18
|
+
bm.report("Inner join (n=#{n})") do
|
19
|
+
base_data_df.join(lookup_df, on: [:keys], how: :inner)
|
20
|
+
end
|
21
|
+
|
22
|
+
bm.report("Outer join (n=#{n})") do
|
23
|
+
base_data_df.join(lookup_df, on: [:keys], how: :outer)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# ===== Benchmarks =====
|
29
|
+
# System: MacBook Pro Mid 2014 3GHz Core i7
|
30
|
+
#
|
31
|
+
# user system total real
|
32
|
+
#Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
|
33
|
+
#Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
|
34
|
+
# user system total real
|
35
|
+
#Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
|
36
|
+
#Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
|
37
|
+
# user system total real
|
38
|
+
#Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
|
39
|
+
#Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
|
40
|
+
|
41
|
+
|
42
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
|
43
|
+
# Note that the n here is 10x smaller than above
|
44
|
+
# user system total real
|
45
|
+
#Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
|
46
|
+
#Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
|
47
|
+
# user system total real
|
48
|
+
#Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
|
49
|
+
#Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
|
50
|
+
# user system total real
|
51
|
+
#Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
|
52
|
+
#Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
|
data/benchmarks/sorting.rb
CHANGED
@@ -28,8 +28,8 @@ Benchmark.bm do |x|
|
|
28
28
|
|
29
29
|
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
30
30
|
df.sort([:c,:a], ascending: [true, false],
|
31
|
-
by: { c: lambda { |a
|
32
|
-
a: lambda { |a
|
31
|
+
by: { c: lambda { |a| a.to_s },
|
32
|
+
a: lambda { |a| a+1 } })
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -42,3 +42,10 @@ end
|
|
42
42
|
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
43
43
|
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
44
44
|
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|
45
|
+
|
46
|
+
# ===== Current Benchmarks =====
|
47
|
+
# Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
|
48
|
+
# Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
|
49
|
+
# Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
|
50
|
+
# Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
|
51
|
+
# Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'daru'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
vector = Daru::Vector.new(
|
5
|
+
(10**6).times.map.to_a.shuffle,
|
6
|
+
missing_values: 100.times.map.to_a.shuffle
|
7
|
+
)
|
8
|
+
|
9
|
+
vector_gsl = Daru::Vector.new(
|
10
|
+
10000.times.map.to_a.shuffle,
|
11
|
+
missing_values: 100.times.map.to_a.shuffle,
|
12
|
+
dtype: :gsl
|
13
|
+
)
|
14
|
+
|
15
|
+
Benchmark.bm do |x|
|
16
|
+
x.report("Mean of a vector") do
|
17
|
+
vector.mean
|
18
|
+
end
|
19
|
+
|
20
|
+
x.report("Minimum of a vector") do
|
21
|
+
vector.min
|
22
|
+
end
|
23
|
+
|
24
|
+
x.report("Mean of a vector with data type gsl") do
|
25
|
+
vector_gsl.mean
|
26
|
+
end
|
27
|
+
|
28
|
+
x.report "Minimum of a vector with data type gsl" do
|
29
|
+
vector_gsl.min
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# ===== Benchmarks =====
|
34
|
+
#
|
35
|
+
# user system total real
|
36
|
+
# Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
|
37
|
+
# Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
|
38
|
+
# Mean of a gsl vector 0.000000 0.000000 0.000000 ( 0.001037)
|
39
|
+
# Min of a gsl vector 0.000000 0.000000 0.000000 ( 0.001251)
|