daru 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a72d4b2565e47c5c4112aac514a7191bd4f962c
|
4
|
+
data.tar.gz: 2f68b0bb56e621f36d32f6bb9ecc541a61af7323
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 950a36a9956dd37ac334bd1b657cfb8a74994b1c771846b359d8c885d8ec3c62edb75df18a951086799c77a2fe7c3425c64c0f069fd85b50470095d13ba323c0
|
7
|
+
data.tar.gz: 0b9d8815d90f947a7a2dcf5447dd46057dbe559ecd3959e88e4932afb945183a168c46fb131bdaca9c07682a28e408d3d19b2e716b57ddad0638b375371f6c3b
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
Include:
|
5
|
+
- 'lib/**/*'
|
6
|
+
Exclude:
|
7
|
+
- 'spec/*'
|
8
|
+
- 'spec/**/*'
|
9
|
+
- 'vendor/**/*'
|
10
|
+
- 'benchmarks/*'
|
11
|
+
DisplayCopNames: true
|
12
|
+
|
13
|
+
# Preferred codebase style ---------------------------------------------
|
14
|
+
Style/ExtraSpacing:
|
15
|
+
AllowForAlignment: true
|
16
|
+
|
17
|
+
Style/FormatString:
|
18
|
+
EnforcedStyle: percent
|
19
|
+
|
20
|
+
Style/AndOr:
|
21
|
+
EnforcedStyle: conditionals
|
22
|
+
|
23
|
+
Style/SpaceAroundEqualsInParameterDefault:
|
24
|
+
EnforcedStyle: no_space
|
25
|
+
|
26
|
+
Style/SpaceInsideBlockBraces:
|
27
|
+
EnforcedStyle: space
|
28
|
+
|
29
|
+
Style/SpaceInsideHashLiteralBraces:
|
30
|
+
EnforcedStyle: no_space
|
31
|
+
|
32
|
+
Style/AlignParameters:
|
33
|
+
EnforcedStyle: with_fixed_indentation
|
34
|
+
|
35
|
+
Style/EmptyElse:
|
36
|
+
EnforcedStyle: empty
|
37
|
+
|
38
|
+
Style/ParallelAssignment:
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
Style/DoubleNegation:
|
42
|
+
Enabled: false
|
43
|
+
|
44
|
+
Style/SingleLineBlockParams:
|
45
|
+
Enabled: false
|
46
|
+
|
47
|
+
Style/PerlBackrefs:
|
48
|
+
Enabled: false
|
49
|
+
|
50
|
+
Style/SpaceAfterComma:
|
51
|
+
Enabled: false
|
52
|
+
|
53
|
+
Style/SpaceAroundOperators:
|
54
|
+
Enabled: false
|
55
|
+
|
56
|
+
Style/EmptyCaseCondition:
|
57
|
+
Enabled: false
|
58
|
+
|
59
|
+
# Neither of the preferred styles is good enough :(
|
60
|
+
Style/BlockDelimiters:
|
61
|
+
Enabled: false
|
62
|
+
|
63
|
+
# TODO -----------------------------------------------------------------
|
64
|
+
|
65
|
+
Style/Documentation:
|
66
|
+
Enabled: false
|
67
|
+
|
68
|
+
# To discuss and decide ------------------------------------------------
|
69
|
+
|
70
|
+
# FIXME: in fact, rescue modifier is rarely a good choice.
|
71
|
+
# But currently I can't fully grasp the three places they are used.
|
72
|
+
# So, leaving them intact. - zverok, 2016-05-07
|
73
|
+
Style/RescueModifier:
|
74
|
+
Exclude:
|
75
|
+
- 'lib/daru/accessors/gsl_wrapper.rb'
|
76
|
+
- 'lib/daru/dataframe.rb'
|
77
|
+
- 'lib/daru/io/sql_data_source.rb'
|
78
|
+
|
79
|
+
# FIXME: we should enable this cop and fix the offenses at some point - zverok, 2016-05-07
|
80
|
+
Style/Alias:
|
81
|
+
Enabled: false
|
82
|
+
|
83
|
+
# FIXME: should decide about this.
|
84
|
+
# Personally I prefer (as most of Ruby community) to use parens, but
|
85
|
+
# we can also enforce the style of NOT using them. Yet it definitely should
|
86
|
+
# be only one style. Current codebase uses ~400 method defs without and
|
87
|
+
# ~ 100 method defs with them. - zverok, 2016-05-07
|
88
|
+
Style/MethodDefParentheses:
|
89
|
+
Enabled: false
|
90
|
+
|
91
|
+
# Should be fixed, but require change of public API --------------------
|
92
|
+
|
93
|
+
# Bans methods like `has_missing_data?`, `is_number?` and so on - started
|
94
|
+
# with unnecessary has_ or is_.
|
95
|
+
Style/PredicateName:
|
96
|
+
Exclude:
|
97
|
+
- 'lib/daru/dataframe.rb'
|
98
|
+
- 'lib/daru/monkeys.rb'
|
99
|
+
- 'lib/daru/vector.rb'
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2016-05-06 16:48:54 +0300 using RuboCop version 0.39.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 66
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 110
|
12
|
+
|
13
|
+
# Offense count: 6
|
14
|
+
Metrics/BlockNesting:
|
15
|
+
Max: 6
|
16
|
+
|
17
|
+
# Offense count: 6
|
18
|
+
# Configuration parameters: CountComments.
|
19
|
+
Metrics/ClassLength:
|
20
|
+
Max: 1400
|
21
|
+
|
22
|
+
# Offense count: 26
|
23
|
+
Metrics/CyclomaticComplexity:
|
24
|
+
Max: 22
|
25
|
+
|
26
|
+
# Offense count: 273
|
27
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
|
28
|
+
# URISchemes: http, https
|
29
|
+
Metrics/LineLength:
|
30
|
+
Max: 164
|
31
|
+
|
32
|
+
# Offense count: 81
|
33
|
+
# Configuration parameters: CountComments.
|
34
|
+
Metrics/MethodLength:
|
35
|
+
Max: 100
|
36
|
+
|
37
|
+
# Offense count: 3
|
38
|
+
# Configuration parameters: CountComments.
|
39
|
+
Metrics/ModuleLength:
|
40
|
+
Max: 419
|
41
|
+
|
42
|
+
# Offense count: 22
|
43
|
+
Metrics/PerceivedComplexity:
|
44
|
+
Max: 28
|
data/.travis.yml
CHANGED
data/CONTRIBUTING.md
CHANGED
@@ -21,6 +21,10 @@ And run the test suite (should be all green with pending tests):
|
|
21
21
|
|
22
22
|
If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
|
23
23
|
|
24
|
+
While preparing your pull requests, don't forget to check your code with Rubocop:
|
25
|
+
|
26
|
+
`bundle exec rubocop`
|
27
|
+
|
24
28
|
## Daru internals
|
25
29
|
|
26
|
-
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
30
|
+
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
data/History.md
CHANGED
@@ -1,3 +1,46 @@
|
|
1
|
+
# 0.1.3 (May 2016)
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
- Proper error handling for case where an index specified by the user is not actually present in the DataFrame/Vector (@lokeshh).
|
5
|
+
- DataFrame CSV writer function will now suppress headers when passing headers: false (@gnilrets).
|
6
|
+
- Refactor Index and MultiIndex so that a Vector or DataFrame can access the actual index number without having to check the exact type of index every time (@lokeshh).
|
7
|
+
- Refactor `Vector#[]=` to not use conditionals (@lokeshh).
|
8
|
+
- Custom `#dup` method for `Daru::DateTimeIndex` (@Deepakkoli93).
|
9
|
+
- Massive performance boost to Vector and DataFrame sorting by using in-built Array#sort and removing previous hand-made sort (@lokeshh).
|
10
|
+
- Handle nils in sorting for Vectors and DataFrame (@lokeshh, @gnilrets).
|
11
|
+
- Add #describe function for Vectors (@shahsaurabh0605).
|
12
|
+
- Adds support for concatenating dataframes that don't share all the same vectors (@gnilrets).
|
13
|
+
- Massive performance enhancement for joins using the sorted merge method (@gnilrets).
|
14
|
+
- New statistics methods and tests for DataFrame (@shahsaurabh0605).
|
15
|
+
- Add explicit conversion to hash for DataFrame (DataFrame#to_h, Vector#to_h) and remove implicit conversion to hash (DataFrame#to_hash, Vector#to_hash) (@gnilrets).
|
16
|
+
- Add `DataFrame#rename_vectors` for simplifying renaming of vectors in DataFrame (@gnilrets).
|
17
|
+
- MultiIndex raises error on accessing an invalid index (@shreyanshd).
|
18
|
+
- Order columns as given in the CSV file when reading into a DataFrame from CSV using `DataFrame.from_csv` (@lokeshh).
|
19
|
+
- Add `Vector#percent_change` and `DataFrame#percent_change` (@shahsaurabh0605).
|
20
|
+
- Faster `DataFrame#filter_rows` (@lokeshh).
|
21
|
+
- Added `Vector#emv` for calculating exponential moving variance of Vector (@shahsaurabh0605).
|
22
|
+
- Add support for associating metadata with a Vector or DataFrame using the :metadata option (@gnilrets).
|
23
|
+
- Add `Vector#emsd` for calculating exponential moving standard deviation of Vector (@shahsaurabh0605).
|
24
|
+
- Sample and population covariance functions for Vector (@shahsaurabh0605).
|
25
|
+
- Improve `DataFrame#dup` performance (@gnilrets).
|
26
|
+
- Add `Daru::DataFrame::Core::GroupBy#reduce` for reducing groups by passing a block (@gnilrets).
|
27
|
+
- Add rubocop as development dependency and make changes suggested by it to conform to the Ruby Style Guide (@zverok).
|
28
|
+
- Allow Daru::Index to be initialized by a Range (@lokeshh).
|
29
|
+
* Fixes
|
30
|
+
- Fix conflict with narray that caused namespace clashes with nmatrix in case both narray and nmatrix were installed on the user's system (@lokeshh).
|
31
|
+
- Fix bug with dataframe concatenation that caused modifying the arrays that
|
32
|
+
compose the vectors in the original dataframes (@gnilrets).
|
33
|
+
- Fix an error where the Vectors in an empty DataFrame would not be assigned correct names (@lokeshh).
|
34
|
+
- Correct spelling mistakes and fix broken links in README (@lokeshh).
|
35
|
+
- Fix bug in Vector#mode (@sunshineyyy).
|
36
|
+
- Fix `Vector#index_of` method to handle dtype :array differently (@lokeshh).
|
37
|
+
- Fix `DateTimeIndex#include?` method since it was raising an exception when index not found. It returns false now (@Phitherek).
|
38
|
+
- Handle nils in group_by keys (@gnilrets).
|
39
|
+
- Handle nils for statistics methods in Vector and DataFrame for :array and :gsl data (@lokeshh).
|
40
|
+
- Fix `DataFrame#clone` when no arguments have been passed to it (@lokeshh).
|
41
|
+
- Fix bug when joining empty dataframes (@gnilrets).
|
42
|
+
|
43
|
+
|
1
44
|
# 0.1.2
|
2
45
|
|
3
46
|
* Enhancements
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
|
8
8
|
daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
|
9
9
|
|
10
|
-
daru makes it easy and
|
10
|
+
daru makes it easy and intuitive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby, it works with all Ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
|
11
11
|
|
12
12
|
## Features
|
13
13
|
|
@@ -16,7 +16,7 @@ daru makes it easy and intuituive to process data predominantly through 2 data s
|
|
16
16
|
- DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
|
17
17
|
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
|
18
18
|
* Support for time series.
|
19
|
-
* Singly and
|
19
|
+
* Singly and hierarchically indexed data structures.
|
20
20
|
* Flexible and intuitive API for manipulation and analysis of data.
|
21
21
|
* Easy plotting, statistics and arithmetic.
|
22
22
|
* Plentiful iterators.
|
@@ -150,7 +150,7 @@ data_frame.where(
|
|
150
150
|
|
151
151
|
*Plotting*
|
152
152
|
|
153
|
-
Daru supports plotting of interactive graphs with [nyaplot](). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
|
153
|
+
Daru supports plotting of interactive graphs with [nyaplot](https://github.com/domitry/nyaplot). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
|
154
154
|
``` ruby
|
155
155
|
data_frame.plot type: :bar, x: 'Beer', y: 'Gallons sold' do |plot, diagram|
|
156
156
|
plot.x_label "Beer"
|
@@ -179,7 +179,6 @@ Docs can be found [here](https://rubygems.org/gems/daru).
|
|
179
179
|
* Statistics on DataFrame over rows.
|
180
180
|
* Calculate percentage change.
|
181
181
|
* Have some sample data sets for users to play around with. Should be able to load these from the code itself.
|
182
|
-
* Sorting with missing data present.
|
183
182
|
|
184
183
|
## Contributing
|
185
184
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
|
11
|
+
df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
|
12
|
+
h[v] = Daru::Vector.new(1.upto(n).to_a)
|
13
|
+
h
|
14
|
+
end
|
15
|
+
|
16
|
+
df = Daru::DataFrame.new(df_h)
|
17
|
+
|
18
|
+
Benchmark.bm do |bm|
|
19
|
+
bm.report("dupe (n=#{n})") do
|
20
|
+
df.dup
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# ===== Benchmarks =====
|
26
|
+
# System: iMac Late 2013 3.5GHz Core i7
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
#dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
|
30
|
+
# user system total real
|
31
|
+
#dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
|
32
|
+
# user system total real
|
33
|
+
#dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
|
39
|
+
# Note that the n here is 100x smaller than above
|
40
|
+
# user system total real
|
41
|
+
#dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
|
42
|
+
# user system total real
|
43
|
+
#dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
|
44
|
+
# user system total real
|
45
|
+
#dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
|
data/benchmarks/group_by.rb
CHANGED
@@ -7,11 +7,11 @@ data = Daru::DataFrame.from_csv 'TradeoffData.csv'
|
|
7
7
|
|
8
8
|
Benchmark.bm do |x|
|
9
9
|
x.report("Single column grouping") do
|
10
|
-
@single = data.group_by([
|
10
|
+
@single = data.group_by(['Treatment'])
|
11
11
|
end
|
12
12
|
|
13
13
|
x.report("Multi-column grouping") do
|
14
|
-
@multi = data.group_by([
|
14
|
+
@multi = data.group_by(['Group', 'Treatment'])
|
15
15
|
end
|
16
16
|
|
17
17
|
x.report("Single mean") do
|
@@ -24,9 +24,9 @@ Benchmark.bm do |x|
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# ===== Benchmarks =====
|
27
|
-
#
|
27
|
+
#
|
28
28
|
# user system total real
|
29
|
-
# Single column grouping
|
30
|
-
# Multi-column grouping
|
31
|
-
# Single mean
|
32
|
-
# Multi mean
|
29
|
+
# Single column grouping 0.000000 0.000000 0.000000 (0.000340)
|
30
|
+
# Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
|
31
|
+
# Single mean 0.000000 0.000000 0.000000 (0.001208)
|
32
|
+
# Multi mean 0.000000 0.000000 0.000000 (0.004892)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
# Check scaling
|
7
|
+
base_n = 10000
|
8
|
+
0.upto(2) do |iscale|
|
9
|
+
n = base_n * 2**iscale
|
10
|
+
keys = (1..(n)).to_a
|
11
|
+
base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
|
12
|
+
lookup_hash = keys.map { |k| [k, k * 100]}.to_h
|
13
|
+
|
14
|
+
base_data_df = Daru::DataFrame.new(base_data)
|
15
|
+
lookup_df = Daru::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
|
16
|
+
|
17
|
+
Benchmark.bm do |bm|
|
18
|
+
bm.report("Inner join (n=#{n})") do
|
19
|
+
base_data_df.join(lookup_df, on: [:keys], how: :inner)
|
20
|
+
end
|
21
|
+
|
22
|
+
bm.report("Outer join (n=#{n})") do
|
23
|
+
base_data_df.join(lookup_df, on: [:keys], how: :outer)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# ===== Benchmarks =====
|
29
|
+
# System: MacBook Pro Mid 2014 3GHz Core i7
|
30
|
+
#
|
31
|
+
# user system total real
|
32
|
+
#Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
|
33
|
+
#Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
|
34
|
+
# user system total real
|
35
|
+
#Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
|
36
|
+
#Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
|
37
|
+
# user system total real
|
38
|
+
#Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
|
39
|
+
#Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
|
40
|
+
|
41
|
+
|
42
|
+
# ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
|
43
|
+
# Note that the n here is 10x smaller than above
|
44
|
+
# user system total real
|
45
|
+
#Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
|
46
|
+
#Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
|
47
|
+
# user system total real
|
48
|
+
#Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
|
49
|
+
#Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
|
50
|
+
# user system total real
|
51
|
+
#Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
|
52
|
+
#Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
|
data/benchmarks/sorting.rb
CHANGED
@@ -28,8 +28,8 @@ Benchmark.bm do |x|
|
|
28
28
|
|
29
29
|
x.report("Sort two columns with custom operators in different orders of DataFrame") do
|
30
30
|
df.sort([:c,:a], ascending: [true, false],
|
31
|
-
by: { c: lambda { |a
|
32
|
-
a: lambda { |a
|
31
|
+
by: { c: lambda { |a| a.to_s },
|
32
|
+
a: lambda { |a| a+1 } })
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -42,3 +42,10 @@ end
|
|
42
42
|
# Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
|
43
43
|
# Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
|
44
44
|
# Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
|
45
|
+
|
46
|
+
# ===== Current Benchmarks =====
|
47
|
+
# Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
|
48
|
+
# Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
|
49
|
+
# Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
|
50
|
+
# Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
|
51
|
+
# Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'daru'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
vector = Daru::Vector.new(
|
5
|
+
(10**6).times.map.to_a.shuffle,
|
6
|
+
missing_values: 100.times.map.to_a.shuffle
|
7
|
+
)
|
8
|
+
|
9
|
+
vector_gsl = Daru::Vector.new(
|
10
|
+
10000.times.map.to_a.shuffle,
|
11
|
+
missing_values: 100.times.map.to_a.shuffle,
|
12
|
+
dtype: :gsl
|
13
|
+
)
|
14
|
+
|
15
|
+
Benchmark.bm do |x|
|
16
|
+
x.report("Mean of a vector") do
|
17
|
+
vector.mean
|
18
|
+
end
|
19
|
+
|
20
|
+
x.report("Minimum of a vector") do
|
21
|
+
vector.min
|
22
|
+
end
|
23
|
+
|
24
|
+
x.report("Mean of a vector with data type gsl") do
|
25
|
+
vector_gsl.mean
|
26
|
+
end
|
27
|
+
|
28
|
+
x.report "Minimum of a vector with data type gsl" do
|
29
|
+
vector_gsl.min
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# ===== Benchmarks =====
|
34
|
+
#
|
35
|
+
# user system total real
|
36
|
+
# Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
|
37
|
+
# Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
|
38
|
+
# Mean of a gsl vector 0.000000 0.000000 0.000000 ( 0.001037)
|
39
|
+
# Min of a gsl vector 0.000000 0.000000 0.000000 ( 0.001251)
|