daru 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed2a3e2a4cd9fce8d95af6aac9c3db532eed444f
4
- data.tar.gz: 90ca6a62ee824d20f72a9f6689c03f27d7667168
3
+ metadata.gz: 6a72d4b2565e47c5c4112aac514a7191bd4f962c
4
+ data.tar.gz: 2f68b0bb56e621f36d32f6bb9ecc541a61af7323
5
5
  SHA512:
6
- metadata.gz: e6f3345ef4372e1c45a3d80c0cc61c2b4c72e4c810cfb183f30bfd9285a09639ea39cd0a3597fc63551d7f72398d8d83af4424855018e8a5b2a99274b46625cd
7
- data.tar.gz: 65d262b1deec54680a5fdcfecda3530c9fb9450dbd280c18833b655521418ed340f311253221dfd4e018577b063f9d3638d0d600a108c3b98ec5a7cd2dfe98ec
6
+ metadata.gz: 950a36a9956dd37ac334bd1b657cfb8a74994b1c771846b359d8c885d8ec3c62edb75df18a951086799c77a2fe7c3425c64c0f069fd85b50470095d13ba323c0
7
+ data.tar.gz: 0b9d8815d90f947a7a2dcf5447dd46057dbe559ecd3959e88e4932afb945183a168c46fb131bdaca9c07682a28e408d3d19b2e716b57ddad0638b375371f6c3b
data/.gitignore CHANGED
@@ -2,3 +2,5 @@
2
2
  Gemfile.lock
3
3
  doc/
4
4
  .yardoc/
5
+ .bundle
6
+ vendor/
@@ -0,0 +1,99 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ AllCops:
4
+ Include:
5
+ - 'lib/**/*'
6
+ Exclude:
7
+ - 'spec/*'
8
+ - 'spec/**/*'
9
+ - 'vendor/**/*'
10
+ - 'benchmarks/*'
11
+ DisplayCopNames: true
12
+
13
+ # Preferred codebase style ---------------------------------------------
14
+ Style/ExtraSpacing:
15
+ AllowForAlignment: true
16
+
17
+ Style/FormatString:
18
+ EnforcedStyle: percent
19
+
20
+ Style/AndOr:
21
+ EnforcedStyle: conditionals
22
+
23
+ Style/SpaceAroundEqualsInParameterDefault:
24
+ EnforcedStyle: no_space
25
+
26
+ Style/SpaceInsideBlockBraces:
27
+ EnforcedStyle: space
28
+
29
+ Style/SpaceInsideHashLiteralBraces:
30
+ EnforcedStyle: no_space
31
+
32
+ Style/AlignParameters:
33
+ EnforcedStyle: with_fixed_indentation
34
+
35
+ Style/EmptyElse:
36
+ EnforcedStyle: empty
37
+
38
+ Style/ParallelAssignment:
39
+ Enabled: false
40
+
41
+ Style/DoubleNegation:
42
+ Enabled: false
43
+
44
+ Style/SingleLineBlockParams:
45
+ Enabled: false
46
+
47
+ Style/PerlBackrefs:
48
+ Enabled: false
49
+
50
+ Style/SpaceAfterComma:
51
+ Enabled: false
52
+
53
+ Style/SpaceAroundOperators:
54
+ Enabled: false
55
+
56
+ Style/EmptyCaseCondition:
57
+ Enabled: false
58
+
59
+ # Neither of the preferred styles is good enough :(
60
+ Style/BlockDelimiters:
61
+ Enabled: false
62
+
63
+ # TODO -----------------------------------------------------------------
64
+
65
+ Style/Documentation:
66
+ Enabled: false
67
+
68
+ # To discuss and decide ------------------------------------------------
69
+
70
+ # FIXME: in fact, rescue modifier is rarely a good choice.
71
+ # But currently I can't fully grasp the three places they are used.
72
+ # So, leaving them intact. - zverok, 2016-05-07
73
+ Style/RescueModifier:
74
+ Exclude:
75
+ - 'lib/daru/accessors/gsl_wrapper.rb'
76
+ - 'lib/daru/dataframe.rb'
77
+ - 'lib/daru/io/sql_data_source.rb'
78
+
79
+ # FIXME: we should enable this at some point and fix the offenses - zverok, 2016-05-07
80
+ Style/Alias:
81
+ Enabled: false
82
+
83
+ # FIXME: should decide about this.
84
+ # Personally I prefer (as most of Ruby community) to use parens, but
85
+ # we also can enforce style to NOT using them. Yet it definitely should
86
+ # be only one style. Current codebase uses ~400 method defs without and
87
+ # ~ 100 method defs with them. - zverok, 2016-05-07
88
+ Style/MethodDefParentheses:
89
+ Enabled: false
90
+
91
+ # Should be fixed, but require change of public API --------------------
92
+
93
+ # Bans methods like `has_missing_data?`, `is_number?` and so on - started
94
+ # with unnecessary has_ or is_.
95
+ Style/PredicateName:
96
+ Exclude:
97
+ - 'lib/daru/dataframe.rb'
98
+ - 'lib/daru/monkeys.rb'
99
+ - 'lib/daru/vector.rb'
@@ -0,0 +1,44 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2016-05-06 16:48:54 +0300 using RuboCop version 0.39.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 66
10
+ Metrics/AbcSize:
11
+ Max: 110
12
+
13
+ # Offense count: 6
14
+ Metrics/BlockNesting:
15
+ Max: 6
16
+
17
+ # Offense count: 6
18
+ # Configuration parameters: CountComments.
19
+ Metrics/ClassLength:
20
+ Max: 1400
21
+
22
+ # Offense count: 26
23
+ Metrics/CyclomaticComplexity:
24
+ Max: 22
25
+
26
+ # Offense count: 273
27
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
28
+ # URISchemes: http, https
29
+ Metrics/LineLength:
30
+ Max: 164
31
+
32
+ # Offense count: 81
33
+ # Configuration parameters: CountComments.
34
+ Metrics/MethodLength:
35
+ Max: 100
36
+
37
+ # Offense count: 3
38
+ # Configuration parameters: CountComments.
39
+ Metrics/ModuleLength:
40
+ Max: 419
41
+
42
+ # Offense count: 22
43
+ Metrics/PerceivedComplexity:
44
+ Max: 28
@@ -11,7 +11,9 @@ matrix:
11
11
  fast_finish:
12
12
  true
13
13
 
14
- script: "bundle exec rspec"
14
+ script:
15
+ - bundle exec rspec
16
+ - bundle exec rubocop
15
17
 
16
18
  install:
17
19
  - gem install bundler
@@ -21,6 +21,10 @@ And run the test suite (should be all green with pending tests):
21
21
 
22
22
  If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
23
23
 
24
+ While preparing your pull requests, don't forget to check your code with Rubocop:
25
+
26
+ `bundle exec rubocop`
27
+
24
28
  ## Daru internals
25
29
 
26
- To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
30
+ To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
data/History.md CHANGED
@@ -1,3 +1,46 @@
1
+ # 0.1.3 (May 2016)
2
+
3
+ * Enhancements
4
+ - Proper error handling for case where an index specified by the user is not actually present in the DataFrame/Vector (@lokeshh).
5
+ - DataFrame CSV writer function will now suppress headers when passing headers: false (@gnilrets).
6
+ - Refactor Index and MultiIndex so that a Vector or DataFrame can access the actual index number without having to check the exact type of index every time (@lokeshh).
7
+ - Refactor `Vector#[]=` to not use conditionals (@lokeshh).
8
+ - Custom `#dup` method for `Daru::DateTimeIndex` (@Deepakkoli93).
9
+ - Massive performance boost to Vector and DataFrame sorting by using in-built Array#sort and removing previous hand-made sort (@lokeshh).
10
+ - Handle nils in sorting for Vectors and DataFrame (@lokeshh, @gnilrets).
11
+ - Add #describe function for Vectors (@shahsaurabh0605).
12
+ - Adds support for concatenating dataframes that don't share all the same vectors (@gnilrets).
13
+ - Massive performance enhancement for joins using the sorted merge method (@gnilrets).
14
+ - New statistics methods and tests for DataFrame (@shahsaurabh0605).
15
+ - Add explicit conversion to hash for DataFrame (DataFrame#to_h, Vector#to_h) and remove implicit conversion to hash (DataFrame#to_hash, Vector#to_hash) (@gnilrets).
16
+ - Add `DataFrame#rename_vectors` for simplifying renaming of vectors in DataFrame (@gnilrets).
17
+ - MultiIndex raises error on accessing an invalid index (@shreyanshd).
18
+ - Order columns as given in the CSV file when reading into a DataFrame from CSV using `DataFrame.from_csv` (@lokeshh).
19
+ - Add `Vector#percent_change` and `DataFrame#percent_change` (@shahsaurabh0605).
20
+ - Faster `DataFrame#filter_rows` (@lokeshh).
21
+ - Added `Vector#emv` for calculating exponential moving variance of Vector (@shahsaurabh0605).
22
+ - Add support for associating metadata with a Vector or DataFrame using the :metadata option (@gnilrets).
23
+ - Add `Vector#emsd` for calculating exponential moving standard deviation of Vector (@shahsaurabh0605).
24
+ - Sample and population covariance functions for Vector (@shahsaurabh0605).
25
+ - Improve `DataFrame#dup` performance (@gnilrets).
26
+ - Add `Daru::DataFrame::Core::GroupBy#reduce` for reducing groups by passing a block (@gnilrets).
27
+ - Add rubocop as development dependency and make changes suggested by it to conform to the Ruby Style Guide (@zverok).
28
+ - Allow Daru::Index to be initialized by a Range (@lokeshh).
29
+ * Fixes
30
+ - Fix conflict with narray that caused namespace clashes with nmatrix in case both narray and nmatrix were installed on the user's system (@lokeshh).
31
+ - Fix bug with dataframe concatenation that caused modifying the arrays that
32
+ compose the vectors in the original dataframes (@gnilrets).
33
+ - Fix an error where the Vectors in an empty DataFrame would not be assigned correct names (@lokeshh).
34
+ - Correct spelling mistakes and fix broken links in README (@lokeshh).
35
+ - Fix bug in Vector#mode (@sunshineyyy).
36
+ - Fix `Vector#index_of` method to handle dtype :array differently (@lokeshh).
37
+ - Fix `DateTimeIndex#include?` method since it was raising an exception when index not found. It returns false now (@Phitherek).
38
+ - Handle nils in group_by keys (@gnilrets).
39
+ - Handle nils for statistics methods in Vector and DataFrame for :array and :gsl data (@lokeshh).
40
+ - Fix `DataFrame#clone` when no arguments have been passed to it (@lokeshh).
41
+ - Fix bug when joining empty dataframes (@gnilrets).
42
+
43
+
1
44
  # 0.1.2
2
45
 
3
46
  * Enhancements
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
 
8
8
  daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
9
9
 
10
- daru makes it easy and intuituive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby works with all ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
10
+ daru makes it easy and intuitive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby, it works with all Ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
11
11
 
12
12
  ## Features
13
13
 
@@ -16,7 +16,7 @@ daru makes it easy and intuituive to process data predominantly through 2 data s
16
16
  - DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
17
17
  * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
18
18
  * Support for time series.
19
- * Singly and hierarchially indexed data structures.
19
+ * Singly and hierarchically indexed data structures.
20
20
  * Flexible and intuitive API for manipulation and analysis of data.
21
21
  * Easy plotting, statistics and arithmetic.
22
22
  * Plentiful iterators.
@@ -150,7 +150,7 @@ data_frame.where(
150
150
 
151
151
  *Plotting*
152
152
 
153
- Daru supports plotting of interactive graphs with [nyaplot](). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
153
+ Daru supports plotting of interactive graphs with [nyaplot](https://github.com/domitry/nyaplot). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
154
154
  ``` ruby
155
155
  data_frame.plot type: :bar, x: 'Beer', y: 'Gallons sold' do |plot, diagram|
156
156
  plot.x_label "Beer"
@@ -179,7 +179,6 @@ Docs can be found [here](https://rubygems.org/gems/daru).
179
179
  * Statistics on DataFrame over rows.
180
180
  * Calculate percentage change.
181
181
  * Have some sample data sets for users to play around with. Should be able to load these from the code itself.
182
- * Sorting with missing data present.
183
182
 
184
183
  ## Contributing
185
184
 
@@ -0,0 +1,45 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+
11
+ df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
12
+ h[v] = Daru::Vector.new(1.upto(n).to_a)
13
+ h
14
+ end
15
+
16
+ df = Daru::DataFrame.new(df_h)
17
+
18
+ Benchmark.bm do |bm|
19
+ bm.report("dupe (n=#{n})") do
20
+ df.dup
21
+ end
22
+ end
23
+ end
24
+
25
+ # ===== Benchmarks =====
26
+ # System: iMac Late 2013 3.5GHz Core i7
27
+ #
28
+ # user system total real
29
+ #dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
30
+ # user system total real
31
+ #dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
32
+ # user system total real
33
+ #dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
34
+
35
+
36
+
37
+
38
+ # ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
39
+ # Note that the n here is 100x smaller than above
40
+ # user system total real
41
+ #dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
42
+ # user system total real
43
+ #dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
44
+ # user system total real
45
+ #dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
@@ -7,11 +7,11 @@ data = Daru::DataFrame.from_csv 'TradeoffData.csv'
7
7
 
8
8
  Benchmark.bm do |x|
9
9
  x.report("Single column grouping") do
10
- @single = data.group_by([:Treatment])
10
+ @single = data.group_by(['Treatment'])
11
11
  end
12
12
 
13
13
  x.report("Multi-column grouping") do
14
- @multi = data.group_by([:Group, :Treatment])
14
+ @multi = data.group_by(['Group', 'Treatment'])
15
15
  end
16
16
 
17
17
  x.report("Single mean") do
@@ -24,9 +24,9 @@ Benchmark.bm do |x|
24
24
  end
25
25
 
26
26
  # ===== Benchmarks =====
27
- #
27
+ #
28
28
  # user system total real
29
- # Single column grouping 0.000000 0.000000 0.000000 (0.000356)
30
- # Multi-column grouping 0.000000 0.000000 0.000000 (0.000958)
31
- # Single mean 0.000000 0.000000 0.000000 (0.000865)
32
- # Multi mean 0.000000 0.000000 0.000000 (0.002748)
29
+ # Single column grouping 0.000000 0.000000 0.000000 (0.000340)
30
+ # Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
31
+ # Single mean 0.000000 0.000000 0.000000 (0.001208)
32
+ # Multi mean 0.000000 0.000000 0.000000 (0.004892)
@@ -0,0 +1,52 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+ keys = (1..(n)).to_a
11
+ base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
12
+ lookup_hash = keys.map { |k| [k, k * 100]}.to_h
13
+
14
+ base_data_df = Daru::DataFrame.new(base_data)
15
+ lookup_df = Daru::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
16
+
17
+ Benchmark.bm do |bm|
18
+ bm.report("Inner join (n=#{n})") do
19
+ base_data_df.join(lookup_df, on: [:keys], how: :inner)
20
+ end
21
+
22
+ bm.report("Outer join (n=#{n})") do
23
+ base_data_df.join(lookup_df, on: [:keys], how: :outer)
24
+ end
25
+ end
26
+ end
27
+
28
+ # ===== Benchmarks =====
29
+ # System: MacBook Pro Mid 2014 3GHz Core i7
30
+ #
31
+ # user system total real
32
+ #Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
33
+ #Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
34
+ # user system total real
35
+ #Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
36
+ #Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
37
+ # user system total real
38
+ #Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
39
+ #Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
40
+
41
+
42
+ # ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
43
+ # Note that the n here is 10x smaller than above
44
+ # user system total real
45
+ #Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
46
+ #Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
47
+ # user system total real
48
+ #Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
49
+ #Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
50
+ # user system total real
51
+ #Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
52
+ #Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
@@ -28,8 +28,8 @@ Benchmark.bm do |x|
28
28
 
29
29
  x.report("Sort two columns with custom operators in different orders of DataFrame") do
30
30
  df.sort([:c,:a], ascending: [true, false],
31
- by: { c: lambda { |a,b| a.to_s <=> b.to_s },
32
- a: lambda { |a,b| (a+1) <=> (b+1) } })
31
+ by: { c: lambda { |a| a.to_s },
32
+ a: lambda { |a| a+1 } })
33
33
  end
34
34
  end
35
35
 
@@ -42,3 +42,10 @@ end
42
42
  # Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
43
43
  # Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
44
44
  # Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
45
+
46
+ # ===== Current Benchmarks =====
47
+ # Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
48
+ # Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
49
+ # Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
50
+ # Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
51
+ # Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
@@ -0,0 +1,39 @@
1
+ require 'daru'
2
+ require 'benchmark'
3
+
4
+ vector = Daru::Vector.new(
5
+ (10**6).times.map.to_a.shuffle,
6
+ missing_values: 100.times.map.to_a.shuffle
7
+ )
8
+
9
+ vector_gsl = Daru::Vector.new(
10
+ 10000.times.map.to_a.shuffle,
11
+ missing_values: 100.times.map.to_a.shuffle,
12
+ dtype: :gsl
13
+ )
14
+
15
+ Benchmark.bm do |x|
16
+ x.report("Mean of a vector") do
17
+ vector.mean
18
+ end
19
+
20
+ x.report("Minimum of a vector") do
21
+ vector.min
22
+ end
23
+
24
+ x.report("Mean of a vector with data type gsl") do
25
+ vector_gsl.mean
26
+ end
27
+
28
+ x.report "Minimum of a vector with data type gsl" do
29
+ vector_gsl.min
30
+ end
31
+ end
32
+
33
+ # ===== Benchmarks =====
34
+ #
35
+ # user system total real
36
+ # Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
37
+ # Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
38
+ # Mean of a gsl vector 0.000000 0.000000 0.000000 ( 0.001037)
39
+ # Min of a gsl vector 0.000000 0.000000 0.000000 ( 0.001251)