daru 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +99 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +3 -1
  6. data/CONTRIBUTING.md +5 -1
  7. data/History.md +43 -0
  8. data/README.md +3 -4
  9. data/benchmarks/duplicating.rb +45 -0
  10. data/benchmarks/group_by.rb +7 -7
  11. data/benchmarks/joining.rb +52 -0
  12. data/benchmarks/sorting.rb +9 -2
  13. data/benchmarks/statistics.rb +39 -0
  14. data/daru.gemspec +4 -4
  15. data/lib/daru.rb +9 -9
  16. data/lib/daru/accessors/array_wrapper.rb +15 -11
  17. data/lib/daru/accessors/dataframe_by_row.rb +1 -1
  18. data/lib/daru/accessors/gsl_wrapper.rb +30 -19
  19. data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
  20. data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
  21. data/lib/daru/core/group_by.rb +69 -16
  22. data/lib/daru/core/merge.rb +135 -151
  23. data/lib/daru/core/query.rb +9 -30
  24. data/lib/daru/dataframe.rb +476 -439
  25. data/lib/daru/date_time/index.rb +150 -137
  26. data/lib/daru/date_time/offsets.rb +45 -41
  27. data/lib/daru/extensions/rserve.rb +4 -4
  28. data/lib/daru/index.rb +88 -64
  29. data/lib/daru/io/io.rb +33 -34
  30. data/lib/daru/io/sql_data_source.rb +11 -11
  31. data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
  32. data/lib/daru/maths/arithmetic/vector.rb +9 -14
  33. data/lib/daru/maths/statistics/dataframe.rb +89 -61
  34. data/lib/daru/maths/statistics/vector.rb +226 -97
  35. data/lib/daru/monkeys.rb +23 -30
  36. data/lib/daru/plotting/dataframe.rb +27 -28
  37. data/lib/daru/plotting/vector.rb +12 -13
  38. data/lib/daru/vector.rb +221 -330
  39. data/lib/daru/version.rb +2 -2
  40. data/spec/core/group_by_spec.rb +16 -0
  41. data/spec/core/merge_spec.rb +30 -14
  42. data/spec/dataframe_spec.rb +268 -14
  43. data/spec/index_spec.rb +23 -5
  44. data/spec/io/io_spec.rb +37 -16
  45. data/spec/math/statistics/dataframe_spec.rb +40 -8
  46. data/spec/math/statistics/vector_spec.rb +135 -10
  47. data/spec/monkeys_spec.rb +3 -3
  48. data/spec/vector_spec.rb +157 -25
  49. metadata +41 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed2a3e2a4cd9fce8d95af6aac9c3db532eed444f
4
- data.tar.gz: 90ca6a62ee824d20f72a9f6689c03f27d7667168
3
+ metadata.gz: 6a72d4b2565e47c5c4112aac514a7191bd4f962c
4
+ data.tar.gz: 2f68b0bb56e621f36d32f6bb9ecc541a61af7323
5
5
  SHA512:
6
- metadata.gz: e6f3345ef4372e1c45a3d80c0cc61c2b4c72e4c810cfb183f30bfd9285a09639ea39cd0a3597fc63551d7f72398d8d83af4424855018e8a5b2a99274b46625cd
7
- data.tar.gz: 65d262b1deec54680a5fdcfecda3530c9fb9450dbd280c18833b655521418ed340f311253221dfd4e018577b063f9d3638d0d600a108c3b98ec5a7cd2dfe98ec
6
+ metadata.gz: 950a36a9956dd37ac334bd1b657cfb8a74994b1c771846b359d8c885d8ec3c62edb75df18a951086799c77a2fe7c3425c64c0f069fd85b50470095d13ba323c0
7
+ data.tar.gz: 0b9d8815d90f947a7a2dcf5447dd46057dbe559ecd3959e88e4932afb945183a168c46fb131bdaca9c07682a28e408d3d19b2e716b57ddad0638b375371f6c3b
data/.gitignore CHANGED
@@ -2,3 +2,5 @@
2
2
  Gemfile.lock
3
3
  doc/
4
4
  .yardoc/
5
+ .bundle
6
+ vendor/
@@ -0,0 +1,99 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ AllCops:
4
+ Include:
5
+ - 'lib/**/*'
6
+ Exclude:
7
+ - 'spec/*'
8
+ - 'spec/**/*'
9
+ - 'vendor/**/*'
10
+ - 'benchmarks/*'
11
+ DisplayCopNames: true
12
+
13
+ # Preferred codebase style ---------------------------------------------
14
+ Style/ExtraSpacing:
15
+ AllowForAlignment: true
16
+
17
+ Style/FormatString:
18
+ EnforcedStyle: percent
19
+
20
+ Style/AndOr:
21
+ EnforcedStyle: conditionals
22
+
23
+ Style/SpaceAroundEqualsInParameterDefault:
24
+ EnforcedStyle: no_space
25
+
26
+ Style/SpaceInsideBlockBraces:
27
+ EnforcedStyle: space
28
+
29
+ Style/SpaceInsideHashLiteralBraces:
30
+ EnforcedStyle: no_space
31
+
32
+ Style/AlignParameters:
33
+ EnforcedStyle: with_fixed_indentation
34
+
35
+ Style/EmptyElse:
36
+ EnforcedStyle: empty
37
+
38
+ Style/ParallelAssignment:
39
+ Enabled: false
40
+
41
+ Style/DoubleNegation:
42
+ Enabled: false
43
+
44
+ Style/SingleLineBlockParams:
45
+ Enabled: false
46
+
47
+ Style/PerlBackrefs:
48
+ Enabled: false
49
+
50
+ Style/SpaceAfterComma:
51
+ Enabled: false
52
+
53
+ Style/SpaceAroundOperators:
54
+ Enabled: false
55
+
56
+ Style/EmptyCaseCondition:
57
+ Enabled: false
58
+
59
+ # Neither of the preferred styles is good enough :(
60
+ Style/BlockDelimiters:
61
+ Enabled: false
62
+
63
+ # TODO -----------------------------------------------------------------
64
+
65
+ Style/Documentation:
66
+ Enabled: false
67
+
68
+ # To discuss and decide ------------------------------------------------
69
+
70
+ # FIXME: in fact, rescue modifier is rarely a good choice.
71
+ # But currently I can't fully grasp the three places they are used.
72
+ # So, leaving them intact. - zverok, 2016-05-07
73
+ Style/RescueModifier:
74
+ Exclude:
75
+ - 'lib/daru/accessors/gsl_wrapper.rb'
76
+ - 'lib/daru/dataframe.rb'
77
+ - 'lib/daru/io/sql_data_source.rb'
78
+
79
+ # FIXME: at some point we should enable this and fix it - zverok, 2016-05-07
80
+ Style/Alias:
81
+ Enabled: false
82
+
83
+ # FIXME: should decide about this.
84
+ # Personally I prefer (as most of Ruby community) to use parens, but
85
+ # we also can enforce style to NOT using them. Yet it definitely should
86
+ # be only one style. Current codebase uses ~400 method defs without and
87
+ # ~ 100 method defs with them. - zverok, 2016-05-07
88
+ Style/MethodDefParentheses:
89
+ Enabled: false
90
+
91
+ # Should be fixed, but require change of public API --------------------
92
+
93
+ # Bans methods like `has_missing_data?`, `is_number?` and so on - started
94
+ # with unnecessary has_ or is_.
95
+ Style/PredicateName:
96
+ Exclude:
97
+ - 'lib/daru/dataframe.rb'
98
+ - 'lib/daru/monkeys.rb'
99
+ - 'lib/daru/vector.rb'
@@ -0,0 +1,44 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2016-05-06 16:48:54 +0300 using RuboCop version 0.39.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 66
10
+ Metrics/AbcSize:
11
+ Max: 110
12
+
13
+ # Offense count: 6
14
+ Metrics/BlockNesting:
15
+ Max: 6
16
+
17
+ # Offense count: 6
18
+ # Configuration parameters: CountComments.
19
+ Metrics/ClassLength:
20
+ Max: 1400
21
+
22
+ # Offense count: 26
23
+ Metrics/CyclomaticComplexity:
24
+ Max: 22
25
+
26
+ # Offense count: 273
27
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
28
+ # URISchemes: http, https
29
+ Metrics/LineLength:
30
+ Max: 164
31
+
32
+ # Offense count: 81
33
+ # Configuration parameters: CountComments.
34
+ Metrics/MethodLength:
35
+ Max: 100
36
+
37
+ # Offense count: 3
38
+ # Configuration parameters: CountComments.
39
+ Metrics/ModuleLength:
40
+ Max: 419
41
+
42
+ # Offense count: 22
43
+ Metrics/PerceivedComplexity:
44
+ Max: 28
@@ -11,7 +11,9 @@ matrix:
11
11
  fast_finish:
12
12
  true
13
13
 
14
- script: "bundle exec rspec"
14
+ script:
15
+ - bundle exec rspec
16
+ - bundle exec rubocop
15
17
 
16
18
  install:
17
19
  - gem install bundler
@@ -21,6 +21,10 @@ And run the test suite (should be all green with pending tests):
21
21
 
22
22
  If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
23
23
 
24
+ While preparing your pull requests, don't forget to check your code with Rubocop:
25
+
26
+ `bundle exec rubocop`
27
+
24
28
  ## Daru internals
25
29
 
26
- To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
30
+ To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
data/History.md CHANGED
@@ -1,3 +1,46 @@
1
+ # 0.1.3 (May 2016)
2
+
3
+ * Enhancements
4
+ - Proper error handling for case where an index specified by the user is not actually present in the DataFrame/Vector (@lokeshh).
5
+ - DataFrame CSV writer function will now suppress headers when passing headers: false (@gnilrets).
6
+ - Refactor Index and MultiIndex so that a Vector or DataFrame can access the actual index number without having to check the exact type of index every time (@lokeshh).
7
+ - Refactor `Vector#[]=` to not use conditionals (@lokeshh).
8
+ - Custom `#dup` method for `Daru::DateTimeIndex` (@Deepakkoli93).
9
+ - Massive performance boost to Vector and DataFrame sorting by using in-built Array#sort and removing previous hand-made sort (@lokeshh).
10
+ - Handle nils in sorting for Vectors and DataFrame (@lokeshh, @gnilrets).
11
+ - Add #describe function for Vectors (@shahsaurabh0605).
12
+ - Adds support for concatenating dataframes that don't share all the same vectors (@gnilrets).
13
+ - Massive performance enhancement for joins using the sorted merge method (@gnilrets).
14
+ - New statistics methods and tests for DataFrame (@shahsaurabh0605).
15
+ - Add explicit conversion to hash for DataFrame (DataFrame#to_h, Vector#to_h) and remove implicit conversion to hash (DataFrame#to_hash, Vector#to_hash) (@gnilrets).
16
+ - Add `DataFrame#rename_vectors` for simplifying renaming of vectors in DataFrame (@gnilrets).
17
+ - MultiIndex raises error on accessing an invalid index (@shreyanshd).
18
+ - Order columns as given in the CSV file when reading into a DataFrame from CSV using `DataFrame.from_csv` (@lokeshh).
19
+ - Add `Vector#percent_change` and `DataFrame#percent_change` (@shahsaurabh0605).
20
+ - Faster `DataFrame#filter_rows` (@lokeshh).
21
+ - Added `Vector#emv` for calculating exponential moving variance of Vector (@shahsaurabh0605).
22
+ - Add support for associating metadata with a Vector or DataFrame using the :metadata option (@gnilrets).
23
+ - Add `Vector#emsd` for calculating exponential moving standard deviation of Vector (@shahsaurabh0605).
24
+ - Sample and population covariance functions for Vector (@shahsaurabh0605).
25
+ - Improve `DataFrame#dup` performance (@gnilrets).
26
+ - Add `Daru::DataFrame::Core::GroupBy#reduce` for reducing groups by passing a block (@gnilrets).
27
+ - Add rubocop as development dependency and make changes suggested by it to conform to the Ruby Style Guide (@zverok).
28
+ - Allow Daru::Index to be initialized by a Range (@lokeshh).
29
+ * Fixes
30
+ - Fix conflict with narray that caused namespace clashes with nmatrix in case both narray and nmatrix were installed on the user's system (@lokeshh).
31
+ - Fix bug with dataframe concatenation that caused modifying the arrays that
32
+ compose the vectors in the original dataframes (@gnilrets).
33
+ - Fix an error where the Vectors in an empty DataFrame would not be assigned correct names (@lokeshh).
34
+ - Correct spelling mistakes and fix broken links in README (@lokeshh).
35
+ - Fix bug in Vector#mode (@sunshineyyy).
36
+ - Fix `Vector#index_of` method to handle dtype :array differently (@lokeshh).
37
+ - Fix `DateTimeIndex#include?` method since it was raising an exception when index not found. It returns false now (@Phitherek).
38
+ - Handle nils in group_by keys (@gnilrets).
39
+ - Handle nils for statistics methods in Vector and DataFrame for :array and :gsl data (@lokeshh).
40
+ - Fix `DataFrame#clone` when no arguments have been passed to it (@lokeshh).
41
+ - Fix bug when joining empty dataframes (@gnilrets).
42
+
43
+
1
44
  # 0.1.2
2
45
 
3
46
  * Enhancements
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
 
8
8
  daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
9
9
 
10
- daru makes it easy and intuituive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby works with all ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
10
+ daru makes it easy and intuitive to process data predominantly through 2 data structures: `Daru::DataFrame` and `Daru::Vector`. Written in pure Ruby works with all ruby implementations. Tested with MRI 2.0, 2.1, 2.2 and 2.3.
11
11
 
12
12
  ## Features
13
13
 
@@ -16,7 +16,7 @@ daru makes it easy and intuituive to process data predominantly through 2 data s
16
16
  - DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
17
17
  * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
18
18
  * Support for time series.
19
- * Singly and hierarchially indexed data structures.
19
+ * Singly and hierarchically indexed data structures.
20
20
  * Flexible and intuitive API for manipulation and analysis of data.
21
21
  * Easy plotting, statistics and arithmetic.
22
22
  * Plentiful iterators.
@@ -150,7 +150,7 @@ data_frame.where(
150
150
 
151
151
  *Plotting*
152
152
 
153
- Daru supports plotting of interactive graphs with [nyaplot](). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
153
+ Daru supports plotting of interactive graphs with [nyaplot](https://github.com/domitry/nyaplot). You can easily create a plot with the `#plot` method. Here we plot the gallons sold on the Y axis and name of the brand on the X axis in a bar graph.
154
154
  ``` ruby
155
155
  data_frame.plot type: :bar, x: 'Beer', y: 'Gallons sold' do |plot, diagram|
156
156
  plot.x_label "Beer"
@@ -179,7 +179,6 @@ Docs can be found [here](https://rubygems.org/gems/daru).
179
179
  * Statistics on DataFrame over rows.
180
180
  * Calculate percentage change.
181
181
  * Have some sample data sets for users to play around with. Should be able to load these from the code itself.
182
- * Sorting with missing data present.
183
182
 
184
183
  ## Contributing
185
184
 
@@ -0,0 +1,45 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+
11
+ df_h = ('a'..'z').map { |v| v.to_sym }.reduce({}) do |h, v|
12
+ h[v] = Daru::Vector.new(1.upto(n).to_a)
13
+ h
14
+ end
15
+
16
+ df = Daru::DataFrame.new(df_h)
17
+
18
+ Benchmark.bm do |bm|
19
+ bm.report("dupe (n=#{n})") do
20
+ df.dup
21
+ end
22
+ end
23
+ end
24
+
25
+ # ===== Benchmarks =====
26
+ # System: iMac Late 2013 3.5GHz Core i7
27
+ #
28
+ # user system total real
29
+ #dupe (n=10000) 0.590000 0.020000 0.610000 ( 0.613648)
30
+ # user system total real
31
+ #dupe (n=20000) 1.170000 0.040000 1.210000 ( 1.236629)
32
+ # user system total real
33
+ #dupe (n=40000) 2.390000 0.070000 2.460000 ( 2.511199)
34
+
35
+
36
+
37
+
38
+ # ===== Prior Benchmarks (Daru 0.1.2 - 2707559369c03894a8394714820aabf116b99b20 - 2016-04-25) =====
39
+ # Note that the n here is 100x smaller than above
40
+ # user system total real
41
+ #dupe (n=100) 0.220000 0.000000 0.220000 ( 0.227924)
42
+ # user system total real
43
+ #dupe (n=200) 0.850000 0.000000 0.850000 ( 0.856591)
44
+ # user system total real
45
+ #dupe (n=400) 3.370000 0.020000 3.390000 ( 3.428211)
@@ -7,11 +7,11 @@ data = Daru::DataFrame.from_csv 'TradeoffData.csv'
7
7
 
8
8
  Benchmark.bm do |x|
9
9
  x.report("Single column grouping") do
10
- @single = data.group_by([:Treatment])
10
+ @single = data.group_by(['Treatment'])
11
11
  end
12
12
 
13
13
  x.report("Multi-column grouping") do
14
- @multi = data.group_by([:Group, :Treatment])
14
+ @multi = data.group_by(['Group', 'Treatment'])
15
15
  end
16
16
 
17
17
  x.report("Single mean") do
@@ -24,9 +24,9 @@ Benchmark.bm do |x|
24
24
  end
25
25
 
26
26
  # ===== Benchmarks =====
27
- #
27
+ #
28
28
  # user system total real
29
- # Single column grouping 0.000000 0.000000 0.000000 (0.000356)
30
- # Multi-column grouping 0.000000 0.000000 0.000000 (0.000958)
31
- # Single mean 0.000000 0.000000 0.000000 (0.000865)
32
- # Multi mean 0.000000 0.000000 0.000000 (0.002748)
29
+ # Single column grouping 0.000000 0.000000 0.000000 (0.000340)
30
+ # Multi-column grouping 0.000000 0.000000 0.000000 (0.000855)
31
+ # Single mean 0.000000 0.000000 0.000000 (0.001208)
32
+ # Multi mean 0.000000 0.000000 0.000000 (0.004892)
@@ -0,0 +1,52 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'benchmark'
4
+ require 'daru'
5
+
6
+ # Check scaling
7
+ base_n = 10000
8
+ 0.upto(2) do |iscale|
9
+ n = base_n * 2**iscale
10
+ keys = (1..(n)).to_a
11
+ base_data = { idx: 1.upto(n).to_a, keys: 1.upto(n).map { |v| keys[Random.rand(n)]}}
12
+ lookup_hash = keys.map { |k| [k, k * 100]}.to_h
13
+
14
+ base_data_df = Daru::DataFrame.new(base_data)
15
+ lookup_df = Daru::DataFrame.new({ keys: lookup_hash.keys, values: lookup_hash.values })
16
+
17
+ Benchmark.bm do |bm|
18
+ bm.report("Inner join (n=#{n})") do
19
+ base_data_df.join(lookup_df, on: [:keys], how: :inner)
20
+ end
21
+
22
+ bm.report("Outer join (n=#{n})") do
23
+ base_data_df.join(lookup_df, on: [:keys], how: :outer)
24
+ end
25
+ end
26
+ end
27
+
28
+ # ===== Benchmarks =====
29
+ # System: MacBook Pro Mid 2014 3GHz Core i7
30
+ #
31
+ # user system total real
32
+ #Inner join (n=10000) 0.170000 0.000000 0.170000 ( 0.182254)
33
+ #Outer join (n=10000) 0.200000 0.000000 0.200000 ( 0.203022)
34
+ # user system total real
35
+ #Inner join (n=20000) 0.380000 0.000000 0.380000 ( 0.387600)
36
+ #Outer join (n=20000) 0.410000 0.000000 0.410000 ( 0.415644)
37
+ # user system total real
38
+ #Inner join (n=40000) 0.720000 0.010000 0.730000 ( 0.743787)
39
+ #Outer join (n=40000) 0.810000 0.010000 0.820000 ( 0.840871)
40
+
41
+
42
+ # ===== Prior Benchmarks (Daru 0.1.2 - prior to sorted merge algorithm) =====
43
+ # Note that the n here is 10x smaller than above
44
+ # user system total real
45
+ #Inner join (n=1000) 0.170000 0.010000 0.180000 ( 0.175585)
46
+ #Outer join (n=1000) 0.990000 0.000000 0.990000 ( 1.004305)
47
+ # user system total real
48
+ #Inner join (n=2000) 0.440000 0.010000 0.450000 ( 0.446748)
49
+ #Outer join (n=2000) 3.880000 0.010000 3.890000 ( 3.926399)
50
+ # user system total real
51
+ #Inner join (n=4000) 1.670000 0.010000 1.680000 ( 1.680742)
52
+ #Outer join (n=4000) 15.640000 0.060000 15.700000 ( 15.855202)
@@ -28,8 +28,8 @@ Benchmark.bm do |x|
28
28
 
29
29
  x.report("Sort two columns with custom operators in different orders of DataFrame") do
30
30
  df.sort([:c,:a], ascending: [true, false],
31
- by: { c: lambda { |a,b| a.to_s <=> b.to_s },
32
- a: lambda { |a,b| (a+1) <=> (b+1) } })
31
+ by: { c: lambda { |a| a.to_s },
32
+ a: lambda { |a| a+1 } })
33
33
  end
34
34
  end
35
35
 
@@ -42,3 +42,10 @@ end
42
42
  # Sort single column of DataFrame 2502.450000 0.000000 2502.450000 (2503.808073)
43
43
  # Sort two columns of DataFrame 0.540000 0.000000 0.540000 ( 0.537670)
44
44
  # Sort two columns with custom operators in different orders of DataFrame 2084.160000 7.260000 2091.420000 (2092.716603)
45
+
46
+ # ===== Current Benchmarks =====
47
+ # Sort a Vector without any args 0.070000 0.000000 0.070000 ( 0.070323)
48
+ # Sort vector in descending order with custom <=> operator 0.120000 0.000000 0.120000 ( 0.119462)
49
+ # Sort single column of DataFrame 0.940000 0.010000 0.950000 ( 0.950349)
50
+ # Sort two columns of DataFrame 1.490000 0.010000 1.500000 ( 1.505680)
51
+ # Sort two columns with custom operators in different orders of DataFrame 1.480000 0.000000 1.480000 ( 1.495839)
@@ -0,0 +1,39 @@
1
+ require 'daru'
2
+ require 'benchmark'
3
+
4
+ vector = Daru::Vector.new(
5
+ (10**6).times.map.to_a.shuffle,
6
+ missing_values: 100.times.map.to_a.shuffle
7
+ )
8
+
9
+ vector_gsl = Daru::Vector.new(
10
+ 10000.times.map.to_a.shuffle,
11
+ missing_values: 100.times.map.to_a.shuffle,
12
+ dtype: :gsl
13
+ )
14
+
15
+ Benchmark.bm do |x|
16
+ x.report("Mean of a vector") do
17
+ vector.mean
18
+ end
19
+
20
+ x.report("Minimum of a vector") do
21
+ vector.min
22
+ end
23
+
24
+ x.report("Mean of a vector with data type gsl") do
25
+ vector_gsl.mean
26
+ end
27
+
28
+ x.report "Minimum of a vector with data type gsl" do
29
+ vector_gsl.min
30
+ end
31
+ end
32
+
33
+ # ===== Benchmarks =====
34
+ #
35
+ # user system total real
36
+ # Mean of a vector 0.130000 0.010000 0.140000 ( 0.145534)
37
+ # Min of a vector 0.150000 0.000000 0.150000 ( 0.163623)
38
+ # Mean of a gsl vector 0.000000 0.000000 0.000000 ( 0.001037)
39
+ # Min of a gsl vector 0.000000 0.000000 0.000000 ( 0.001251)