fast_group_by 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ require "rake"
2
+ require "rake/clean"
3
+ require "rake/gempackagetask"
4
+ require 'rubygems'
5
+
6
+ ################################################################################
7
+ ### Gem
8
+ ################################################################################
9
+
10
# Load the project's gemspec and define the packaging tasks from it.
# Loading is best-effort: any failure is reported as a warning and the rest of
# the Rakefile is still evaluated, with `spec` left as nil.
begin
  # Parse gemspec using the github safety level.
  file = Dir['*.gemspec'].first
  # Fail with a readable message instead of the TypeError that File.read(nil)
  # would raise; the rescue below turns it into the usual warning.
  raise "No *.gemspec file found in #{Dir.pwd}" unless file
  data = File.read(file)
  spec = nil
  # FIXME: Lowered SAFE from 3 to 2 to work with Ruby 1.9 due to rubygems
  # performing a require internally
  # NOTE(review): eval executes the gemspec as arbitrary Ruby code; only run
  # this Rakefile against a gemspec you trust.
  Thread.new { spec = eval("$SAFE = 2\n%s" % data) }.join

  # Create the gem tasks
  Rake::GemPackageTask.new(spec) do |package|
    package.gem_spec = spec
  end
rescue SystemExit, Interrupt
  # Never swallow a deliberate exit or Ctrl-C.
  raise
rescue Exception => e
  # Exception (not StandardError) is rescued on purpose: evaluating the
  # gemspec can raise ScriptError subclasses such as SyntaxError or LoadError.
  trace = (e.backtrace || [])[0...5].map { |l| ' %s' % l }.join("\n")
  # Trailing newline keeps the warning from running into later output.
  printf "WARNING: Error caught (%s): %s\n%s\n", e.class.name, e.message, trace
end
26
+
27
desc 'Package and install the gem for the current version'
task :install => :gem do
  # `spec` is the top-level gemspec object of this Rakefile; it is nil when
  # the gemspec could not be loaded.
  raise "Cannot install: the gemspec could not be loaded" unless spec

  command = "sudo gem install -l pkg/%s-%s.gem" % [spec.name, spec.version]
  # Kernel#system returns false (non-zero exit) or nil (command not run) on
  # failure; surface that instead of letting the task appear to succeed.
  raise "Gem installation failed: #{command}" unless system(command)
end
31
+
32
desc 'Show files missing from gemspec'
task :diff do
  # `spec` is the top-level gemspec object of this Rakefile; it is nil when
  # the gemspec could not be loaded.
  raise "Cannot diff: the gemspec could not be loaded" unless spec

  patterns = %w[
    Rakefile
    *README*
    *LICENSE*
    *.gemspec
    bin/*
    lib/**/*
    spec/**/*
  ]
  # Match case-insensitively so that lower-case names such as readme.rdoc are
  # found by *README*; uniq guards against a file matching several patterns.
  files = patterns.map { |pattern| Dir.glob(pattern, File::FNM_CASEFOLD) }.
    flatten.uniq.select { |f| File.file?(f) }

  # On disk but not listed in the gemspec.
  missing_files = files - spec.files
  # Listed in the gemspec but not found on disk by the patterns above.
  extra_files = spec.files - files

  puts "Missing files:"
  puts missing_files.join(" ")
  puts "Extra files:"
  puts extra_files.join(" ")
end
@@ -0,0 +1,33 @@
1
+ Gem::Specification.new do |s|
2
+ # Project
3
+ s.name = 'fast_group_by'
4
+ s.summary = "Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered."
5
+ s.description = s.summary
6
+ s.version = '0.1.0'
7
+ s.date = '2009-07-16'
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Wes Oldenbeuving"]
10
+ s.email = "narnach@gmail.com"
11
+ s.homepage = "http://www.github.com/Narnach/fast_group_by"
12
+
13
+ # Files
14
+ root_files = %w[readme.rdoc Rakefile fast_group_by.gemspec]
15
+ bin_files = []
16
+ lib_files = %w[fast_group_by]
17
+ test_files = %w[]
18
+ spec_files = %w[fast_group_by]
19
+ other_files = %w[]
20
+ s.bindir = "bin"
21
+ s.require_path = "lib"
22
+ s.executables = bin_files
23
+ s.test_files = test_files.map {|f| 'test/%s_test.rb' % f} + spec_files.map {|f| 'spec/%s_spec.rb' % f}
24
+ s.files = root_files + s.test_files + other_files + bin_files.map {|f| 'bin/%s' % f} + lib_files.map {|f| 'lib/%s.rb' % f}
25
+
26
+ # rdoc
27
+ s.has_rdoc = true
28
+ s.extra_rdoc_files = %w[readme.rdoc]
29
+ s.rdoc_options << '--inline-source' << '--line-numbers' << '--main' << 'readme.rdoc'
30
+
31
+ # Requirements
32
+ s.required_ruby_version = ">= 1.8.0"
33
+ end
@@ -0,0 +1,13 @@
1
module Enumerable
  # Group the elements of the collection by the return value of the block.
  #
  # The block is called once with each element. The result is a plain Hash of
  # key-Array pairs: every key is a value returned by the block, and its Array
  # holds all elements for which the block returned that key, in iteration
  # order.
  #
  # The returned Hash has no default proc. Looking up a key that the block
  # never produced therefore returns nil rather than inserting an empty Array
  # into the result, and the Hash can be serialized with Marshal.dump.
  def fast_group_by(&block)
    groups = {}
    each do |element|
      key = block.call(element)
      # Values are always Arrays (truthy), so ||= only fires for new keys,
      # including nil and false keys.
      (groups[key] ||= []) << element
    end
    groups
  end
end
@@ -0,0 +1,45 @@
1
+ = Fast group_by
2
+
3
+ An unordered group_by implementation that is a lot faster than
4
+ the ordered version used in ActiveSupport version 2.2.x. This becomes apparent when used with large data sets.
5
+
6
+ == How to install
7
+
8
+ Once the gem appears on gems.github.com, you can install it with:
9
+ sudo gem install Narnach-fast_group_by
10
+
11
+ The alternative is to clone the git repo and build the gem yourself:
12
+ git clone git://github.com/Narnach/fast_group_by.git
13
+ cd fast_group_by
14
+ rake install
15
+
16
+ == Rationale
17
+
18
+ ActiveSupport uses OrderedHash in its Enumerable#group_by. In version 2.2.x, it uses nested Arrays to store data. Array#assoc is used to find if keys are already defined.
19
+ For small Arrays, Array#assoc's linear search is about as fast as Hash#[]'s hash lookup, but for larger data sets, the difference in lookup speed becomes noticeable.
20
+ In version 2.3.x, there is a new implementation of OrderedHash that uses Hash instead of Array and is thus a lot faster. Using fast_group_by with ActiveSupport 2.3.x does not make much sense.
21
+
22
+ The reason why I wrote this is that the OrderedHash version took 66 seconds to group a collection of 50k items. The Hash version took less than 1 second.
23
+
24
+ == License
25
+
26
+ Copyright (c) 2009 Wes Oldenbeuving
27
+
28
+ Permission is hereby granted, free of charge, to any person obtaining
29
+ a copy of this software and associated documentation files (the
30
+ "Software"), to deal in the Software without restriction, including
31
+ without limitation the rights to use, copy, modify, merge, publish,
32
+ distribute, sublicense, and/or sell copies of the Software, and to
33
+ permit persons to whom the Software is furnished to do so, subject to
34
+ the following conditions:
35
+
36
+ The above copyright notice and this permission notice shall be
37
+ included in all copies or substantial portions of the Software.
38
+
39
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
40
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
42
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
43
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
44
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
45
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,12 @@
1
+ require File.join(File.dirname(__FILE__),%w[.. lib fast_group_by])
2
+
3
# Behaviour of Enumerable#fast_group_by when called on an Array.
describe Array, '#fast_group_by' do
  it 'should return a Hash with key-Array pairs' do
    numbers = (1..10).to_a
    # Grouping by remainder modulo 2 splits the numbers into evens and odds.
    expected = {
      0 => [2, 4, 6, 8, 10],
      1 => [1, 3, 5, 7, 9]
    }
    grouped = numbers.fast_group_by { |number| number % 2 }
    grouped.should == expected
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fast_group_by
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Wes Oldenbeuving
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-07-16 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered.
17
+ email: narnach@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - readme.rdoc
24
+ files:
25
+ - readme.rdoc
26
+ - Rakefile
27
+ - fast_group_by.gemspec
28
+ - spec/fast_group_by_spec.rb
29
+ - lib/fast_group_by.rb
30
+ has_rdoc: true
31
+ homepage: http://www.github.com/Narnach/fast_group_by
32
+ licenses: []
33
+
34
+ post_install_message:
35
+ rdoc_options:
36
+ - --inline-source
37
+ - --line-numbers
38
+ - --main
39
+ - readme.rdoc
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.8.0
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.3.4
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered.
61
+ test_files:
62
+ - spec/fast_group_by_spec.rb