fast_group_by 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +49 -0
- data/fast_group_by.gemspec +33 -0
- data/lib/fast_group_by.rb +13 -0
- data/readme.rdoc +45 -0
- data/spec/fast_group_by_spec.rb +12 -0
- metadata +62 -0
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require "rake"
|
2
|
+
require "rake/clean"
|
3
|
+
require "rake/gempackagetask"
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
################################################################################
|
7
|
+
### Gem
|
8
|
+
################################################################################
|
9
|
+
|
10
|
+
# Load the project's gemspec and register the gem packaging tasks.
#
# Any failure is reported as a warning instead of aborting, so the other tasks
# in this Rakefile stay available. +spec+ stays nil when loading fails; tasks
# that read it will then fail when they are invoked.
spec = nil
begin
  # Parse gemspec using the github safety level.
  file = Dir['*.gemspec'].first
  # Fail with a readable message instead of the TypeError File.read(nil) raises.
  raise "no *.gemspec file found in #{Dir.pwd}" unless file
  data = File.read(file)
  # FIXME: Lowered SAFE from 3 to 2 to work with Ruby 1.9 due to rubygems
  # performing a require internally
  # NOTE(review): eval runs the gemspec as arbitrary Ruby code; only use this
  # Rakefile with a gemspec you trust. $SAFE handling differs between Ruby
  # versions -- confirm it still provides the intended sandboxing.
  Thread.new { spec = eval("$SAFE = 2\n%s" % data) }.join

  # Create the gem tasks
  # NOTE(review): later Rake releases replace Rake::GemPackageTask with
  # Gem::PackageTask -- confirm against the Rake version in use.
  Rake::GemPackageTask.new(spec) do |package|
    package.gem_spec = spec
  end
rescue StandardError, ScriptError, SecurityError => e
  # Deliberately not `rescue Exception`: Interrupt and SystemExit must still
  # be able to stop rake. ScriptError covers SyntaxError/LoadError raised while
  # evaluating the gemspec; SecurityError is listed explicitly because it is
  # not a StandardError on every Ruby version.
  trace = Array(e.backtrace).first(5).map { |l| ' %s' % l }.join("\n")
  printf "WARNING: Error caught (%s): %s\n%s\n", e.class.name, e.message, trace
end
|
26
|
+
|
27
|
+
# Depends on the :gem task, then shells out to install the packaged gem file
# found at pkg/<name>-<version>.gem (name and version come from the gemspec).
desc 'Package and install the gem for the current version'
task :install => :gem do
  install_command = format("sudo gem install -l pkg/%s-%s.gem", spec.name, spec.version)
  system(install_command)
end
|
31
|
+
|
32
|
+
# Compares the regular files on disk that match a fixed set of glob patterns
# with the file list declared in the gemspec, and prints the difference in
# both directions.
desc 'Show files missing from gemspec'
task :diff do
  # NOTE(review): on a case-sensitive filesystem *README* presumably does not
  # match readme.rdoc, which would then be listed under "Extra files" -- verify.
  patterns = %w[
    Rakefile
    *README*
    *LICENSE*
    *.gemspec
    bin/*
    lib/**/*
    spec/**/*
  ]
  matches = patterns.inject([]) { |found, pattern| found + Dir.glob(pattern) }
  on_disk = matches.select { |path| File.file?(path) }

  # Present on disk but not declared in the gemspec.
  puts "Missing files:"
  puts((on_disk - spec.files).join(" "))
  # Declared in the gemspec but not matched on disk.
  puts "Extra files:"
  puts((spec.files - on_disk).join(" "))
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Gem specification for fast_group_by.
#
# File lists are built from bare names: library files are expanded to
# lib/<name>.rb, specs to spec/<name>_spec.rb, tests to test/<name>_test.rb and
# executables to bin/<name>. The specification object is the value of this
# file's last expression, so it can be obtained by evaluating the file.
Gem::Specification.new do |s|
  # Project
  s.name         = 'fast_group_by'
  s.summary      = "Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered."
  s.description  = s.summary
  s.version      = '0.1.0'
  s.date         = '2009-07-16'
  s.platform     = Gem::Platform::RUBY
  s.authors      = ["Wes Oldenbeuving"]
  s.email        = "narnach@gmail.com"
  s.homepage     = "http://www.github.com/Narnach/fast_group_by"

  # Files
  root_files     = %w[readme.rdoc Rakefile fast_group_by.gemspec]
  bin_files      = []
  lib_files      = %w[fast_group_by]
  test_files     = %w[]
  spec_files     = %w[fast_group_by]
  other_files    = %w[]
  s.bindir       = "bin"
  s.require_path = "lib"
  s.executables  = bin_files
  s.test_files   = test_files.map {|f| 'test/%s_test.rb' % f} + spec_files.map {|f| 'spec/%s_spec.rb' % f}
  s.files        = root_files + s.test_files + other_files + bin_files.map {|f| 'bin/%s' % f} + lib_files.map {|f| 'lib/%s.rb' % f}

  # rdoc
  # has_rdoc= is deprecated in later RubyGems releases; only call it where the
  # setter exists so a missing method cannot abort loading of the whole
  # specification.
  s.has_rdoc         = true if s.respond_to?(:has_rdoc=)
  s.extra_rdoc_files = %w[readme.rdoc]
  s.rdoc_options << '--inline-source' << '--line-numbers' << '--main' << 'readme.rdoc'

  # Requirements
  s.required_ruby_version = ">= 1.8.0"
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Enumerable
  # Group the elements of the collection by the return value of the block.
  #
  # Returns a Hash with key-Array pairs. The keys are return values of the
  # block that is called with each element in the collection.
  # All elements with the same return key are added to the Array associated
  # with that key, in the order in which +each+ yields them.
  #
  # When no block is given an enumerator for this method is returned instead
  # of failing on the missing block; calling +each+ on it with a block
  # performs the grouping.
  #
  # NOTE: the returned Hash keeps its default block, so reading a key that is
  # not present stores and returns a new empty Array for that key.
  #
  #   (1..10).fast_group_by { |e| e % 2 }
  #   # 0 => [2, 4, 6, 8, 10], 1 => [1, 3, 5, 7, 9]
  def fast_group_by(&block)
    return to_enum(:fast_group_by) unless block

    res = Hash.new { |hash, key| hash[key] = [] }
    each { |e| res[block.call(e)] << e }
    res
  end
end
|
data/readme.rdoc
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
= Fast group_by
|
2
|
+
|
3
|
+
An unordered group_by implementation that is a lot faster than
|
4
|
+
the ordered version used in ActiveSupport version 2.2.x. This becomes apparent when used with large data sets.
|
5
|
+
|
6
|
+
== How to install
|
7
|
+
|
8
|
+
Once the gem appears on gems.github.com, you can install it with:
|
9
|
+
sudo gem install Narnach-fast_group_by
|
10
|
+
|
11
|
+
The alternative is to clone the git repo and build the gem yourself:
|
12
|
+
git clone git://github.com/Narnach/fast_group_by.git
|
13
|
+
cd fast_group_by
|
14
|
+
rake install
|
15
|
+
|
16
|
+
== Rationale
|
17
|
+
|
18
|
+
ActiveSupport uses OrderedHash in its Enumerable#group_by. In version 2.2.x, it uses nested Arrays to store data. Array#assoc is used to find if keys are already defined.
|
19
|
+
For small Arrays, Array#assoc's linear search is about as fast as Hash#[]'s hash lookup, but for larger data sets, the difference in lookup speed becomes noticeable.
|
20
|
+
In version 2.3.x, there is a new implementation of OrderedHash that uses Hash instead of Array and is thus a lot faster. Using fast_group_by with ActiveSupport 2.3.x does not make much sense.
|
21
|
+
|
22
|
+
The reason why I wrote this is that the OrderedHash version took 66 seconds to group a collection of 50k items. The Hash version took less than 1 second.
|
23
|
+
|
24
|
+
== License
|
25
|
+
|
26
|
+
Copyright (c) 2009 Wes Oldenbeuving
|
27
|
+
|
28
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
29
|
+
a copy of this software and associated documentation files (the
|
30
|
+
"Software"), to deal in the Software without restriction, including
|
31
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
32
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
33
|
+
permit persons to whom the Software is furnished to do so, subject to
|
34
|
+
the following conditions:
|
35
|
+
|
36
|
+
The above copyright notice and this permission notice shall be
|
37
|
+
included in all copies or substantial portions of the Software.
|
38
|
+
|
39
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
40
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
41
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
42
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
43
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
44
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
45
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),%w[.. lib fast_group_by])
|
2
|
+
|
3
|
+
# Checks fast_group_by on an Array: the integers 1..10 grouped by their
# remainder modulo 2 must come back as a Hash of remainder => elements.
describe Array, '#fast_group_by' do
  it 'should return a Hash with key-Array pairs' do
    expected = {
      0 => [2, 4, 6, 8, 10],
      1 => [1, 3, 5, 7, 9]
    }
    (1..10).to_a.fast_group_by { |number| number % 2 }.should == expected
  end
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fast_group_by
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Wes Oldenbeuving
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-16 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered.
|
17
|
+
email: narnach@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- readme.rdoc
|
24
|
+
files:
|
25
|
+
- readme.rdoc
|
26
|
+
- Rakefile
|
27
|
+
- fast_group_by.gemspec
|
28
|
+
- spec/fast_group_by_spec.rb
|
29
|
+
- lib/fast_group_by.rb
|
30
|
+
has_rdoc: true
|
31
|
+
homepage: http://www.github.com/Narnach/fast_group_by
|
32
|
+
licenses: []
|
33
|
+
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options:
|
36
|
+
- --inline-source
|
37
|
+
- --line-numbers
|
38
|
+
- --main
|
39
|
+
- readme.rdoc
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.8.0
|
47
|
+
version:
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project:
|
57
|
+
rubygems_version: 1.3.4
|
58
|
+
signing_key:
|
59
|
+
specification_version: 3
|
60
|
+
summary: Fast group_by is an Enumerable#group_by implementation that uses hash instead of OrderedHash and is thus faster, but not ordered.
|
61
|
+
test_files:
|
62
|
+
- spec/fast_group_by_spec.rb
|