huge_enumerable 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ Gemfile.lock
6
+ InstalledFiles
7
+ coverage
8
+ InstalledFiles
9
+ lib/bundler/man
10
+ pkg
11
+ rdoc
12
+ spec/reports
13
+ test/tmp
14
+ test/version_tmp
15
+ tmp
16
+
17
+ # YARD artifacts
18
+ .yardoc
19
+ _yardoc
20
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec # take the dependency list from the gem's .gemspec file
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Frank Hall
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # HugeEnumerable
2
+
3
+ Enumerate, sample, shuffle, combine, permutate, and create products of massive data sets using minimal memory
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'huge_enumerable'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install huge_enumerable
18
+
19
+ ## Usage
20
+
21
+ See HugeCollection, HugeCombination, HugePermutation, and HugeProduct for ways of utilizing HugeEnumerable.
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require 'rdoc/task'
4
+
5
+ RSpec::Core::RakeTask.new # defines the :spec task that the aliases below point at
6
+
7
+ task :default => :spec # a bare `rake` runs the specs
8
+ task :test => :spec # `rake test` also runs the specs
9
+
10
+ RDoc::Task.new do |rdoc|
11
+ rdoc.rdoc_dir = 'doc'
12
+ rdoc.main = 'README.md'
13
+ rdoc.rdoc_files.include 'README.md', "lib/**/*\.rb" # "\." is a plain "." inside a double-quoted string
14
+
15
+ rdoc.options << '--line-numbers'
16
+ end
17
+
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'huge_enumerable/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "huge_enumerable"
8
+ spec.version = HugeEnumerable::VERSION
9
+ spec.authors = ["Frank Hall"]
10
+ spec.email = ["ChapterHouse.Dune@gmail.com"]
11
+ spec.description = %q{Enumerate, sample, shuffle, combine, permutate, and create products of massive data sets using minimal memory}
12
+ spec.summary = %q{Enumerate, sample, shuffle, combine, permutate, and create products of massive data sets using minimal memory}
13
+ spec.homepage = "https://github.com/ChapterHouse/huge_enumerable.git"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/) # package every file that git tracks
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency 'rspec', '~> 2.13'
24
+ spec.add_development_dependency 'rdoc'
25
+ spec.add_runtime_dependency "backports" # Ideally this would be conditional: it is only needed on Ruby 1.8, for as long as 1.8 stays supported.
26
+ spec.add_runtime_dependency "prime_miller_rabin", ">= 0.0.2"
27
+
28
+ end
@@ -0,0 +1,83 @@
1
+ require 'huge_enumerable'
2
+ # The simplest form of a HugeEnumerable.
3
+ # This class can be used for large arrays or anything else that responds to [].
4
+ # It is not necessary for the enumerable to be completely mapped into memory.
5
+ # It only has to be able to return the element mapped to the index given to [].
6
+ # ==== Examples
7
+ #
8
+ # Using HugeCollection directly:
9
+ #
10
+ # original_array = ('a'..'z').to_a
11
+ # collection = HugeCollection.new(original_array)
12
+ # collection.shuffle!
13
+ # original_array[0..4] # => ["a", "b", "c", "d", "e"]
14
+ # collection[0..4] # => ["j", "a", "r", "i", "z"]
15
+ #
16
+ #
17
+ # Subclassing HugeCollection
18
+ #
19
+ # class StringNext < HugeCollection
20
+ #
21
+ # def initialize(size)
22
+ # @collection_size = size
23
+ # super(('a'..'z').to_a)
24
+ # end
25
+ #
26
+ # private
27
+ #
28
+ # def fetch(index)
29
+ # result = ""
30
+ # index += 1
31
+ # while index > 0
32
+ # index -= 1
33
+ # result.prepend super(index % 26)
34
+ # index /= 26
35
+ # end
36
+ # result
37
+ # end
38
+ #
39
+ # end
40
+ #
41
+ # googol = 10**100
42
+ # collection = StringNext.new(googol)
43
+ # collection.size # => 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
44
+ # collection[0] # => "a"
45
+ # collection[-1] # => "zhxrtplbmwaiwcqlzpmglpziaegsdivmbvlnssusbjtbcgywaycqnhxztqwwikxvrsptazpp"
46
+ # collection[googol / 2] # => "dlijhfafxmqxnusmhfpshmdmopvodxfnkfgivwvnejaapyxmynutdlmjhxxqrykiiuizzhi"
47
+ # collection.shuffle!
48
+ # collection[0] # => "bipzqqzayczkgsmaseflwktpsotzclcjsqlnnjaciaawufpojywxflknuddhqkilhoedacn"
49
+ # collection[-1] # => "etneuebyurxgrvrfsreesxuvjaiyoqwplofsptacjdbhuhafdiwbwujvniokltgkjbfkiuy"
50
+ class HugeCollection < HugeEnumerable
51
+
52
+ # Create a new HugeCollection
53
+ #
54
+ # ==== Attributes
55
+ #
56
+ # * +enumerable+ - Any enumerable that responds to []
57
+ #
58
+ # ==== Optional positional arguments (forwarded unchanged to HugeEnumerable#initialize)
59
+ #
60
+ # * +max_array_size+ - The default size of arrays when #to_a is called.
61
+ # * +rng+ - The random number generator to use.
62
+ def initialize(enumerable, max_array_size = nil, rng = nil)
63
+ @enum = enumerable
64
+ super(max_array_size, rng)
65
+ end
66
+
67
+ private
68
+
69
+ attr_accessor :enum # the wrapped enumerable; the accessors are private because they follow +private+
70
+
71
+ def collection_size
72
+ @collection_size ||= enum_size # memoized; a subclass may preset @collection_size, as StringNext does in the example above
73
+ end
74
+
75
+ def enum_size
76
+ @enum_size ||= enum.size # memoized size of the wrapped enumerable
77
+ end
78
+
79
+ def fetch(index)
80
+ enum[index]
81
+ end
82
+
83
+ end
@@ -0,0 +1,118 @@
1
+ require 'huge_enumerable'
2
+ # HugeCombination is a HugeEnumerable style combination. Comparable to Array#combination.
3
+ # This class can be used to generate combinations of large arrays or anything else that responds to [].
4
+ # It is not necessary for the enumerable to be completely mapped into memory.
5
+ # It only has to be able to return the element mapped to the index given to [].
6
+ # ==== Examples
7
+ #
8
+ # Using HugeCombination directly:
9
+ #
10
+ # combination = HugeCombination.new(('a'..'z').to_a, 2)
11
+ # combination[0..4] # => [["a", "b"], ["a", "c"], ["a", "d"], ["a", "e"], ["a", "f"]]
12
+ # combination[23..27] # => [["a", "y"], ["a", "z"], ["b", "c"], ["b", "d"], ["b", "e"]]
13
+ #
14
+ #
15
+ # Subclassing HugeCombination
16
+ #
17
+ # class NumberArray < HugeCollection
18
+ #
19
+ # def initialize(size)
20
+ # @collection_size = size
21
+ # super(nil)
22
+ # end
23
+ #
24
+ # private
25
+ #
26
+ # def fetch(index)
27
+ # index
28
+ # end
29
+ #
30
+ # end
31
+ #
32
+ # class NumberCombination < HugeCombination
33
+ #
34
+ # def initialize(size)
35
+ # enumerable = size < 10 ? (0...size).to_a : NumberArray.new(size)
36
+ # super enumerable, 2, nil, nil
37
+ # end
38
+ #
39
+ # private
40
+ #
41
+ # def fetch(index)
42
+ # array = super
43
+ # sum = array.inject(0) { |total, i| total + i }
44
+ # "#{array.first} + #{array.last} = #{sum}"
45
+ # end
46
+ #
47
+ # end
48
+ #
49
+ # combination = NumberCombination.new(10**30)
50
+ # size = combination.size # => 499999999999999999999999999999500000000000000000000000000000
51
+ # combination[0] # => "0 + 1 = 1"
52
+ # combination[-1] # => "999999999999999999999999999998 + 999999999999999999999999999999 = 1999999999999999999999999999997"
53
+ # combination[size / 2] # => "292893218813452475599155637895 + 296085173605458049080913472356 = 588978392418910524680069110251"
54
+ class HugeCombination < HugeCollection
55
+
56
+ # Create a new HugeCombination
57
+ #
58
+ # ==== Attributes
59
+ #
60
+ # * +enumerable+ - Any enumerable that responds to []
61
+ # * +size+ - The number of elements per combination to use from enumerable. (Currently only size 2 is supported; any other value raises NotImplementedError)
62
+ #
63
+ # ==== Optional positional arguments (forwarded unchanged to HugeCollection#initialize)
64
+ #
65
+ # * +max_array_size+ - The default size of arrays when #to_a is called.
66
+ # * +rng+ - The random number generator to use.
67
+ def initialize(enumerable, size, max_array_size = nil, rng = nil)
68
+ raise NotImplementedError, "Not yet implemented for any size != 2" if size != 2 # TODO: Extend this class to handle length N
69
+ @combination_size = size # recorded, but not read anywhere else in this class
70
+ super(enumerable, max_array_size, rng)
71
+ end
72
+
73
+ private
74
+
75
+ def collection_size
76
+ sum(enum_size - 1) # n * (n - 1) / 2 pairs for n = enum_size
77
+ end
78
+
79
+ def fetch(index)
80
+ cycle = locate_cycle(index) # 1-based position of the pair's first element
81
+ first_index = cycle - 1
82
+ max_cycles = enum_size - 1
83
+ used = (cycle - 1) == 0 ? 0 : sum_from(max_cycles, max_cycles - (cycle - 2)) # number of pairs that belong to earlier cycles
84
+ second_index = index - used + cycle # offset inside this cycle, shifted past the first element
85
+ [enum[first_index], enum[second_index]]
86
+ end
87
+
88
+ def locate_cycle(index, min=0, max=enum_size-1)
89
+ cycle = min + (max - min) / 2 # midpoint of the range being binary searched
90
+
91
+ check_high = sum_at_cycle(cycle)
92
+ check_low = sum_at_cycle(cycle - 1)
93
+
94
+ if check_high > index && check_low <= index # index falls inside this cycle
95
+ cycle
96
+ elsif check_low > index
97
+ locate_cycle(index, min, cycle-1)
98
+ else
99
+ locate_cycle(index, cycle+1, max)
100
+ end
101
+ end
102
+
103
+ def sum(x)
104
+ x * (x + 1) / 2 # 1 + 2 + ... + x
105
+ end
106
+
107
+ def sum_from(m, n)
108
+ m, n = [n, m] if m > n # order the bounds so that m <= n
109
+ (n + 1 - m)*(n + m)/2 # m + (m + 1) + ... + n
110
+ end
111
+
112
+ def sum_at_cycle(c)
113
+ ec = enum_size * c
114
+ (-c + 2*ec - c**2)/2 # pairs covered by the first c cycles: c * (2 * enum_size - c - 1) / 2
115
+ end
116
+
117
+ end
118
+
@@ -0,0 +1,67 @@
1
+ require 'huge_enumerable'
2
+ # HugePermutation is a HugeEnumerable style permutation. Comparable to Array#permutation.
3
+ # This class can be used to generate permutations of large arrays or anything else that responds to [].
4
+ # It is not necessary for the enumerable to be completely mapped into memory.
5
+ # It only has to be able to return the element mapped to the index given to [].
6
+ # ==== Examples
7
+ #
8
+ # Using HugePermutation directly:
9
+ #
10
+ # permutation = HugePermutation.new(('a'..'z').to_a, 2)
11
+ # permutation[0..4] # => [["a", "b"], ["a", "c"], ["a", "d"], ["a", "e"], ["a", "f"]]
12
+ # permutation[23..27] # => [["a", "y"], ["a", "z"], ["b", "a"], ["b", "c"], ["b", "d"]]
13
+ #
14
+ #
15
+ # Subclassing HugePermutation
16
+ #
17
+ # class SouthernNames < HugePermutation
18
+ #
19
+ # def initialize
20
+ # base_names = %w{Bill Joe Jo Bob Mary Lou Betty Sue Jimmy Ann Lee Ruby Jack Belle Daisy Dixie Lynn}
21
+ # super base_names, 2, nil, nil
22
+ # end
23
+ #
24
+ # private
25
+ #
26
+ # def fetch(index)
27
+ # "Your southern name is: #{super(index).join(' ')}"
28
+ # end
29
+ #
30
+ # end
31
+ #
32
+ # southern_name = SouthernNames.new
33
+ # southern_name[0] # => "Your southern name is: Bill Joe"
34
+ # southern_name[-1] # => "Your southern name is: Lynn Dixie"
35
+ # size = southern_name.size # => 272
36
+ # southern_name[size / 2] # => "Your southern name is: Jimmy Ann"
37
+ class HugePermutation < HugeCollection
38
+
39
+ # Create a new HugePermutation
40
+ #
41
+ # ==== Attributes
42
+ #
43
+ # * +enumerable+ - Any enumerable that responds to []
44
+ # * +length+ - The number of elements per permutation to use from enumerable. (Currently only length 2 is supported; any other value raises NotImplementedError)
45
+ #
46
+ # ==== Optional positional arguments (forwarded unchanged to HugeCollection#initialize)
47
+ #
48
+ # * +max_array_size+ - The default size of arrays when #to_a is called.
49
+ # * +rng+ - The random number generator to use.
50
+ def initialize(enumerable, length, max_array_size = nil, rng = nil)
51
+ raise NotImplementedError, "Not yet implemented for any length != 2" if length != 2 # TODO: Extend this class to handle length N
52
+ super(enumerable, max_array_size, rng)
53
+ end
54
+
55
+ private
56
+
57
+ def fetch(x)
58
+ first_index = x / (enum_size - 1) # each first element heads enum_size - 1 consecutive permutations
59
+ second_index = ((x % enum_size) + (x / enum_size + 1)) % enum_size # steps through the other elements, never landing on first_index
60
+ [enum[first_index], enum[second_index]]
61
+ end
62
+
63
+ def collection_size
64
+ enum_size * (enum_size - 1) # ordered pairs of two distinct elements
65
+ end
66
+
67
+ end
@@ -0,0 +1,77 @@
1
+ require 'huge_enumerable'
2
+ # HugeProduct is a HugeEnumerable style product. Comparable to Array#product.
3
+ # This class can be used to generate products of large arrays or anything else that responds to [].
4
+ # It is not necessary for the enumerables to be completely mapped into memory.
5
+ # They only have to be able to return the element mapped to the index given to [].
6
+ # ==== Examples
7
+ #
8
+ # Using HugeProduct directly:
9
+ #
10
+ # product = HugeProduct.new(('a'..'z').to_a, ('A'..'Z').to_a)
11
+ # product[0..4] # => [["a", "A"], ["a", "B"], ["a", "C"], ["a", "D"], ["a", "E"]]
12
+ # product[23..27] # => [["a", "X"], ["a", "Y"], ["a", "Z"], ["b", "A"], ["b", "B"]]
13
+ #
14
+ #
15
+ # Subclassing HugeProduct
16
+ #
17
+ # class BabyGirlNames < HugeProduct
18
+ #
19
+ # def initialize
20
+ # first_names = %w{Emma Olivia Sophia Isabella Ava Mia Emily Charlotte Ella Amelia Abigail Madison Lily Chloe}
21
+ # middle_names = %w{Zoe Sophie Evelyn Aubrey Elizabeth Layla Anna Natalie Brooklyn Aria Audrey Ellie Lucy}
22
+ # super(first_names, middle_names)
23
+ # end
24
+ #
25
+ # private
26
+ #
27
+ # def fetch(index)
28
+ # super(index).join(' ')
29
+ # end
30
+ #
31
+ # end
32
+ #
33
+ # name = BabyGirlNames.new
34
+ # name[0] # => "Emma Zoe"
35
+ # name[-1] # => "Chloe Lucy"
36
+ # size = name.size # => 182
37
+ # name[size / 2] # => "Charlotte Zoe"
38
+ class HugeProduct < HugeEnumerable
39
+
40
+ # Create a new HugeProduct
41
+ #
42
+ # ==== Attributes
43
+ #
44
+ # * +enumerable_a+ - Any enumerable that responds to []
45
+ # * +enumerable_b+ - Any enumerable that responds to [] (This can be the same object as enumerable_a)
46
+ #
47
+ # ==== Optional positional arguments (forwarded unchanged to HugeEnumerable#initialize)
48
+ #
49
+ # * +max_array_size+ - The default size of arrays when #to_a is called.
50
+ # * +rng+ - The random number generator to use.
51
+ def initialize(enumerable_a, enumerable_b, max_array_size = nil, rng = nil)
52
+ @enum_a = enumerable_a
53
+ @enum_b = enumerable_b
54
+ super(max_array_size, rng)
55
+ end
56
+
57
+ private
58
+
59
+ attr_accessor :enum_a, :enum_b # the two wrapped enumerables; private because they follow +private+
60
+
61
+ def collection_size
62
+ enum_a_size * enum_b_size
63
+ end
64
+
65
+ def fetch(x)
66
+ [enum_a[x / enum_b_size], enum_b[x % enum_b_size]] # enum_b varies fastest; enum_a advances once every enum_b_size indexes
67
+ end
68
+
69
+ def enum_a_size
70
+ @enum_a_size ||= enum_a.size # memoized size of enum_a
71
+ end
72
+
73
+ def enum_b_size
74
+ @enum_b_size ||= enum_b.size # memoized size of enum_b
75
+ end
76
+
77
+ end
@@ -0,0 +1,4 @@
1
+ class HugeEnumerable
2
+ # Release version of the huge_enumerable gem; the gemspec reads it for spec.version.
3
+ VERSION = "0.0.1"
4
+ end