squeeze 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,8 @@
1
+ Copyright (c) 2012 Matthew King
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8
+
data/README.md ADDED
File without changes
data/lib/squeeze.rb ADDED
@@ -0,0 +1,121 @@
1
+ require "squeeze/hash_tree"
2
+
3
# Mixin that lets a class declare a "squeeze spec" (an options Hash) and then
# reduce a dataset with a Squeeze built from that spec.
#
#   class Order
#     include Squeezable
#     squeezable :fields => {:status => true}, :dataset_method => :rows
#   end
module Squeezable
  # Include hook: gives the including module the class-level +squeezable+
  # macro and the instance-level +squeeze+ / +squeezer+ methods.
  def self.included(mod)
    mod.module_eval do
      extend ClassMethods
      include InstanceMethods
    end
  end

  module ClassMethods
    # With +options+, stores them as this class's squeeze spec and returns
    # them. Without arguments, returns the stored spec, falling back to the
    # superclass's spec, or nil when nothing in the ancestry declared one.
    def squeezable(options = nil)
      if options
        @squeeze_spec = options
      elsif @squeeze_spec
        @squeeze_spec
      else
        # Walk up explicitly instead of rescuing everything: only classes
        # have a superclass, and only Squeezable ancestors answer #squeezable.
        parent = respond_to?(:superclass) ? superclass : nil
        parent.squeezable if parent.respond_to?(:squeezable)
      end
    end
  end

  module InstanceMethods
    # Reduces +dataset+ with this object's squeezer and returns the result of
    # Squeeze#reduce. When +dataset+ is omitted, it is obtained by calling the
    # method named by the spec's :dataset_method entry.
    #
    # Raises ArgumentError when no dataset is given and the class declares no
    # :dataset_method (including when it declares no spec at all).
    def squeeze(dataset = nil)
      unless dataset
        spec = self.class.squeezable || {}
        meth = spec[:dataset_method]
        raise ArgumentError, "Must give #squeeze a dataset" unless meth
        dataset = send(meth)
      end
      squeezer.reduce(dataset)
    end

    # Memoized Squeeze configured with the spec's :fields entry (nil when the
    # class declares no spec, which Squeeze.new is handed as-is).
    def squeezer
      @squeezer ||= Squeeze.new(:fields => (self.class.squeezable || {})[:fields])
    end
  end

  # Variant for classes that provide +def_dataset_method+
  # (presumably Sequel::Model subclasses -- confirm against callers).
  # Including it pulls in Squeezable and defines a dataset-level #squeeze
  # that reduces the dataset itself.
  module Sequel
    def self.included(mod)
      mod.module_eval do
        include Squeezable
        def_dataset_method(:squeeze) do
          squeezer = Squeeze.new(:fields => (mod.squeezable || {})[:fields])
          squeezer.reduce(self)
        end
      end
    end
  end
end
50
+
51
# Reduces a collection of records into a HashTree of counts, driven by a
# field specification.
#
# A field spec is either a Hash of +field => subfields+ or an Array of field
# names (equivalent to mapping every name to +true+). For each field the
# +subfields+ value selects what is counted; see #process_field.
class Squeeze

  attr_reader :output
  attr_accessor :reports, :derived

  # Class-level default field spec. With +spec+, replaces the default and
  # returns it; without, returns the current default (initially an empty Hash).
  def self.fields(spec = nil)
    @fields ||= {}
    spec ? @fields = spec : @fields
  end

  # options[:fields]::  field spec (Hash or Array) merged over the class default.
  # options[:derived]:: Hash of +name => callable+; a callable receives the
  #                     record and returns the value for that name.
  def initialize(options = {})
    defaults = normalize_fields(self.class.fields)
    @fields = defaults.merge(normalize_fields(options[:fields]))
    @derived = options[:derived] || {}
    @output = HashTree.new
  end

  # Processes every record (anything responding to #each that yields objects
  # indexable with field names) and returns the accumulated output tree.
  def reduce(records)
    records.each do |record|
      process(record, @fields)
    end
    @output
  end

  # Applies a field spec to one record. +sig+ is the path prefix under which
  # counts are stored. A Hash spec is walked pair by pair; an Array spec
  # treats each entry as a plain counted field. Any other spec is ignored.
  def process(record, fields, sig = [])
    case fields
    when Hash
      fields.each do |field, subfields|
        process_field(record, field, subfields, sig)
      end
    when Array
      fields.each do |field|
        process_field(record, field, true, sig)
      end
    end
  end

  # Counts one field of one record. Does nothing when the field resolves to
  # nil or false. Otherwise, with +key+ being the resolved value:
  #
  # true::   increments [field, key] and the [field, :_count] total.
  # Symbol:: increments [field, key, :_count] and
  #          [field, key, subfields, <resolved subfield value>].
  # Array::  processes each entry as its own +{field => entry}+ spec.
  # Hash::   increments [field, key, :_count] and recurses into the nested
  #          spec beneath [field, key].
  #
  # All paths are prefixed with +sig+.
  def process_field(record, field, subfields, sig)
    key = resolve(field, record)
    return unless key

    case subfields
    when true
      output.increment(sig + [field, key])
      output.increment(sig + [field, :_count])
    when Symbol
      output.increment(sig + [field, key, :_count])
      value = resolve(subfields, record)
      output.increment(sig + [field, key, subfields, value])
    when Array
      subfields.each do |subfield|
        process(record, {field => subfield}, sig)
      end
    when Hash
      output.increment(sig + [field, key, :_count])
      process(record, subfields, sig + [field, key])
    end
  end

  # Looks up +name+ for +record+, trying in order: a truthy record[name], a
  # derived callable registered under +name+, a method of that name on the
  # record. Returns :_unknown when none apply.
  def resolve(name, record)
    if v = record[name]
      v
    elsif p = @derived[name]
      p.call(record)
    elsif record.respond_to?(name)
      record.send(name)
    else
      :_unknown
    end
  end

  private

  # Turns a field spec into Hash form so specs can be merged: nil becomes an
  # empty Hash and an Array becomes +{name => true}+ for each entry, which
  # #process handles identically to the Array form. Anything else is
  # returned unchanged.
  def normalize_fields(spec)
    case spec
    when nil
      {}
    when Array
      Hash[spec.map { |field| [field, true] }]
    else
      spec
    end
  end

end
@@ -0,0 +1,153 @@
1
+ require "squeeze/traversable"
2
+
3
+ # A Hash subclass with a default proc that creates another HashTree
4
+ # on attempts to access missing keys.
5
+ class HashTree < Hash
6
+ include Squeeze::Traversable
7
+
8
+ # Override the constructor to provide a default_proc
9
+ # NOTE: there's a better way to do this in 1.9.2, it seems.
10
+ # See Hash#default_proc=
11
# Builds an empty tree whose missing keys spring into existence as new
# HashTrees. A throwaway Hash carrying that default block is created and
# the freshly constructed tree takes it over through #replace, which this
# code relies on to carry the default block across as well.
def self.new
  template = Hash.new { |node, key| node[key] = HashTree.new }
  tree = super
  tree.replace(template)
end
15
+
16
# Builds a new tree and merges +hash+ into it with #<<, returning the tree.
def self.[](hash)
  new.tap { |tree| tree << hash }
end
21
+
22
+ def _dump(depth)
23
+ h = Hash[self]
24
+ h.delete_if {|k,v| v.is_a? Proc }
25
+ Marshal.dump(h)
26
+ end
27
+
28
# Marshal hook: rebuilds a tree from the payload written by #_dump.
#
# The entries are merged into a fresh tree with #update rather than #replace:
# the constructor depends on #replace carrying a Hash's default block along,
# so replacing with the plain unmarshalled Hash would strip the new tree's
# node-creating default block.
#
# NOTE(review): this calls Marshal.load on the given payload; Marshal must
# only ever be used with trusted data.
def self._load(*args)
  entries = Marshal.load(*args)
  tree = self.new
  tree.update(entries)
  tree
end
34
+
35
+ # Follow the path specified, creating new nodes where necessary.
36
+ # Returns the value at the end of the path. If a block is supplied,
37
+ # it will be called with the last node and the last key as parameters,
38
+ # analogous to Hash.new's default proc. This is necessary to allow
39
+   # setting a value at the end of the path. See the implementation of Squeeze::Traversable#set.
40
# Walks the keys of +sig+ (an Array) from this node, indexing with #[] at
# every step except the last, and returns the value stored under the final
# key of the node reached. When a block is given it is first called with
# that node and the final key, so the caller can assign the leaf.
#
# +sig+ is only read: the caller's array is left intact so the same
# signature can be reused for further calls.
def create_path(sig)
  final_key = sig.last
  node = sig[0...-1].inject(self) { |current, key| current[key] }
  yield(node, final_key) if block_given?
  node[final_key]
end
49
+
50
+ # Attempt to retrieve the value at the end of the path specified,
51
+ # without creating new nodes. Returns nil on failure.
52
+ # TODO: consider whether splatting the signature is wise.
53
# Follows the keys in +sig+ from this node without creating anything and
# returns whatever is stored at the end of the path. With no keys, returns
# the receiver.
#
# Returns nil as soon as a key is missing, and also when the path tries to
# continue through a value that cannot be asked for keys (a leaf), instead
# of raising.
def find(*sig)
  sig.inject(self) do |stage, key|
    return nil unless stage.respond_to?(:has_key?) && stage.has_key?(key)
    stage[key]
  end
end
64
+
65
# Returns the values stored under every key accepted by +matcher+
# (see #match?), in key order. The default matcher accepts all keys.
def children(matcher=true)
  keys.each_with_object([]) do |key, found|
    found << self[key] if match?(matcher, key)
  end
end
71
+
72
# Returns a new HashTree holding the result of merging +other+ into the
# receiver via #_plus.
def +(other)
  HashTree.new.tap { |merged| _plus(other, merged) }
end
77
+
78
# Merge worker behind #+: writes the combination of the receiver and +ht2+
# into +out+. Neither the receiver nor +ht2+ is modified, and +out+ shares
# no subtree with either of them.
#
# For every key:
# * present on one side only: a copy of that side's value is stored;
# * a subtree on both sides: the subtrees are merged recursively;
# * a number on both sides: the sum is stored;
# * any other pair of leaves: the two-element array [mine, theirs] is stored.
#
# Raises ArgumentError when one side holds a subtree and the other a leaf.
def _plus(ht2, out)
  each do |key, mine|
    unless ht2.has_key?(key)
      out[key] = _plus_copy(mine)
      next
    end

    theirs = ht2[key]
    mine_is_tree = mine.respond_to?(:_plus)
    theirs_is_tree = theirs.respond_to?(:_plus)

    if mine_is_tree && theirs_is_tree
      # Start from a shallow copy (keeps class and default block); the
      # recursive call then rewrites every entry of that copy.
      merged = mine.dup
      out[key] = merged
      mine._plus(theirs, merged)
    elsif mine_is_tree || theirs_is_tree
      raise ArgumentError,
        "Can't merge leaf with non-leaf:\n#{mine.inspect}\n#{theirs.inspect}"
    elsif mine.is_a?(Numeric) && theirs.is_a?(Numeric)
      out[key] = mine + theirs
    else
      out[key] = [mine, theirs]
    end
  end

  ht2.each do |key, theirs|
    out[key] = _plus_copy(theirs) unless has_key?(key)
  end
end

# Copy helper for #_plus: subtrees are copied recursively, leaves are
# duplicated when their class allows it and returned as-is otherwise.
def _plus_copy(value)
  unless value.respond_to?(:_plus)
    begin
      return value.dup
    rescue TypeError
      # Raised by immediates (numbers, symbols, nil) on older Rubies.
      return value
    end
  end

  copy = value.dup
  value.each { |key, child| copy[key] = _plus_copy(child) }
  copy
end
109
+
110
# Merges +other+ into the receiver in place, entry by entry:
# * a key the receiver lacks is adopted (hash-like values are merged into
#   the node obtained from self[key], anything else is stored directly);
# * two hash-like values are merged recursively;
# * two numbers are added;
# * every other combination raises ArgumentError.
def <<(other)
  other.each do |key, incoming|
    incoming_is_tree = incoming.respond_to?(:has_key?)

    unless has_key?(key)
      if incoming_is_tree
        self[key] << incoming
      else
        self[key] = incoming
      end
      next
    end

    existing = self[key]
    if incoming_is_tree && existing.respond_to?(:has_key?)
      existing << incoming
    elsif incoming.is_a?(Numeric) && existing.is_a?(Numeric)
      self[key] = incoming + existing
    else
      raise ArgumentError,
        "Can't merge leaf with non-leaf:\n#{incoming.inspect}\n#{existing.inspect}"
    end
  end
end
131
+
132
# Decides whether +key+ is accepted by the matcher +val+:
#
# true::           accepts every key.
# String, Symbol:: accepts the key equal to it.
# Regexp::         accepts keys the pattern matches; keys that are not
#                  strings or symbols simply do not match.
# Proc::           accepts keys for which the proc returns a truthy value
#                  (the proc's result is returned as-is).
# nil::            accepts nothing.
#
# Raises ArgumentError for any other matcher type.
def match?(val, key)
  case val
  when true
    true
  when String, Symbol
    key == val
  when Regexp
    # Regexp#=== answers false for non-string keys instead of relying on
    # the key implementing =~.
    val === key
  when Proc
    val.call(key)
  when nil
    false
  else
    raise ArgumentError, "Unexpected matcher type: #{val.inspect}"
  end
end
148
+
149
+ end
150
+
151
+
152
+
153
+
@@ -0,0 +1,28 @@
1
class Squeeze

  # Matchers are initialized with a pattern, to be used for
  # retrieval in a HashTree. What makes Matcher special is that it
  # defines #eql?, #== and #hash in terms of the pattern, so Hashes
  # treat two Matchers with the same pattern as the same key.
  class Matcher
    attr_reader :pattern

    # A nil (or false) pattern is stored as +true+, the match-everything
    # pattern.
    def initialize(pattern)
      @pattern = pattern || true
    end

    # True when +val+ equals the pattern, or when the pattern is +true+.
    def call(val)
      @pattern == val || @pattern == true
    end

    # Two matchers are equal when +other+ is a Matcher (of this class or a
    # subclass) holding an equal pattern.
    def eql?(other)
      other.kind_of?(self.class) && other.pattern == @pattern
    end

    # Keep == in step with eql?/hash so all equality checks agree.
    alias_method :==, :eql?

    # Hash code of the pattern, so equal matchers land in the same bucket.
    def hash
      @pattern.hash
    end
  end

end
@@ -0,0 +1,128 @@
1
class Squeeze
  # Tree helpers built on three methods the including class must supply:
  #
  # * #create_path(path_signature) -- walk/create the path; when given a
  #   block, call it with the last node and the last key
  # * #find(path_signature)
  # * #children(node_matcher) -- child values whose keys match
  module Traversable

    # Follow or create the path specified by the signature and assign
    # the value as a terminating leaf node.
    #
    #   h.set([:a, :b, :c], "This is a retrievable value")
    #
    # Raises ArgumentError when the signature is empty.
    def set(sig, val)
      raise ArgumentError if sig.empty?
      create_path(sig) do |node, key|
        node[key] = val
      end
    end

    # Replaces the leaf at +sig+ with the result of yielding its current
    # value to the block. A missing leaf is first initialised to +base+.
    def reduce(sig, base=0)
      create_path(sig) do |node, key|
        node[key] = base unless node.has_key?(key)
        node[key] = yield node[key]
      end
    end

    # Adds +val+ (default 1, or the block's result when a block is given)
    # to the leaf at +sig+; a missing leaf is created holding +val+.
    def increment(sig, val=1)
      val = yield if block_given?
      create_path(sig) do |node, key|
        if node.has_key?(key)
          node[key] = node[key] + val
        else
          node[key] = val
        end
      end
    end

    # Returns a lambda bound to the leaf at +sig+ (initialised to +base+
    # when missing). Each call folds the new value into the leaf using the
    # block, which receives the current leaf value and the new value.
    #
    # Usage:
    #   a = ht.reducer([:a, :b, :c], 0) {|acc, v| acc + v }
    #   a[1]
    def reducer(sig, base, &block)
      p = nil
      create_path(sig) do |node, key|
        unless node.has_key?(key)
          node[key] = base
        end
        p = lambda do |newval|
          node[key] = block.call(node[key], newval)
        end
      end
      p
    end

    # Sum of all leaf values matched by the signature (0 when none match).
    def sum(*args)
      out = 0
      retrieve(*args) { |v| out += v }
      out
    end

    # Sum of the :_count leaves found beneath the matched nodes.
    def count(*args)
      args = args + [:_count]
      sum(*args)
    end

    # Adds up #size of every node matched by the signature.
    def unique(*args)
      out = 0
      filter(*args) { |v| out += v.size }
      out
    end

    # like retrieve, but will return any kind of node
    def filter(*sig)
      results = []
      search(sig) do |node|
        results << node
        yield(node) if block_given?
      end
      results
    end

    # Given a signature array, attempt to retrieve matching leaf values.
    # Returns the leaves; each leaf is also yielded when a block is given.
    # Nodes that have children are neither returned nor yielded, so the
    # block sees exactly the returned values.
    def retrieve(*sig)
      results = []
      search(sig) do |node|
        next if node.respond_to?(:children)
        results << node
        yield(node) if block_given?
      end
      results
    end

    # Generic tree search method. Descends one level per matcher in +sig+
    # (via #children) and yields every node reached once the matchers are
    # used up. Nothing is yielded when the tree runs out of matching nodes
    # first. A nil or false entry in +sig+ ends the descent at that point.
    #
    # +sig+ is copied before being consumed, so the caller's array is left
    # intact.
    def search(sig)
      remaining = sig.dup
      current_nodes = [self]

      until current_nodes.empty?
        next_nodes = []
        matcher = remaining.shift
        if matcher
          current_nodes.each do |node|
            if node.respond_to?(:children)
              next_nodes += node.children(matcher)
            end
          end
        else
          current_nodes.each { |n| yield(n) }
        end
        current_nodes = next_nodes
      end
    end

    # Yields every node that has children, level by level, starting with
    # the receiver. Leaves are not yielded.
    def traverse
      current_nodes = [self]
      until current_nodes.empty?
        next_nodes = []
        current_nodes.each do |node|
          if node.respond_to?(:children)
            next_nodes += node.children(true)
            yield(node)
          end
        end

        current_nodes = next_nodes
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: squeeze
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Matthew King
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-08-30 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: riot
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 37
30
+ segments:
31
+ - 0
32
+ - 12
33
+ - 5
34
+ version: 0.12.5
35
+ type: :development
36
+ version_requirements: *id001
37
+ description:
38
+ email:
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files: []
44
+
45
+ files:
46
+ - LICENSE
47
+ - README.md
48
+ - lib/squeeze/hash_tree.rb
49
+ - lib/squeeze/traversable.rb
50
+ - lib/squeeze/matcher.rb
51
+ - lib/squeeze.rb
52
+ has_rdoc: true
53
+ homepage: https://github.com/automatthew/squeeze
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.6.2
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Tools for working with nested data structures in Ruby
86
+ test_files: []
87
+