squeeze 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +8 -0
- data/README.md +0 -0
- data/lib/squeeze.rb +121 -0
- data/lib/squeeze/hash_tree.rb +153 -0
- data/lib/squeeze/matcher.rb +28 -0
- data/lib/squeeze/traversable.rb +128 -0
- metadata +87 -0
data/LICENSE
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
Copyright (c) 2012 Matthew King
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
8
|
+
|
data/README.md
ADDED
File without changes
|
data/lib/squeeze.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
require "squeeze/hash_tree"
|
2
|
+
|
3
|
+
# Mixin that lets a class declare a "squeeze spec" (an options Hash) and
# then reduce a dataset with a Squeeze instance built from that spec.
#
# The spec keys read by this module are:
#   :fields          -- passed to Squeeze.new as the field specification
#   :dataset_method  -- name of an instance method that supplies the dataset
#                       when #squeeze is called without an argument
module Squeezable
  # Hook run on `include Squeezable`; adds the class-level DSL and the
  # instance-level helpers to the including module.
  def self.included(mod)
    mod.module_eval do
      extend ClassMethods
      include InstanceMethods
    end
  end

  module ClassMethods
    # With an argument, stores +options+ as this class's squeeze spec and
    # returns it. Without an argument, returns the spec declared on this
    # class, falling back to the nearest ancestor that declares one, or nil
    # when none does.
    def squeezable(options=nil)
      return @squeeze_spec = options if options
      return @squeeze_spec if @squeeze_spec

      # Explicit capability checks replace the former inline `rescue nil`,
      # which hid every StandardError raised while walking the ancestry.
      parent = respond_to?(:superclass) ? superclass : nil
      parent.respond_to?(:squeezable) ? parent.squeezable : nil
    end
  end

  module InstanceMethods
    # Reduces +dataset+ with this object's squeezer and returns the result.
    # When +dataset+ is omitted, it is obtained by calling the method named
    # by the spec's :dataset_method.
    #
    # Raises ArgumentError when no dataset is given and none can be derived
    # (including when the class declares no squeeze spec at all).
    def squeeze(dataset=nil)
      unless dataset
        spec = self.class.squeezable || {}
        meth = spec[:dataset_method]
        raise ArgumentError, "Must give #squeeze a dataset" unless meth
        dataset = self.send(meth)
      end
      squeezer.reduce(dataset)
    end

    # Memoized Squeeze instance configured with the spec's :fields.
    def squeezer
      @squeezer ||= begin
        spec = self.class.squeezable || {}
        Squeeze.new(:fields => spec[:fields])
      end
    end
  end

  # Integration for classes that provide `def_dataset_method` (a Sequel
  # model API): adds a dataset-level #squeeze that reduces the dataset itself.
  module Sequel
    def self.included(mod)
      mod.module_eval do
        include Squeezable
        def_dataset_method(:squeeze) do
          spec = mod.squeezable || {}
          squeezer = Squeeze.new(:fields => spec[:fields])
          squeezer.reduce(self)
        end
      end
    end
  end
end
|
50
|
+
|
51
|
+
# Reduces a collection of records into a tree of counts (a HashTree),
# driven by a field specification.
#
# A field spec is either an Array of field names (each counted by value)
# or a Hash of field => subspec, where the subspec is one of:
#   true    -- count the field by value, plus a per-field :_count
#   Symbol  -- count records per value, then count the named sub-field
#              beneath each value
#   Array   -- apply each element as an independent subspec for the field
#   Hash    -- count records per value, then apply the nested spec beneath
#              each value
class Squeeze

  attr_reader :output
  attr_accessor :reports, :derived

  # Class-level default field spec. With an argument, replaces the spec and
  # returns it; without, returns the current spec (an empty Hash initially).
  def self.fields(spec=nil)
    @fields ||= {}
    spec ? @fields = spec : @fields
  end

  # options[:fields]  -- field spec merged over the class-level default
  # options[:derived] -- Hash of name => callable; the callable receives the
  #                      record and supplies a value when the record has none
  def initialize(options={})
    # Array specs are handled by #process, so accept them here too instead
    # of failing inside Hash#merge.
    defaults = normalize_fields(self.class.fields)
    @fields = defaults.merge(normalize_fields(options[:fields]))
    @derived = options[:derived] || {}
    @output = HashTree.new
  end

  # Takes an array of hashes, keyed with Symbols.
  # Processes every record against the configured fields and returns the
  # accumulated output tree. Counts accumulate across repeated calls.
  def reduce(records)
    records.each { |record| process(record, @fields) }
    @output
  end

  # Applies a field spec (Hash or Array) to one record. +sig+ is the path
  # prefix under which counts are recorded. Other spec types are ignored.
  def process(record, fields, sig=[])
    case fields
    when Hash
      fields.each do |field, subfields|
        process_field(record, field, subfields, sig)
      end
    when Array
      fields.each do |field|
        process_field(record, field, true, sig)
      end
    end
  end

  # Records counts for a single field of a record according to +subfields+
  # (see the class comment for the accepted forms). Does nothing when the
  # field resolves to nil or false.
  def process_field(record, field, subfields, sig)
    key = resolve(field, record)
    return unless key

    case subfields
    when true
      output.increment(sig + [field, key])
      output.increment(sig + [field, :_count])
    when Symbol
      output.increment(sig + [field, key, :_count])
      value = resolve(subfields, record)
      output.increment(sig + [field, key, subfields, value])
    when Array
      subfields.each do |subfield|
        process(record, {field => subfield}, sig)
      end
    when Hash
      output.increment(sig + [field, key, :_count])
      process(record, subfields, sig + [field, key])
    end
  end

  # Looks up the value of +name+ for a record. Tries, in order: a truthy
  # record[name]; a derived callable registered under +name+; a method of
  # that name on the record. Falls back to :_unknown.
  def resolve(name, record)
    if (stored = record[name])
      stored
    elsif (deriver = @derived[name])
      deriver.call(record)
    elsif record.respond_to?(name)
      record.send(name)
    else
      :_unknown
    end
  end

  private

  # Converts a field spec into Hash form: nil becomes an empty Hash and an
  # Array of names becomes name => true. Anything else is returned as given.
  def normalize_fields(spec)
    case spec
    when nil
      {}
    when Array
      Hash[spec.map { |field| [field, true] }]
    else
      spec
    end
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require "squeeze/traversable"
|
2
|
+
|
3
|
+
# A Hash subclass with a default proc that creates another HashTree
# on attempts to access missing keys. Reading a missing key therefore
# inserts an empty subtree at that key.
class HashTree < Hash
  include Squeeze::Traversable

  # Override the constructor to provide a default_proc
  # NOTE: there's a better way to do this in 1.9.2, it seems.
  # See Hash#default_proc=
  def self.new
    hash = Hash.new { |h,k| h[k] = HashTree.new }
    # Hash#replace copies the default proc along with the (empty) contents.
    super.replace(hash)
  end

  # Builds a HashTree from an existing (possibly nested) hash by merging
  # it into a fresh tree.
  def self.[](hash)
    ht = self.new
    ht << hash
    ht
  end

  # Marshal hook: serializes the contents as a plain Hash, dropping any
  # Proc values.
  def _dump(depth)
    h = Hash[self]
    h.delete_if { |k,v| v.is_a? Proc }
    Marshal.dump(h)
  end

  # Marshal hook: rebuilds a HashTree from the payload written by #_dump.
  # NOTE(review): this calls Marshal.load on its argument; only unmarshal
  # data from trusted sources.
  def self._load(*args)
    h = Marshal.load(*args)
    ht = self.new
    ht.replace(h)
    ht
  end

  # Follow the path specified, creating new nodes where necessary.
  # Returns the value at the end of the path. If a block is supplied,
  # it will be called with the last node and the last key as parameters,
  # analogous to Hash.new's default proc. This is necessary to allow
  # setting a value at the end of the path.
  #
  # The signature array is not modified.
  def create_path(sig)
    # Slice rather than pop so the caller's array stays intact.
    path = sig[0...-1]
    final_key = sig.last
    hash = self
    path.each do |a|
      hash = hash[a]
    end
    yield(hash, final_key) if block_given?
    hash[final_key]
  end

  # Attempt to retrieve the value at the end of the path specified,
  # without creating new nodes. Returns nil on failure.
  # TODO: consider whether splatting the signature is wise.
  def find(*sig)
    stage = self
    sig.each do |a|
      return nil unless stage.has_key?(a)
      stage = stage[a]
    end
    stage
  end

  # Returns the values stored under every key accepted by +matcher+
  # (see #match? for the supported matcher types).
  def children(matcher=true)
    next_keys = self.keys.select do |key|
      match?(matcher, key)
    end
    self.values_at(*next_keys)
  end

  # Returns a new HashTree combining this tree with +other+. Neither
  # operand is modified. See #_plus for the merge rules.
  def +(other)
    out = HashTree.new
    _plus(other, out)
    out
  end

  # Merges this tree and +ht2+ into +out+:
  #   * keys present on only one side are copied (subtrees are deep-copied)
  #   * subtrees present on both sides are merged recursively
  #   * numeric leaves present on both sides are added
  #   * other leaves present on both sides become a two-element Array
  #
  # Raises ArgumentError when one side holds a subtree and the other a leaf.
  def _plus(ht2, out)
    self.each do |k1, v1|
      unless ht2.has_key?(k1)
        # Key exists only on this side: carry it over instead of dropping it.
        out[k1] = _copy_value(v1)
        next
      end

      v2 = ht2[k1]
      if v1.respond_to?(:_plus)
        unless v2.respond_to?(:has_key?)
          raise ArgumentError,
            "Can't merge leaf with non-leaf:\n#{v1.inspect}\n#{v2.inspect}"
        end
        # Merge into a fresh subtree so the receiver's nodes are never
        # aliased into, or mutated through, the result.
        out[k1] = HashTree.new
        v1._plus(v2, out[k1])
      elsif v2.respond_to?(:_plus)
        raise ArgumentError,
          "Can't merge leaf with non-leaf:\n#{v1.inspect}\n#{v2.inspect}"
      elsif v2.is_a?(Numeric) && v1.is_a?(Numeric)
        out[k1] = v1 + v2
      else
        out[k1] = [v1, v2]
      end
    end

    ht2.each do |k, v|
      out[k] = _copy_value(v) unless self.has_key?(k)
    end
  end

  # Merges +other+ (any nested hash) into this tree in place: subtrees are
  # merged recursively, numeric leaves are added, and new keys are inserted.
  #
  # Raises ArgumentError when an existing key cannot be combined with the
  # incoming value (neither both hash-like nor both numeric).
  def <<(other)
    other.each do |k,v1|
      if self.has_key?(k)
        v2 = self[k]
        if v1.respond_to?(:has_key?) && v2.respond_to?(:has_key?)
          v2 << v1
        elsif v1.is_a?(Numeric) && v2.is_a?(Numeric)
          self[k] = v1 + v2
        else
          raise ArgumentError,
            "Can't merge leaf with non-leaf:\n#{v1.inspect}\n#{v2.inspect}"
        end
      else
        if v1.respond_to?(:has_key?)
          # self[k] auto-creates an empty subtree via the default proc.
          self[k] << v1
        else
          self[k] = v1
        end
      end
    end
  end

  # Tests a key against a matcher:
  #   true            -- matches every key
  #   String, Symbol  -- matches by equality
  #   Regexp          -- matches with =~
  #   Proc            -- matches when the proc returns a truthy value
  #   nil             -- matches nothing
  #
  # Raises ArgumentError for any other matcher type.
  def match?(val, key)
    case val
    when true
      true
    when String, Symbol
      key == val
    when Regexp
      key =~ val
    when Proc
      val.call(key)
    when nil
      false
    else
      raise ArgumentError, "Unexpected matcher type: #{val.inspect}"
    end
  end

  private

  # Copies a value for insertion into a merge result: hash-like values are
  # deep-copied into a new HashTree, other values are dup'd when possible.
  def _copy_value(value)
    return HashTree[value] if value.respond_to?(:has_key?)
    begin
      value.dup
    rescue TypeError
      # Older Rubies refuse to dup immediates such as Fixnum and Symbol.
      value
    end
  end

end
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class Squeeze

  # Matchers are initialized with a pattern, to be used for
  # retrieval in a HashTree. What makes Matcher special,
  # really the only reason it exists, is that it overrides #eql?
  # and #hash so that Hashes will treat two Matchers as the same
  # object if their patterns are the same. This prevents
  # redundancy when a HashTree creates its internal tree.
  class Matcher
    attr_reader :pattern

    # A nil (or false) pattern is stored as true, which matches any value.
    def initialize(pattern)
      @pattern = pattern || true
    end

    # True when +val+ equals the pattern, or when the pattern is the
    # match-everything value true.
    def call(val)
      @pattern == val || @pattern == true
    end

    # Value equality: another Matcher of the same class with an equal pattern.
    def ==(other)
      other.kind_of?(self.class) && other.pattern == @pattern
    end

    # Hash-key equality. Patterns are compared with eql? so that this stays
    # consistent with #hash (e.g. 1 and 1.0 are == but hash differently).
    def eql?(other)
      other.kind_of?(self.class) && other.pattern.eql?(@pattern)
    end

    # Delegates to the pattern so equal patterns land in the same bucket.
    def hash
      @pattern.hash
    end
  end

end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
class Squeeze
  # Tree classes including this module must supply these methods:
  #
  # * #create_path(path_signature)
  # * #find(path_signature)
  # * #children(node_matcher)
  #
  # A "signature" is an Array describing a path from the root. A node is
  # treated as a non-leaf when it responds to #children.
  module Traversable

    # Follow or create the path specified by the signature and assign
    # the value as a terminating leaf node.
    #
    #   h.set([:a, :b, :c], "This is a retrievable value")
    #
    # Raises ArgumentError when the signature is empty.
    def set(sig, val)
      raise ArgumentError if sig.empty?
      create_path(sig) do |node, key|
        node[key] = val
      end
    end

    # Replaces the value at the end of the path with the result of yielding
    # the current value to the block. A missing value starts out as +base+.
    def reduce(sig, base=0)
      create_path(sig) do |node, key|
        node[key] = base unless node.has_key?(key)
        node[key] = yield node[key]
      end
    end

    # Adds +val+ (default 1) to the value at the end of the path, creating
    # it when absent. When a block is given, its result is used as the
    # amount instead of +val+.
    def increment(sig, val=1)
      val = yield if block_given?
      create_path(sig) do |node, key|
        if node.has_key?(key)
          node[key] = node[key] + val
        else
          node[key] = val
        end
      end
    end

    # Returns a lambda that folds new values into the leaf at the end of the
    # path using the given block. A missing leaf starts out as +base+.
    #
    # Usage:
    #   a = ht.reducer([:a, :b, :c], 0) {|acc, v| acc + v }
    #   a[1]
    def reducer(sig, base, &block)
      folder = nil
      create_path(sig) do |node, key|
        node[key] = base unless node.has_key?(key)
        folder = lambda do |newval|
          node[key] = block.call(node[key], newval)
        end
      end
      folder
    end

    # Sums every leaf value matched by the signature. Returns 0 when nothing
    # matches.
    def sum(*args)
      out = 0
      retrieve(*args) { |v| out += v }
      out
    end

    # Sums the :_count leaves found beneath the matched paths.
    def count(*args)
      args = args + [:_count]
      sum(*args)
    end

    # Sums the #size of every node matched by the signature.
    def unique(*args)
      out = 0
      filter(*args) { |v| out += v.size }
      out
    end

    # like retrieve, but will return any kind of node
    def filter(*sig)
      results = []
      search(sig) do |node|
        results << node
        yield(node) if block_given?
      end
      results
    end

    # Given a signature array, attempt to retrieve matching leaf values.
    # Non-leaf matches are skipped: they are neither returned nor yielded,
    # so block consumers such as #sum only ever see leaf values.
    def retrieve(*sig)
      results = []
      search(sig) do |node|
        next if node.respond_to?(:children)
        results << node
        yield(node) if block_given?
      end
      results
    end

    # Generic tree search method. Walks the tree level by level, narrowing
    # the candidate nodes with each matcher in +sig+, and yields every node
    # still matched once the signature runs out. A nil or false element ends
    # the signature early. The signature array is not modified.
    def search(sig)
      # Work on a copy; the matchers are consumed destructively below.
      remaining = sig.dup
      current_nodes = [self]

      while !current_nodes.empty?
        next_nodes = []
        matcher = remaining.shift
        if matcher
          current_nodes.each do |node|
            if node.respond_to?(:children)
              next_nodes.concat(node.children(matcher))
            end
          end
        else
          current_nodes.each { |n| yield(n) }
        end
        current_nodes = next_nodes
      end
    end

    # Breadth-first walk yielding every non-leaf node, starting with self.
    def traverse
      current_nodes = [self]
      while !current_nodes.empty?
        next_nodes = []
        current_nodes.each do |node|
          if node.respond_to?(:children)
            next_nodes.concat(node.children(true))
            yield(node)
          end
        end

        current_nodes = next_nodes
      end
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: squeeze
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Matthew King
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-08-30 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: riot
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 37
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 12
|
33
|
+
- 5
|
34
|
+
version: 0.12.5
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description:
|
38
|
+
email:
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files: []
|
44
|
+
|
45
|
+
files:
|
46
|
+
- LICENSE
|
47
|
+
- README.md
|
48
|
+
- lib/squeeze/hash_tree.rb
|
49
|
+
- lib/squeeze/traversable.rb
|
50
|
+
- lib/squeeze/matcher.rb
|
51
|
+
- lib/squeeze.rb
|
52
|
+
has_rdoc: true
|
53
|
+
homepage: https://github.com/automatthew/squeeze
|
54
|
+
licenses: []
|
55
|
+
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.6.2
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Tools for working with nested data structures in Ruby
|
86
|
+
test_files: []
|
87
|
+
|