tsuga 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +16 -0
  7. data/Gemfile.lock +146 -0
  8. data/Guardfile +8 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +161 -0
  11. data/Rakefile +1 -0
  12. data/lib/tsuga.rb +11 -0
  13. data/lib/tsuga/adapter.rb +4 -0
  14. data/lib/tsuga/adapter/active_record/base.rb +61 -0
  15. data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
  16. data/lib/tsuga/adapter/active_record/migration.rb +50 -0
  17. data/lib/tsuga/adapter/active_record/record.rb +15 -0
  18. data/lib/tsuga/adapter/active_record/test.rb +73 -0
  19. data/lib/tsuga/adapter/memory/base.rb +146 -0
  20. data/lib/tsuga/adapter/memory/cluster.rb +32 -0
  21. data/lib/tsuga/adapter/memory/test.rb +27 -0
  22. data/lib/tsuga/adapter/mongoid/base.rb +41 -0
  23. data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
  24. data/lib/tsuga/adapter/mongoid/record.rb +16 -0
  25. data/lib/tsuga/adapter/mongoid/test.rb +77 -0
  26. data/lib/tsuga/adapter/sequel/base.rb +57 -0
  27. data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
  28. data/lib/tsuga/adapter/sequel/record.rb +15 -0
  29. data/lib/tsuga/adapter/sequel/test.rb +73 -0
  30. data/lib/tsuga/adapter/shared.rb +4 -0
  31. data/lib/tsuga/adapter/shared/cluster.rb +19 -0
  32. data/lib/tsuga/errors.rb +3 -0
  33. data/lib/tsuga/model/cluster.rb +147 -0
  34. data/lib/tsuga/model/point.rb +206 -0
  35. data/lib/tsuga/model/record.rb +20 -0
  36. data/lib/tsuga/model/tile.rb +136 -0
  37. data/lib/tsuga/service/aggregator.rb +175 -0
  38. data/lib/tsuga/service/clusterer.rb +260 -0
  39. data/lib/tsuga/service/labeler.rb +20 -0
  40. data/lib/tsuga/version.rb +3 -0
  41. data/script/benchmark-aggregator.rb +72 -0
  42. data/script/benchmark-clusterer.rb +102 -0
  43. data/spec/adapter/memory/base_spec.rb +174 -0
  44. data/spec/adapter/memory/cluster_spec.rb +39 -0
  45. data/spec/adapter/shared/cluster_spec.rb +56 -0
  46. data/spec/integration/active_record_spec.rb +10 -0
  47. data/spec/integration/memory_spec.rb +10 -0
  48. data/spec/integration/mongoid_spec.rb +10 -0
  49. data/spec/integration/sequel_spec.rb +10 -0
  50. data/spec/integration/shared.rb +50 -0
  51. data/spec/model/point_spec.rb +102 -0
  52. data/spec/model/tile_spec.rb +116 -0
  53. data/spec/service/aggregator_spec.rb +143 -0
  54. data/spec/service/clusterer_spec.rb +84 -0
  55. data/spec/spec_helper.rb +26 -0
  56. data/spec/support/mongoid.yml +17 -0
  57. data/tsuga.gemspec +29 -0
  58. metadata +226 -0
@@ -0,0 +1,206 @@
1
+ require 'tsuga'
2
+
3
+ module Tsuga::Model
4
+
5
+ # Represents a position in the 0..1 x 0..1 square, modeling points on the
6
+ # Earth as represented by their longitude/latitude coordinates.
7
+ #
8
+ # Concretions have the following accessors:
9
+ # - :geohash (32-character string of base-4 digits, i.e. a 64-bit value)
10
+ #
11
+ # - :lat (float, -90..90)
12
+ # - :lng (float, -180..180)
13
+ #
14
+ module PointTrait
15
+
16
+
17
+
18
+ def distance_to(other)
19
+ Math.sqrt((self.lat - other.lat) ** 2 + (self.lng - other.lng) ** 2)
20
+ end
21
+
22
+
23
+ def =~(other)
24
+ self.geohash == other.geohash
25
+ end
26
+
27
+
28
+ def &(other)
29
+ distance_to(other)
30
+ end
31
+
32
+
33
+ def geohash=(value)
34
+ super(value)
35
+ _updating_coords { _set_latlng_from_geohash }
36
+ geohash
37
+ end
38
+
39
+
40
+ def lat=(value)
41
+ _validate_lat(value) if value
42
+ super(value)
43
+ _updating_coords { _set_geohash_from_latlng }
44
+ lat
45
+ end
46
+
47
+
48
+ def lng=(value)
49
+ _validate_lng(value) if value
50
+ super(value)
51
+ _updating_coords { _set_geohash_from_latlng }
52
+ lng
53
+ end
54
+
55
+
56
+ def inspect
57
+ "<%s lat:%s lng:%s geohash:%s>" % [
58
+ (self.class.name || 'Point').gsub(/.*::/, ''),
59
+ lat ? ("%.3f" % lat) : 'nil',
60
+ lng ? ("%.3f" % lng) : 'nil',
61
+ geohash ? geohash : 'nil'
62
+ ]
63
+ end
64
+
65
+ def prefix(depth)
66
+ geohash[0...depth]
67
+ end
68
+
69
+ private
70
+
71
+ # only the outmost call yields.
72
+ # prevents infinite loops of latlng <-> geohash updates
73
+ def _updating_coords
74
+ return if @_updating
75
+ @_updating = true
76
+ yield
77
+ @_updating = false
78
+ end
79
+
80
+
81
+ def _validate_lat(_lat)
82
+ raise ArgumentError, 'bad lat' unless ( -90.0 ... 90.0).include?(_lat)
83
+ end
84
+
85
+ def _validate_lng(_lng)
86
+ raise ArgumentError, 'bad lng' unless (-180.0 ... 180.0).include?(_lng)
87
+ end
88
+
89
+
90
+ def _validate_geohash(value)
91
+ raise ArgumentError, 'bad geohash' unless /^[0-3]{32}$/ =~ value
92
+ end
93
+
94
+
95
+ def _geohash_to_int(value)
96
+ value.to_i(4)
97
+ end
98
+
99
+ def _int_to_geohash(value)
100
+ value.to_s(4).rjust(32,'0')
101
+ end
102
+
103
+ # Convert the geohash into lat/lng
104
+ def _set_latlng_from_geohash
105
+ geohash = self.geohash
106
+ if geohash.nil?
107
+ self.lat = self.lng = nil
108
+ return
109
+ end
110
+ _validate_geohash(geohash)
111
+
112
+ geohash_i = _geohash_to_int(geohash)
113
+ lat,lng = _deinterleave_bits(geohash_i)
114
+ lat = lat * 180.0 / (1<<32) - 90.0
115
+ lng = lng * 360.0 / (1<<32) - 180.0
116
+ self.lat = lat
117
+ self.lng = lng
118
+ return
119
+ end
120
+
121
+
122
+ def _set_geohash_from_latlng
123
+ lat = self.lat
124
+ lng = self.lng
125
+ if lat.nil? || lng.nil?
126
+ self.geohash = nil
127
+ return
128
+ end
129
+ _validate_lat(lat)
130
+ _validate_lng(lng)
131
+ normalized_lat = ((lat + 90.0) * (1<<32) / 180.0).to_i
132
+ normalized_lng = ((lng + 180.0) * (1<<32) / 360.0).to_i
133
+
134
+ geohash_i = _interleave_bits(normalized_lat, normalized_lng)
135
+ self.geohash = _int_to_geohash(geohash_i)
136
+ return
137
+ end
138
+
139
+
140
+ def _interleave_bits(a,b)
141
+ (_interleave_bits_16b(a >> 16, b >> 16) << 32) |
142
+ (_interleave_bits_16b(a & 0xffff, b & 0xffff))
143
+ end
144
+
145
+
146
+ def _deinterleave_bits(z)
147
+ x_hi, y_hi = _deinterleave_bits_16b(z >> 32)
148
+ x_lo, y_lo = _deinterleave_bits_16b(z & 0xFFFFFFFF)
149
+
150
+ [((x_hi << 16) | x_lo), ((y_hi << 16) | y_lo)]
151
+ end
152
+
153
+
154
+ Magic = [0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF]
155
+
156
+ # Interleave lower 16 bits of x and y, so the bits of x
157
+ # are in the even positions and bits from y in the odd;
158
+ # z gets the resulting 32-bit Morton Number.
159
+ # x and y must initially be less than 65536.
160
+ # Rubyfied from http://graphics.stanford.edu/~seander/bithacks.html
161
+ def _interleave_bits_16b(x,y)
162
+ x = (x | (x << 8)) & Magic[3]
163
+ x = (x | (x << 4)) & Magic[2]
164
+ x = (x | (x << 2)) & Magic[1]
165
+ x = (x | (x << 1)) & Magic[0]
166
+ y = (y | (y << 8)) & Magic[3]
167
+ y = (y | (y << 4)) & Magic[2]
168
+ y = (y | (y << 2)) & Magic[1]
169
+ y = (y | (y << 1)) & Magic[0]
170
+ z = x | (y << 1)
171
+ end
172
+
173
+ # Deinterleave even bits and odd bits (resp.) to a 2-tuple.
174
+ # Rubyfied from http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
175
+ def _deinterleave_bits_16b(z)
176
+ [_even_bits(z), _even_bits(z >> 1)]
177
+ end
178
+
179
+
180
+ def _even_bits(z)
181
+ x = z & 0x55555555
182
+ x = (x ^ (x >> 1)) & 0x33333333
183
+ x = (x ^ (x >> 2)) & 0x0f0f0f0f
184
+ x = (x ^ (x >> 4)) & 0x00ff00ff
185
+ x = (x ^ (x >> 8)) & 0x0000ffff
186
+ end
187
+ end
188
+
189
+
190
+ class Point
191
+ module Fields
192
+ attr_accessor :lat, :lng, :geohash
193
+ end
194
+ include Fields
195
+ include PointTrait
196
+
197
+ def initialize(geohash: nil, lat: nil, lng: nil)
198
+ if geohash
199
+ self.geohash = geohash
200
+ else
201
+ self.lat = lat
202
+ self.lng = lng
203
+ end
204
+ end
205
+ end
206
+ end
@@ -0,0 +1,20 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
+ module Tsuga::Model
5
+ # Concretions have the following accessors:
6
+ # (same as Point)
7
+ #
8
+ # And respond to class methods:
9
+ # - :find(id)
10
+ # - :collect_ids (returns a Set)
11
+ #
12
+ module Record
13
+ include Tsuga::Model::PointTrait
14
+
15
+ def update_geohash
16
+ self.geohash
17
+ end
18
+
19
+ end
20
+ end
@@ -0,0 +1,136 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
+ module Tsuga::Model
5
+ class Tile
6
+ # corner points
7
+ attr_reader :southwest, :northeast
8
+
9
+ # level in the tile tree, also number of relevant high bits
10
+ # in the geohash.
11
+ attr_reader :depth
12
+
13
+ # geohash prefix
14
+ attr_reader :prefix
15
+
16
+ WIGGLE_FACTOR = 1e-4
17
+
18
+ def initialize(prefix:nil)
19
+ raise ArgumentError, 'bad prefix' if prefix !~ /^[0-3]{1,32}$/
20
+ @prefix = prefix
21
+ @depth = prefix.length
22
+ @southwest = Point.new(geohash: prefix.ljust(32, '0'))
23
+ @northeast = Point.new(geohash: prefix.ljust(32, '3'))
24
+ end
25
+
26
+ def contains?(point)
27
+ point.geohash.start_with?(@prefix)
28
+ end
29
+
30
+ def dlat(count = 1)
31
+ (northeast.lat - southwest.lat) * (count + WIGGLE_FACTOR)
32
+ end
33
+
34
+ def dlng(count = 1)
35
+ (northeast.lng - southwest.lng) * (count + WIGGLE_FACTOR)
36
+ end
37
+
38
+ # return the 4 children of this tile
39
+ def children
40
+ %w(0 1 2 3).map { |quadrant|
41
+ self.class.new(prefix: @prefix + quadrant)
42
+ }
43
+ end
44
+
45
+ # return a neighouring tile offset in tile increments
46
+ # TODO: this could be implemented using bit logic
47
+ def neighbour(lat:0, lng:0)
48
+ new_point = Point.new(
49
+ lat: southwest.lat + dlat(lat),
50
+ lng: southwest.lng + dlng(lng))
51
+ Tile.including(new_point, depth: depth)
52
+ end
53
+
54
+ # return neighbouring tiles to the north, northeast, and east
55
+ def neighbours
56
+ offsets = (-1..1).to_a.product((-1..1).to_a)
57
+ offsets.map do |lat, lng|
58
+ begin
59
+ neighbour(lat:lat, lng:lng)
60
+ rescue ArgumentError
61
+ nil # occurs on world boundaries
62
+ end
63
+ end.compact
64
+ end
65
+
66
+ def inspect
67
+ "<%s depth:%d prefix:%s>" % [
68
+ (self.class.name || 'Tile'),
69
+ depth, prefix
70
+ ]
71
+ end
72
+
73
+ module ClassMethods
74
+ # Returns a Tile instance.
75
+ # +point+ should respond to +geohash+.
76
+ # Options:
77
+ # - :depth
78
+ def including(point, options={})
79
+ depth = options[:depth]
80
+ raise ArgumentError, 'bad depth' unless (0..31).include?(depth)
81
+
82
+ new(prefix: point.prefix(depth))
83
+ end
84
+
85
+ # Return an array of Tile instances that encloses both corner points
86
+ # FIXME: this is untested
87
+ def enclosing_viewport(point_ne:nil, point_sw:nil, depth:nil)
88
+ # $stderr.puts "aiming to enclose:"
89
+ # $stderr.puts "%.2f %.2f -> %.2f %.2f" % [point_ne.lat, point_ne.lng, point_sw.lat, point_sw.lng]
90
+ # $stderr.flush
91
+
92
+ tiles = []
93
+ first_tile = including(point_sw, depth:depth)
94
+
95
+ offset_lat = 0
96
+ loop do
97
+ offset_lng = 0
98
+ loop do
99
+ # $stderr.puts("offset: #{offset_lat} #{offset_lng}")
100
+ # $stderr.flush
101
+ new_tile = first_tile.neighbour(lat:offset_lat, lng:offset_lng)
102
+ tiles << new_tile
103
+
104
+ # $stderr.puts "%.2f %.2f -> %.2f %.2f" % [new_tile.southwest.lat, new_tile.southwest.lng, new_tile.northeast.lat, new_tile.northeast.lng]
105
+ # $stderr.flush
106
+
107
+ offset_lng += 1
108
+ break if tiles.last.northeast.lng >= point_ne.lng
109
+ end
110
+ break if tiles.last.northeast.lat >= point_ne.lat
111
+ offset_lat += 1
112
+ offset_lng = 0
113
+ end
114
+
115
+ return tiles
116
+ end
117
+ end
118
+ extend ClassMethods
119
+ end
120
+ end
121
+
122
+ __END__
123
+
124
+ load 'lib/tsuga/model/tile.rb'
125
+
126
+ # {"n"=>"41.41169761785169", "e"=>"2.2055472226562642", "s"=>"41.33015287320352", "w"=>"2.107700237792983", "z"=>"3"
127
+
128
+
129
+ sw = Tsuga::Model::Point.new(lat: 41.33015287320352, lng: 2.107700237792983)
130
+ ne = Tsuga::Model::Point.new(lat: 41.41169761785169, lng: 2.2055472226562642)
131
+
132
+ Tsuga::Model::Tile.including(sw, depth: 7)
133
+ Tsuga::Model::Tile.including(ne, depth: 7)
134
+
135
+ Tsuga::Model::Tile.enclosing_viewport(point_sw:sw, point_ne:ne, depth:7).length
136
+
@@ -0,0 +1,175 @@
1
+ require 'set'
2
+ require 'tsuga/model/point'
3
+ require 'tsuga/model/tile'
4
+
5
+ module Tsuga::Service
6
+
7
+ # Aggregates clusters together until no two clusters are closer than
8
+ # a given minimum distance.
9
+ class Aggregator
10
+ # - clusters (Array): list of points to aggregate
11
+ # - fence (Tile): clusters outside this will not be aggregated
12
+ # - ratio (0..1): minimum distance between clusters after aggregation,
13
+ # as a ratio of the tile diagonal
14
+ def initialize(clusters:nil, ratio:nil, fence:nil)
15
+ @_clusters = clusters
16
+ @_fence = fence || _default_fence
17
+ @min_distance_ratio = ratio # fraction of tile diagonal
18
+ @_dropped_clusters = IdSet.new
19
+ @_updated_clusters = IdSet.new
20
+ end
21
+
22
+ def run
23
+ return if _clusters.empty?
24
+ warn "warning: running aggregation on many clusters (#{_clusters.size})" if _clusters.size > 100
25
+
26
+ if DENSITY_BIAS_FACTOR
27
+ @min_density, @max_density = _clusters.collect(&:density).minmax
28
+ end
29
+
30
+ # build the set of pairs (n²/2)
31
+ pairs = []
32
+ source = _clusters.dup
33
+ while left = source.pop
34
+ source.each do |right|
35
+ pairs << _build_pair(left, right, _fence)
36
+ end
37
+ end
38
+
39
+ # pop & merge
40
+ while pairs.any?
41
+ best_pair = pairs.min
42
+ break if best_pair.distance > min_distance
43
+
44
+ # remove the closest pair
45
+ left, right = best_pair.values
46
+ left_id = left.id
47
+ right_id = right.id
48
+
49
+ # remove pairs containing one of the items
50
+ pairs.delete_if { |p| p.has?(left) || p.has?(right) }
51
+
52
+ # merge clusters
53
+ left.merge(right)
54
+ _clusters.delete_if { |c| c.id == right_id }
55
+ _updated_clusters.remove right
56
+ _dropped_clusters.add right
57
+ _updated_clusters.add left
58
+
59
+ # create new pairs
60
+ _clusters.each do |cluster|
61
+ next if cluster.id == left_id
62
+ pairs << _build_pair(left, cluster, _fence)
63
+ end
64
+ end
65
+ nil
66
+ end
67
+
68
+ # after #run, this contains the clusters that were merged into other clusters
69
+ def dropped_clusters
70
+ _dropped_clusters.to_a
71
+ end
72
+
73
+ # after #run, this contains the clusters that were modified and need to be persisted
74
+ def updated_clusters
75
+ _updated_clusters.to_a
76
+ end
77
+
78
+ # fraction of the diagonal of the fence tile
79
+ def min_distance
80
+ @min_distance ||= (_fence.southwest & _fence.northeast) * @min_distance_ratio
81
+ end
82
+
83
+ private
84
+
85
+ # FIXME: a sensible value would be ~0.4 in theory, but this
86
+ # biasing seems to have little impact. remove?
87
+ DENSITY_BIAS_FACTOR = nil
88
+
89
+ attr_reader :_clusters, :_fence, :_dropped_clusters, :_updated_clusters
90
+
91
+ # factory for pairs, switches between fenced/unfenced
92
+ # and conditionnaly adds density bias
93
+ def _build_pair(c1, c2, fence)
94
+ pair = fence.nil? ? Pair.new(c1, c2) : FencedPair.new(c1, c2, fence)
95
+
96
+ if DENSITY_BIAS_FACTOR && (@max_density != @min_density)
97
+ # the least dense cluster pairs have a density_bias value close to 0, the densest closer to 1
98
+ density_bias = (c1.density + c2.density - 2 * @min_density) / (2 * (@max_density - @min_density))
99
+ # this makes dense clusters appear closer, and vice-versa
100
+ pair.distance = pair.distance * (1 + DENSITY_BIAS_FACTOR * (1 - density_bias) - 0.5 * DENSITY_BIAS_FACTOR)
101
+ end
102
+ pair
103
+ end
104
+
105
+ def _default_fence
106
+ return if _clusters.empty?
107
+ Tsuga::Model::Tile.including(_clusters.first, depth:_clusters.first.depth)
108
+ end
109
+
110
+ # model a pair of clusters such as [a,b] == [b,a]
111
+ # and comparison is based on distance
112
+ class Pair
113
+ include Comparable
114
+ attr_accessor :distance
115
+
116
+ def initialize(c1, c2)
117
+ @left = c1
118
+ @right = c2
119
+ @left_id = c1.id
120
+ @right_id = c2.id
121
+ @distance = (@left & @right)
122
+
123
+ raise ArgumentError, 'pair elements must be distinct' if @left_id == @right_id
124
+ end
125
+
126
+ def <=>(other)
127
+ self.distance <=> other.distance
128
+ end
129
+
130
+ # def ==(other)
131
+ # (self.left_id == other.left_id) && (self.right_id == other.right_id)
132
+ # end
133
+
134
+ def values
135
+ [@left, @right]
136
+ end
137
+
138
+ def has?(c)
139
+ c_id = c.id
140
+ (@left_id == c_id) || (@right_id == c_id)
141
+ end
142
+ end
143
+
144
+ # pairs where both points fall outside the fence are considered "at horizon"
145
+ # i.e. their distance infinite. the point is to never aggregate them.
146
+ class FencedPair < Pair
147
+ def initialize(c1, c2, fence)
148
+ super(c1, c2)
149
+ @outside = !fence.contains?(c1) && !fence.contains?(c2)
150
+ end
151
+
152
+ def distance
153
+ @outside ? Float::MAX : super
154
+ end
155
+ end
156
+
157
+ class IdSet
158
+ def initialize
159
+ @data = {}
160
+ end
161
+
162
+ def add(item)
163
+ @data[item.id] = item
164
+ end
165
+
166
+ def remove(item)
167
+ @data.delete(item.id)
168
+ end
169
+
170
+ def to_a
171
+ @data.values
172
+ end
173
+ end
174
+ end
175
+ end