tsuga 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +16 -0
  7. data/Gemfile.lock +146 -0
  8. data/Guardfile +8 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +161 -0
  11. data/Rakefile +1 -0
  12. data/lib/tsuga.rb +11 -0
  13. data/lib/tsuga/adapter.rb +4 -0
  14. data/lib/tsuga/adapter/active_record/base.rb +61 -0
  15. data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
  16. data/lib/tsuga/adapter/active_record/migration.rb +50 -0
  17. data/lib/tsuga/adapter/active_record/record.rb +15 -0
  18. data/lib/tsuga/adapter/active_record/test.rb +73 -0
  19. data/lib/tsuga/adapter/memory/base.rb +146 -0
  20. data/lib/tsuga/adapter/memory/cluster.rb +32 -0
  21. data/lib/tsuga/adapter/memory/test.rb +27 -0
  22. data/lib/tsuga/adapter/mongoid/base.rb +41 -0
  23. data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
  24. data/lib/tsuga/adapter/mongoid/record.rb +16 -0
  25. data/lib/tsuga/adapter/mongoid/test.rb +77 -0
  26. data/lib/tsuga/adapter/sequel/base.rb +57 -0
  27. data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
  28. data/lib/tsuga/adapter/sequel/record.rb +15 -0
  29. data/lib/tsuga/adapter/sequel/test.rb +73 -0
  30. data/lib/tsuga/adapter/shared.rb +4 -0
  31. data/lib/tsuga/adapter/shared/cluster.rb +19 -0
  32. data/lib/tsuga/errors.rb +3 -0
  33. data/lib/tsuga/model/cluster.rb +147 -0
  34. data/lib/tsuga/model/point.rb +206 -0
  35. data/lib/tsuga/model/record.rb +20 -0
  36. data/lib/tsuga/model/tile.rb +136 -0
  37. data/lib/tsuga/service/aggregator.rb +175 -0
  38. data/lib/tsuga/service/clusterer.rb +260 -0
  39. data/lib/tsuga/service/labeler.rb +20 -0
  40. data/lib/tsuga/version.rb +3 -0
  41. data/script/benchmark-aggregator.rb +72 -0
  42. data/script/benchmark-clusterer.rb +102 -0
  43. data/spec/adapter/memory/base_spec.rb +174 -0
  44. data/spec/adapter/memory/cluster_spec.rb +39 -0
  45. data/spec/adapter/shared/cluster_spec.rb +56 -0
  46. data/spec/integration/active_record_spec.rb +10 -0
  47. data/spec/integration/memory_spec.rb +10 -0
  48. data/spec/integration/mongoid_spec.rb +10 -0
  49. data/spec/integration/sequel_spec.rb +10 -0
  50. data/spec/integration/shared.rb +50 -0
  51. data/spec/model/point_spec.rb +102 -0
  52. data/spec/model/tile_spec.rb +116 -0
  53. data/spec/service/aggregator_spec.rb +143 -0
  54. data/spec/service/clusterer_spec.rb +84 -0
  55. data/spec/spec_helper.rb +26 -0
  56. data/spec/support/mongoid.yml +17 -0
  57. data/tsuga.gemspec +29 -0
  58. metadata +226 -0
@@ -0,0 +1,206 @@
1
+ require 'tsuga'
2
+
3
+ module Tsuga::Model
4
+
5
+ # Represents a position in the 0..1 x 0..1 square, modeling points on the
6
+ # Earth as represented by their longitude/latitude coordinates.
7
+ #
8
+ # Concretions have the following accessors:
9
+ # - :geohash (String of 32 base-4 digits '0'..'3', i.e. 64 bits of interleaved lat/lng)
10
+ #
11
+ # - :lat (float, -90...90, upper bound excluded)
12
+ # - :lng (float, -180...180, upper bound excluded)
13
+ #
14
module PointTrait
  # Euclidean distance to +other+, computed directly on the lat/lng values
  # (not a great-circle distance).
  def distance_to(other)
    Math.sqrt((lat - other.lat)**2 + (lng - other.lng)**2)
  end

  # True when both points carry the same geohash.
  def =~(other)
    geohash == other.geohash
  end

  # Operator shorthand for #distance_to.
  def &(other)
    distance_to(other)
  end

  # Stores the geohash through the including class's own writer (+super+),
  # then re-derives lat/lng from it.
  # Raises ArgumentError when the stored geohash is not 32 base-4 digits.
  def geohash=(value)
    super(value)
    _updating_coords { _set_latlng_from_geohash }
    geohash
  end

  # Validates and stores the latitude, then re-derives the geohash.
  # +nil+ is accepted and skips validation.
  def lat=(value)
    _validate_lat(value) if value
    super(value)
    _updating_coords { _set_geohash_from_latlng }
    lat
  end

  # Validates and stores the longitude, then re-derives the geohash.
  # +nil+ is accepted and skips validation.
  def lng=(value)
    _validate_lng(value) if value
    super(value)
    _updating_coords { _set_geohash_from_latlng }
    lng
  end

  # Short human-readable form: unqualified class name, lat/lng to 3 decimals,
  # and the geohash.
  def inspect
    "<%s lat:%s lng:%s geohash:%s>" % [
      (self.class.name || 'Point').gsub(/.*::/, ''),
      lat ? ("%.3f" % lat) : 'nil',
      lng ? ("%.3f" % lng) : 'nil',
      geohash || 'nil'
    ]
  end

  # First +depth+ digits of the geohash.
  def prefix(depth)
    geohash[0...depth]
  end

  private

  # Only the outermost call yields; nested calls return immediately.
  # This prevents infinite loops of latlng <-> geohash updates.
  # The flag is cleared in +ensure+ so that a validation error raised by the
  # block does not leave the object permanently unable to sync its
  # coordinates.
  def _updating_coords
    return if @_updating
    @_updating = true
    begin
      yield
    ensure
      @_updating = false
    end
  end

  # Raises ArgumentError unless -90.0 <= _lat < 90.0.
  def _validate_lat(_lat)
    raise ArgumentError, 'bad lat' unless (-90.0...90.0).include?(_lat)
  end

  # Raises ArgumentError unless -180.0 <= _lng < 180.0.
  def _validate_lng(_lng)
    raise ArgumentError, 'bad lng' unless (-180.0...180.0).include?(_lng)
  end

  # Raises ArgumentError unless +value+ is exactly 32 digits in '0'..'3'.
  # \A and \z anchor the whole string; ^ and $ would also accept a trailing
  # newline or any multi-line string with one well-formed line.
  def _validate_geohash(value)
    raise ArgumentError, 'bad geohash' unless /\A[0-3]{32}\z/ =~ value
  end

  # Parses the base-4 geohash string into an Integer.
  def _geohash_to_int(value)
    value.to_i(4)
  end

  # Formats an Integer as a base-4 string, zero-padded to 32 digits.
  def _int_to_geohash(value)
    value.to_s(4).rjust(32, '0')
  end

  # Convert the geohash into lat/lng.
  # A nil geohash clears both coordinates.
  def _set_latlng_from_geohash
    hash_value = geohash
    if hash_value.nil?
      self.lng = nil
      self.lat = nil
      return
    end
    _validate_geohash(hash_value)

    lat_bits, lng_bits = _deinterleave_bits(_geohash_to_int(hash_value))
    # scale the 32-bit integers back onto the degree ranges
    self.lat = lat_bits * 180.0 / (1 << 32) - 90.0
    self.lng = lng_bits * 360.0 / (1 << 32) - 180.0
    return
  end

  # Convert lat/lng into the geohash.
  # If either coordinate is nil the geohash is cleared.
  def _set_geohash_from_latlng
    current_lat = lat
    current_lng = lng
    if current_lat.nil? || current_lng.nil?
      self.geohash = nil
      return
    end
    _validate_lat(current_lat)
    _validate_lng(current_lng)
    # map each coordinate onto an integer in 0...(1 << 32)
    normalized_lat = ((current_lat + 90.0) * (1 << 32) / 180.0).to_i
    normalized_lng = ((current_lng + 180.0) * (1 << 32) / 360.0).to_i

    self.geohash = _int_to_geohash(_interleave_bits(normalized_lat, normalized_lng))
    return
  end

  # Interleaves two 32-bit integers into one 64-bit integer, processing the
  # high and low 16-bit halves separately.
  def _interleave_bits(a, b)
    (_interleave_bits_16b(a >> 16, b >> 16) << 32) |
      _interleave_bits_16b(a & 0xffff, b & 0xffff)
  end

  # Inverse of #_interleave_bits: returns the two 32-bit integers as a pair.
  def _deinterleave_bits(z)
    x_hi, y_hi = _deinterleave_bits_16b(z >> 32)
    x_lo, y_lo = _deinterleave_bits_16b(z & 0xFFFFFFFF)

    [((x_hi << 16) | x_lo), ((y_hi << 16) | y_lo)]
  end

  # Bit masks used by #_interleave_bits_16b to spread bits apart.
  Magic = [0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF].freeze

  # Interleave lower 16 bits of x and y, so the bits of x
  # are in the even positions and bits from y in the odd;
  # returns the resulting 32-bit Morton Number.
  # x and y must initially be less than 65536.
  # Rubyfied from http://graphics.stanford.edu/~seander/bithacks.html
  def _interleave_bits_16b(x, y)
    x = (x | (x << 8)) & Magic[3]
    x = (x | (x << 4)) & Magic[2]
    x = (x | (x << 2)) & Magic[1]
    x = (x | (x << 1)) & Magic[0]
    y = (y | (y << 8)) & Magic[3]
    y = (y | (y << 4)) & Magic[2]
    y = (y | (y << 2)) & Magic[1]
    y = (y | (y << 1)) & Magic[0]
    x | (y << 1)
  end

  # Deinterleave even bits and odd bits (resp.) to a 2-tuple.
  # Rubyfied from http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
  def _deinterleave_bits_16b(z)
    [_even_bits(z), _even_bits(z >> 1)]
  end

  # Compacts the even-position bits of +z+ into the low 16 bits.
  def _even_bits(z)
    x = z & 0x55555555
    x = (x ^ (x >> 1)) & 0x33333333
    x = (x ^ (x >> 2)) & 0x0f0f0f0f
    x = (x ^ (x >> 4)) & 0x00ff00ff
    (x ^ (x >> 8)) & 0x0000ffff
  end
end
188
+
189
+
190
# Plain in-memory point: ordinary attribute accessors wrapped by PointTrait,
# which keeps lat/lng and geohash in sync.
class Point
  # Raw accessors live in their own module so that PointTrait's writers can
  # reach them through +super+.
  module Fields
    attr_accessor :lat, :lng, :geohash
  end
  include Fields
  include PointTrait

  # Builds a point either from a geohash or from a lat/lng pair.
  # When +geohash+ is given, +lat+ and +lng+ are ignored.
  def initialize(geohash: nil, lat: nil, lng: nil)
    unless geohash
      self.lat = lat
      self.lng = lng
      return
    end
    self.geohash = geohash
  end
end
206
+ end
@@ -0,0 +1,20 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
+ module Tsuga::Model
5
+ # Concretions have the following accessors:
6
+ # (same as Point)
7
+ #
8
+ # And respond to class methods:
9
+ # - :find(id)
10
+ # - :collect_ids (returns a Set)
11
+ #
12
module Record
  include Tsuga::Model::PointTrait

  # Returns the record's current geohash.
  # NOTE(review): despite the name, this only reads the value and does not
  # recompute anything here - confirm whether adapters rely on that.
  def update_geohash
    geohash
  end
end
20
+ end
@@ -0,0 +1,136 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
+ module Tsuga::Model
5
# A square cell of the geohash grid, identified by a geohash prefix.
class Tile
  # corner points
  attr_reader :southwest, :northeast

  # level in the tile tree, also number of relevant high digits
  # in the geohash.
  attr_reader :depth

  # geohash prefix
  attr_reader :prefix

  # Small extra fraction of a tile added to every offset in #dlat/#dlng, so
  # that an offset point lands strictly inside the target tile rather than
  # on its edge.
  WIGGLE_FACTOR = 1e-4

  # +prefix+ must be a String of 1 to 32 digits in '0'..'3'.
  # Raises ArgumentError otherwise. \A and \z anchor the whole string;
  # ^ and $ would let a newline-terminated prefix through.
  def initialize(prefix:nil)
    unless prefix.is_a?(String) && prefix =~ /\A[0-3]{1,32}\z/
      raise ArgumentError, 'bad prefix'
    end
    @prefix = prefix
    @depth = prefix.length
    # pad with the lowest / highest digit to get the two extreme geohashes
    @southwest = Point.new(geohash: prefix.ljust(32, '0'))
    @northeast = Point.new(geohash: prefix.ljust(32, '3'))
  end

  # True when +point+'s geohash starts with this tile's prefix.
  def contains?(point)
    point.geohash.start_with?(@prefix)
  end

  # Latitude span of +count+ tiles (plus the wiggle fraction).
  def dlat(count = 1)
    (northeast.lat - southwest.lat) * (count + WIGGLE_FACTOR)
  end

  # Longitude span of +count+ tiles (plus the wiggle fraction).
  def dlng(count = 1)
    (northeast.lng - southwest.lng) * (count + WIGGLE_FACTOR)
  end

  # return the 4 children of this tile
  def children
    %w(0 1 2 3).map { |quadrant| self.class.new(prefix: @prefix + quadrant) }
  end

  # return a neighbouring tile offset in tile increments
  # Raises ArgumentError when the offset point has an out-of-range lat/lng.
  # TODO: this could be implemented using bit logic
  def neighbour(lat:0, lng:0)
    new_point = Point.new(
      lat: southwest.lat + dlat(lat),
      lng: southwest.lng + dlng(lng))
    Tile.including(new_point, depth: depth)
  end

  # return the tiles at every offset in -1..1 x -1..1, i.e. the 3x3 block
  # centred on this tile (the zero offset is part of the list). Offsets that
  # raise ArgumentError are dropped.
  def neighbours
    offsets = (-1..1).to_a.product((-1..1).to_a)
    offsets.map do |lat, lng|
      begin
        neighbour(lat: lat, lng: lng)
      rescue ArgumentError
        nil # occurs on world boundaries
      end
    end.compact
  end

  def inspect
    "<%s depth:%d prefix:%s>" % [
      (self.class.name || 'Tile'),
      depth, prefix
    ]
  end

  module ClassMethods
    # Returns a Tile instance.
    # +point+ should respond to +prefix+.
    # Options:
    # - :depth (must be in 0..31, else ArgumentError; note that a depth of 0
    #   yields an empty prefix, which the constructor rejects)
    def including(point, options={})
      depth = options[:depth]
      raise ArgumentError, 'bad depth' unless (0..31).include?(depth)

      new(prefix: point.prefix(depth))
    end

    # Return an array of Tile instances that encloses both corner points.
    # Tiles are collected row by row starting from the tile containing
    # +point_sw+: each row extends in longitude until it reaches
    # +point_ne.lng+, and rows are added until +point_ne.lat+ is reached.
    # FIXME: this is untested
    def enclosing_viewport(point_ne:nil, point_sw:nil, depth:nil)
      tiles = []
      first_tile = including(point_sw, depth: depth)

      offset_lat = 0
      loop do
        offset_lng = 0
        loop do
          tiles << first_tile.neighbour(lat: offset_lat, lng: offset_lng)
          offset_lng += 1
          break if tiles.last.northeast.lng >= point_ne.lng
        end
        break if tiles.last.northeast.lat >= point_ne.lat
        offset_lat += 1
      end

      tiles
    end
  end
  extend ClassMethods
end
120
+ end
121
+
122
+ __END__
123
+
124
+ load 'lib/tsuga/model/tile.rb'
125
+
126
+ # {"n"=>"41.41169761785169", "e"=>"2.2055472226562642", "s"=>"41.33015287320352", "w"=>"2.107700237792983", "z"=>"3"
127
+
128
+
129
+ sw = Tsuga::Model::Point.new(lat: 41.33015287320352, lng: 2.107700237792983)
130
+ ne = Tsuga::Model::Point.new(lat: 41.41169761785169, lng: 2.2055472226562642)
131
+
132
+ Tsuga::Model::Tile.including(sw, depth: 7)
133
+ Tsuga::Model::Tile.including(ne, depth: 7)
134
+
135
+ Tsuga::Model::Tile.enclosing_viewport(point_sw:sw, point_ne:ne, depth:7).length
136
+
@@ -0,0 +1,175 @@
1
+ require 'set'
2
+ require 'tsuga/model/point'
3
+ require 'tsuga/model/tile'
4
+
5
+ module Tsuga::Service
6
+
7
+ # Aggregates clusters together until no two clusters are closer than
8
+ # a given minimum distance.
9
class Aggregator
  # FIXME: a sensible value would be ~0.4 in theory, but this
  # biasing seems to have little impact. remove?
  DENSITY_BIAS_FACTOR = nil

  # - clusters (Array): list of points to aggregate; nil is treated as an
  #   empty list. The array is modified in place by #run (merged-away
  #   clusters are deleted from it).
  # - fence (Tile): clusters outside this will not be aggregated; defaults to
  #   the tile including the first cluster at that cluster's depth
  # - ratio (0..1): minimum distance between clusters after aggregation,
  #   as a ratio of the tile diagonal
  def initialize(clusters:nil, ratio:nil, fence:nil)
    @_clusters = clusters || []
    @_fence = fence || _default_fence
    @min_distance_ratio = ratio # fraction of tile diagonal
    @_dropped_clusters = IdSet.new
    @_updated_clusters = IdSet.new
  end

  # Repeatedly merges the closest pair of clusters until the closest
  # remaining pair is further apart than #min_distance. Always returns nil;
  # results are exposed through #dropped_clusters and #updated_clusters.
  def run
    return if _clusters.empty?
    warn "warning: running aggregation on many clusters (#{_clusters.size})" if _clusters.size > 100

    if DENSITY_BIAS_FACTOR
      @min_density, @max_density = _clusters.collect(&:density).minmax
    end

    # build the set of pairs (n²/2)
    pairs = []
    source = _clusters.dup
    while left = source.pop
      source.each do |right|
        pairs << _build_pair(left, right, _fence)
      end
    end

    # pop & merge
    while pairs.any?
      best_pair = pairs.min
      break if best_pair.distance > min_distance

      # remove the closest pair
      left, right = best_pair.values
      left_id = left.id
      right_id = right.id

      # remove pairs containing one of the items
      pairs.delete_if { |pair| pair.has?(left) || pair.has?(right) }

      # merge clusters: +right+ is absorbed into +left+
      left.merge(right)
      _clusters.delete_if { |c| c.id == right_id }
      _updated_clusters.remove right
      _dropped_clusters.add right
      _updated_clusters.add left

      # create new pairs between the merged cluster and every survivor
      _clusters.each do |cluster|
        next if cluster.id == left_id
        pairs << _build_pair(left, cluster, _fence)
      end
    end
    nil
  end

  # after #run, this contains the clusters that were merged into other clusters
  def dropped_clusters
    _dropped_clusters.to_a
  end

  # after #run, this contains the clusters that were modified and need to be persisted
  def updated_clusters
    _updated_clusters.to_a
  end

  # fraction of the diagonal of the fence tile (memoized)
  def min_distance
    @min_distance ||= (_fence.southwest & _fence.northeast) * @min_distance_ratio
  end

  private

  attr_reader :_clusters, :_fence, :_dropped_clusters, :_updated_clusters

  # factory for pairs, switches between fenced/unfenced
  # and conditionally adds density bias
  def _build_pair(c1, c2, fence)
    pair = fence.nil? ? Pair.new(c1, c2) : FencedPair.new(c1, c2, fence)

    if DENSITY_BIAS_FACTOR && (@max_density != @min_density)
      # the least dense cluster pairs have a density_bias value close to 0, the densest closer to 1
      density_bias = (c1.density + c2.density - 2 * @min_density) / (2 * (@max_density - @min_density))
      # this makes dense clusters appear closer, and vice-versa
      pair.distance = pair.distance * (1 + DENSITY_BIAS_FACTOR * (1 - density_bias) - 0.5 * DENSITY_BIAS_FACTOR)
    end
    pair
  end

  # Tile including the first cluster, at that cluster's depth; nil when
  # there are no clusters.
  def _default_fence
    return if _clusters.empty?
    Tsuga::Model::Tile.including(_clusters.first, depth: _clusters.first.depth)
  end

  # model a pair of clusters such as [a,b] == [b,a]
  # and comparison is based on distance
  class Pair
    include Comparable
    attr_accessor :distance

    # Raises ArgumentError when both clusters have the same id.
    def initialize(c1, c2)
      # reject degenerate pairs before doing any distance computation
      raise ArgumentError, 'pair elements must be distinct' if c1.id == c2.id

      @left = c1
      @right = c2
      @left_id = c1.id
      @right_id = c2.id
      @distance = (@left & @right)
    end

    def <=>(other)
      distance <=> other.distance
    end

    # def ==(other)
    #   (self.left_id == other.left_id) && (self.right_id == other.right_id)
    # end

    def values
      [@left, @right]
    end

    # True when +c+ (compared by id) is one of the two members.
    def has?(c)
      c_id = c.id
      (@left_id == c_id) || (@right_id == c_id)
    end
  end

  # pairs where both points fall outside the fence are considered "at horizon"
  # i.e. their distance infinite. the point is to never aggregate them.
  class FencedPair < Pair
    def initialize(c1, c2, fence)
      super(c1, c2)
      @outside = !fence.contains?(c1) && !fence.contains?(c2)
    end

    def distance
      @outside ? Float::MAX : super
    end
  end

  # Collection of items keyed by their +id+; adding an item whose id is
  # already present replaces the stored item.
  class IdSet
    def initialize
      @data = {}
    end

    def add(item)
      @data[item.id] = item
    end

    def remove(item)
      @data.delete(item.id)
    end

    def to_a
      @data.values
    end
  end
end
175
+ end