tsuga 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +12 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +146 -0
- data/Guardfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +161 -0
- data/Rakefile +1 -0
- data/lib/tsuga.rb +11 -0
- data/lib/tsuga/adapter.rb +4 -0
- data/lib/tsuga/adapter/active_record/base.rb +61 -0
- data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
- data/lib/tsuga/adapter/active_record/migration.rb +50 -0
- data/lib/tsuga/adapter/active_record/record.rb +15 -0
- data/lib/tsuga/adapter/active_record/test.rb +73 -0
- data/lib/tsuga/adapter/memory/base.rb +146 -0
- data/lib/tsuga/adapter/memory/cluster.rb +32 -0
- data/lib/tsuga/adapter/memory/test.rb +27 -0
- data/lib/tsuga/adapter/mongoid/base.rb +41 -0
- data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
- data/lib/tsuga/adapter/mongoid/record.rb +16 -0
- data/lib/tsuga/adapter/mongoid/test.rb +77 -0
- data/lib/tsuga/adapter/sequel/base.rb +57 -0
- data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
- data/lib/tsuga/adapter/sequel/record.rb +15 -0
- data/lib/tsuga/adapter/sequel/test.rb +73 -0
- data/lib/tsuga/adapter/shared.rb +4 -0
- data/lib/tsuga/adapter/shared/cluster.rb +19 -0
- data/lib/tsuga/errors.rb +3 -0
- data/lib/tsuga/model/cluster.rb +147 -0
- data/lib/tsuga/model/point.rb +206 -0
- data/lib/tsuga/model/record.rb +20 -0
- data/lib/tsuga/model/tile.rb +136 -0
- data/lib/tsuga/service/aggregator.rb +175 -0
- data/lib/tsuga/service/clusterer.rb +260 -0
- data/lib/tsuga/service/labeler.rb +20 -0
- data/lib/tsuga/version.rb +3 -0
- data/script/benchmark-aggregator.rb +72 -0
- data/script/benchmark-clusterer.rb +102 -0
- data/spec/adapter/memory/base_spec.rb +174 -0
- data/spec/adapter/memory/cluster_spec.rb +39 -0
- data/spec/adapter/shared/cluster_spec.rb +56 -0
- data/spec/integration/active_record_spec.rb +10 -0
- data/spec/integration/memory_spec.rb +10 -0
- data/spec/integration/mongoid_spec.rb +10 -0
- data/spec/integration/sequel_spec.rb +10 -0
- data/spec/integration/shared.rb +50 -0
- data/spec/model/point_spec.rb +102 -0
- data/spec/model/tile_spec.rb +116 -0
- data/spec/service/aggregator_spec.rb +143 -0
- data/spec/service/clusterer_spec.rb +84 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/support/mongoid.yml +17 -0
- data/tsuga.gemspec +29 -0
- metadata +226 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'tsuga/model/record'
|
2
|
+
require 'tsuga/adapter/mongoid/base'
|
3
|
+
require 'mongoid'
|
4
|
+
|
5
|
+
module Tsuga::Adapter::Mongoid
  # Mixin for Mongoid-backed record models.
  #
  # Including it pulls in the Mongoid adapter Base and Tsuga::Model::Record
  # (in that order) and extends the including class with ScopeMethods.
  module Record
    # Class-level scope methods for record models. Empty for now; it exists
    # so the including class is extended the same way as other adapter models.
    module ScopeMethods
    end

    # Hook invoked by Ruby when this module is included into +by+.
    def self.included(by)
      # `send` is kept because Module#include is private on older Rubies.
      [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
      by.extend(ScopeMethods)
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'tsuga/adapter/mongoid/base'
|
2
|
+
require 'tsuga/adapter/mongoid/cluster'
|
3
|
+
require 'tsuga/adapter/mongoid/record'
|
4
|
+
require 'mongoid'
|
5
|
+
require 'ostruct'
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
module Tsuga::Adapter::Mongoid
  # Test support: lazily builds Mongoid-backed cluster and record models and
  # exposes them through `Test.clusters` and `Test.records`.
  module Test
    class << self
      extend Forwardable

      # `records` and `clusters` are forwarded to the struct returned by #models.
      def_delegators :models, :records, :clusters

      # Struct holding both model classes; built on first call, then memoized.
      def models
        @_models ||= _build_test_models
      end

      private

      # Loads the Mongoid configuration for the :test environment, creates the
      # indexes declared on both models, registers the models as constants of
      # this module and returns them wrapped in an OpenStruct.
      #
      # NOTE: the configuration path is relative to the current working
      # directory.
      def _build_test_models
        ::Mongoid.load!("spec/support/mongoid.yml", :test)
        _cluster_model.create_indexes
        _record_model.create_indexes

        # FIXME: hardly elegant but Mongoid insists on a named class.
        const_set(:Cluster, _cluster_model)
        const_set(:Record, _record_model)

        OpenStruct.new(clusters: _cluster_model, records: _record_model)
      end

      # Anonymous Mongoid document class stored in the 'clusters' collection,
      # indexed on tilecode, with the Mongoid cluster adapter mixed in.
      def _cluster_model
        @_cluster_model ||= Class.new do
          include Mongoid::Document

          [
            :tilecode, :depth, :geohash, :lat, :lng,
            :parent_id, :children_type, :children_ids,
            :sum_lat, :sum_lng, :ssq_lat, :ssq_lng, :weight
          ].each { |attribute| field attribute }

          store_in collection: 'clusters'
          index(tilecode: 1)

          include Tsuga::Adapter::Mongoid::Cluster
        end
      end

      # Anonymous Mongoid document class stored in the 'records' collection,
      # indexed on geohash, with the Mongoid record adapter mixed in.
      def _record_model
        @_record_model ||= Class.new do
          include Mongoid::Document

          [:geohash, :lat, :lng].each { |attribute| field attribute }

          store_in collection: 'records'
          index(geohash: 1)

          include Tsuga::Adapter::Mongoid::Record
        end
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'tsuga/errors'
|
2
|
+
require 'tsuga/adapter'
|
3
|
+
require 'sequel'
|
4
|
+
require 'delegate'
|
5
|
+
|
6
|
+
module Tsuga::Adapter::Sequel
  # Behaviour shared by the Sequel-backed models (clusters and records):
  # a few instance-level aliases plus bulk helpers installed on the dataset.
  module Base
    # Hook invoked when this module is included into +by+; registers
    # DatasetMethods on the model through Sequel's `dataset_module`.
    def self.included(by)
      by.dataset_module DatasetMethods
    end

    # Primary key, memoized in @_id once `super` returns a truthy value.
    def id
      @_id ||= super
    end

    # Saves the record (delegates to Sequel's `save`).
    def persist!
      save
    end

    # True when the record has not been saved yet (delegates to `new?`).
    def new_record?
      new?
    end

    # Methods made available on the model's dataset.
    module DatasetMethods
      # Inserts all +new_records+ in bulk, using each record's `to_hash`.
      def mass_create(new_records)
        multi_insert(new_records.map(&:to_hash))
      end

      # Saves every record in +records+ inside a single transaction.
      def mass_update(records)
        db.transaction do
          records.each(&:save)
        end
      end

      # Looks up a single row by +id+ through the dataset's `[]`.
      def find_by_id(id)
        self[id]
      end

      # Deletes every row in the current dataset.
      #
      # Calls `delete` directly instead of `where.delete`: an argument-less
      # `where` adds no condition, and newer Sequel releases reject filter
      # calls that have neither arguments nor a block.
      def delete_all
        delete
      end

      # Returns the :id value of every row in the dataset.
      def collect_ids
        map(:id)
      end

      # Yields each row of the dataset to the given block.
      def find_each
        # TODO: pagination would be nice to have here, but seems not to
        # work out-of-the-box with Sequel
        # where.extension(:pagination).each_page(2000) do |page|
        #   page.each { |r| yield r }
        # end
        all.each { |r| yield r }
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'tsuga/model/cluster'
|
2
|
+
require 'tsuga/model/tile'
|
3
|
+
require 'tsuga/adapter/sequel/base'
|
4
|
+
require 'tsuga/adapter/shared/cluster'
|
5
|
+
|
6
|
+
module Tsuga::Adapter::Sequel
  # Mixin for Sequel-backed cluster models.
  #
  # The children_ids column holds a comma-separated string; the accessors
  # below expose it as an array of integers.
  module Cluster
    # Hook invoked when this module is included into +by+: mixes in the Sequel
    # Base, the cluster model and the shared cluster behaviour, then installs
    # the Scopes on the model's dataset.
    def self.included(by)
      by.send :include, Base
      by.send :include, Tsuga::Model::Cluster
      by.send :include, Tsuga::Adapter::Shared::Cluster
      by.dataset_module Scopes
    end

    # Child ids as an array of integers, parsed from the stored string and
    # memoized. Returns an empty array when nothing is stored.
    def children_ids
      @_children_ids ||= begin
        stored = super
        stored ? stored.split(',').map(&:to_i) : []
      end
    end

    # Assigns the child ids and serializes them into the underlying column.
    #
    # The column is always written through `super`. Comparing against the
    # cached array first would skip the write after an in-place mutation of
    # the array returned by #children_ids (both sides would be the same
    # object). A nil +value+ clears the column and the cache instead of
    # failing on `nil.join`.
    def children_ids=(value)
      @_children_ids = value
      super(value.nil? ? nil : value.join(','))
      @_children_ids
    end

    # Dataset-level scopes.
    module Scopes
      # Restricts the dataset to rows at the given +depth+.
      def at_depth(depth)
        where(depth: depth)
      end

      # Restricts the dataset to rows whose tilecode matches the prefix of any
      # of the given tiles. Raises ArgumentError if the tiles do not all share
      # the same depth.
      def in_tile(*tiles)
        depths = tiles.map(&:depth).uniq
        if depths.length > 1
          raise ArgumentError, 'all tile must be at same depth'
        end
        where(tilecode: tiles.map(&:prefix))
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'tsuga/model/record'
|
2
|
+
require 'tsuga/adapter/sequel/base'
|
3
|
+
|
4
|
+
module Tsuga::Adapter::Sequel
  # Mixin for Sequel-backed record models.
  #
  # Including it pulls in the Sequel adapter Base and Tsuga::Model::Record
  # (in that order) and installs Scopes on the model's dataset.
  module Record
    # Dataset-level scopes for record models. Empty for now.
    module Scopes
    end

    # Hook invoked by Ruby when this module is included into +by+.
    def self.included(by)
      # `send` is kept because Module#include is private on older Rubies.
      [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
      by.dataset_module(Scopes)
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'tsuga/adapter/sequel/base'
|
2
|
+
require 'tsuga/adapter/sequel/cluster'
|
3
|
+
require 'tsuga/adapter/sequel/record'
|
4
|
+
require 'sequel'
|
5
|
+
require 'sqlite3'
|
6
|
+
require 'ostruct'
|
7
|
+
require 'forwardable'
|
8
|
+
|
9
|
+
module Tsuga::Adapter::Sequel
  # Test support: (re)creates the test tables and builds Sequel models on top
  # of them, exposed through `Test.clusters` and `Test.records`.
  module Test
    class << self
      extend Forwardable

      # `records` and `clusters` are forwarded to the struct returned by #models.
      def_delegators :models, :records, :clusters

      # Struct holding both model classes; built on first call, then memoized.
      def models
        @_models ||= _build_test_models
      end

      private

      # Makes sure a connection exists: reuses the first registered Sequel
      # database, or opens one with `Sequel.sqlite` when none is registered.
      def _db
        @_db ||= (Sequel::DATABASES.first || Sequel.sqlite)
      end

      # Drops +table_name+ if it exists, then creates it from the schema block.
      def _recreate_table(table_name, &schema)
        _db.drop_table?(table_name)
        _db.create_table(table_name, &schema)
      end

      # Recreates the :test_records and :test_clusters tables from scratch.
      def _prepare_tables
        _recreate_table(:test_records) do
          primary_key :id
          String :geohash, size: 32
          Float :lat
          Float :lng

          index :geohash
        end

        _recreate_table(:test_clusters) do
          primary_key :id
          Integer :depth
          String :geohash, size: 32
          String :tilecode, size: 32
          Float :lat
          Float :lng
          Integer :parent_id
          String :children_type
          String :children_ids # FIXME
          Double :sum_lat
          Double :sum_lng
          Double :ssq_lat
          Double :ssq_lng
          Integer :weight

          index :tilecode
        end
      end

      # Prepares the tables, then builds one anonymous Sequel model per table
      # with the matching Tsuga adapter mixed in.
      def _build_test_models
        _prepare_tables

        clusters = Class.new(Sequel::Model(:test_clusters)) { include Tsuga::Adapter::Sequel::Cluster }
        records = Class.new(Sequel::Model(:test_records)) { include Tsuga::Adapter::Sequel::Record }

        OpenStruct.new(clusters: clusters, records: records)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'tsuga/adapter/shared'
|
2
|
+
|
3
|
+
# Shared functionality between adapters
|
4
|
+
module Tsuga::Adapter::Shared::Cluster
  # Direct children of this cluster, loaded one by one through the class-level
  # `find_by_id`. Returns an empty array when children_ids is nil.
  def children
    return [] if children_ids.nil?

    finder = self.class
    children_ids.map { |child_id| finder.find_by_id(child_id) }
  end

  # Clusters at the bottom of this cluster's subtree.
  #
  # A cluster counts as a leaf (and is returned alone) when its children are
  # not of its own class, or when it has no child ids. Otherwise the leaves
  # of all its children are concatenated.
  def leaves
    holds_clusters = (children_type == self.class.name)
    return [self] unless holds_clusters && !children_ids.nil? && !children_ids.empty?

    children.flat_map(&:leaves)
  end
end
|
data/lib/tsuga/errors.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'tsuga'
|
2
|
+
require 'tsuga/model/point'
|
3
|
+
|
4
|
+
module Tsuga::Model
  # Concretions (provided by adapters) have the following accessors:
  # - :depth
  # - :parent_id
  # - :children_type (Record or Cluster)
  # - :children_ids
  # - :weight (count of Record in subtree)
  # - :sum_lat, :sum_lng
  # - :ssq_lat, :ssq_lng
  #
  # Respond to class methods:
  # - :in_tile(Tile) (scopish, response responds to :find_each)
  # - :at_depth(depth)
  # - :delete_all
  # - :find_by_id(id)
  #
  # Respond to the following instance methods:
  # - :destroy
  module Cluster
    include Tsuga::Model::PointTrait

    # Defaults a new cluster to depth 1, positioned at latitude/longitude 0.
    def initialize
      super
      self.depth ||= 1
      # equator/greenwich
      self.lat   ||= 0
      self.lng   ||= 0
    end

    # latitude deviation in cluster
    #
    # Computed as sqrt(E[x^2] - E[x]^2) from the stored sum and sum of
    # squares. `fdiv` forces float division so Integer sums or weights are
    # not truncated.
    def dlat
      @_dlat ||= _safe_sqrt(ssq_lat.fdiv(weight) - sum_lat.fdiv(weight)**2)
    end

    # longitude deviation in cluster (same formula as #dlat)
    def dlng
      @_dlng ||= _safe_sqrt(ssq_lng.fdiv(weight) - sum_lng.fdiv(weight)**2)
    end

    # radius of cluster
    def radius
      @_radius ||= Math.sqrt(dlat ** 2 + dlng ** 2)
    end

    # density (weight per unit area)
    def density
      @_density ||= begin
        # min. radius 1.4e-4 (about 15m at european latitudes)
        # for 1-point clusters where density would otherwise be infinite
        our_radius = [radius, 1.4e-4].max
        # Math.log(weight / (our_radius ** 2)) / Math.log(2)
        weight / (our_radius ** 2)
      end
    end

    # Sets the geohash and refreshes the tilecode derived from it.
    def geohash=(value)
      super(value)
      _update_tilecode
      geohash
    end

    # Sets the depth and refreshes the tilecode derived from it.
    def depth=(value)
      super(value)
      _update_tilecode
      depth
    end

    # Folds +other+ into this cluster: weights, sums and sums of squares are
    # added, the position becomes the weighted mean, and the child ids are
    # concatenated.
    #
    # Raises ArgumentError unless both clusters share depth and parent.
    def merge(other)
      raise ArgumentError, 'not same depth'  unless depth == other.depth
      raise ArgumentError, 'not same parent' unless parent_id == other.parent_id

      self.weight  += other.weight
      self.sum_lat += other.sum_lat
      self.sum_lng += other.sum_lng
      self.ssq_lat += other.ssq_lat
      self.ssq_lng += other.ssq_lng
      # fdiv: keep the mean exact even when the sums are Integers
      self.lat      = sum_lat.fdiv(weight)
      self.lng      = sum_lng.fdiv(weight)
      self.children_ids += other.children_ids

      # dirty calculated values
      @_dlng = @_dlat = @_radius = @_density = nil
    end

    module ClassMethods
      # Cluster factory.
      # +other+ is either a Cluster or a Record
      #
      # The new cluster sits at +depth+, takes the position of +other+ and has
      # +other+ as its only child. Statistics are copied from a Cluster, or
      # initialised for a single point otherwise.
      #
      # FIXME: there's a potential for overflow here on large datasets on the sum-
      # and sum-of-squares fields. it can be mitigated by using double-precision
      # fields, or calculating sums only on the children (instead of the subtree)
      def build_from(depth, other)
        c = new()
        c.depth = depth

        c.lat           = other.lat
        c.lng           = other.lng
        c.children_ids  = [other.id]
        c.children_type = other.class.name

        case other
        when Cluster
          c.weight      = other.weight
          c.sum_lng     = other.sum_lng
          c.sum_lat     = other.sum_lat
          c.ssq_lng     = other.ssq_lng
          c.ssq_lat     = other.ssq_lat
        else
          c.weight      = 1
          c.sum_lng     = other.lng
          c.sum_lat     = other.lat
          c.ssq_lng     = other.lng ** 2
          c.ssq_lat     = other.lat ** 2
        end

        c.geohash # force geohash calculation
        return c
      end
    end

    # Hook invoked when this module is included into +by+; adds the factory
    # class methods.
    def self.included(by)
      by.extend(ClassMethods)
    end

    private

    # Square root that returns 0 for negative input (the variance formula can
    # dip slightly below zero through rounding).
    def _safe_sqrt(value)
      (value < 0) ? 0 : Math.sqrt(value)
    end

    # Recomputes the tilecode as `prefix(depth)` when both geohash and depth
    # are set, and clears it otherwise.
    def _update_tilecode
      if geohash && depth
        self.tilecode = prefix(depth)
      else
        self.tilecode = nil
      end
    end
  end
end
|