tsuga 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +12 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +146 -0
- data/Guardfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +161 -0
- data/Rakefile +1 -0
- data/lib/tsuga.rb +11 -0
- data/lib/tsuga/adapter.rb +4 -0
- data/lib/tsuga/adapter/active_record/base.rb +61 -0
- data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
- data/lib/tsuga/adapter/active_record/migration.rb +50 -0
- data/lib/tsuga/adapter/active_record/record.rb +15 -0
- data/lib/tsuga/adapter/active_record/test.rb +73 -0
- data/lib/tsuga/adapter/memory/base.rb +146 -0
- data/lib/tsuga/adapter/memory/cluster.rb +32 -0
- data/lib/tsuga/adapter/memory/test.rb +27 -0
- data/lib/tsuga/adapter/mongoid/base.rb +41 -0
- data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
- data/lib/tsuga/adapter/mongoid/record.rb +16 -0
- data/lib/tsuga/adapter/mongoid/test.rb +77 -0
- data/lib/tsuga/adapter/sequel/base.rb +57 -0
- data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
- data/lib/tsuga/adapter/sequel/record.rb +15 -0
- data/lib/tsuga/adapter/sequel/test.rb +73 -0
- data/lib/tsuga/adapter/shared.rb +4 -0
- data/lib/tsuga/adapter/shared/cluster.rb +19 -0
- data/lib/tsuga/errors.rb +3 -0
- data/lib/tsuga/model/cluster.rb +147 -0
- data/lib/tsuga/model/point.rb +206 -0
- data/lib/tsuga/model/record.rb +20 -0
- data/lib/tsuga/model/tile.rb +136 -0
- data/lib/tsuga/service/aggregator.rb +175 -0
- data/lib/tsuga/service/clusterer.rb +260 -0
- data/lib/tsuga/service/labeler.rb +20 -0
- data/lib/tsuga/version.rb +3 -0
- data/script/benchmark-aggregator.rb +72 -0
- data/script/benchmark-clusterer.rb +102 -0
- data/spec/adapter/memory/base_spec.rb +174 -0
- data/spec/adapter/memory/cluster_spec.rb +39 -0
- data/spec/adapter/shared/cluster_spec.rb +56 -0
- data/spec/integration/active_record_spec.rb +10 -0
- data/spec/integration/memory_spec.rb +10 -0
- data/spec/integration/mongoid_spec.rb +10 -0
- data/spec/integration/sequel_spec.rb +10 -0
- data/spec/integration/shared.rb +50 -0
- data/spec/model/point_spec.rb +102 -0
- data/spec/model/tile_spec.rb +116 -0
- data/spec/service/aggregator_spec.rb +143 -0
- data/spec/service/clusterer_spec.rb +84 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/support/mongoid.yml +17 -0
- data/tsuga.gemspec +29 -0
- metadata +226 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'tsuga/model/record'
|
2
|
+
require 'tsuga/adapter/mongoid/base'
|
3
|
+
require 'mongoid'
|
4
|
+
|
5
|
+
module Tsuga::Adapter::Mongoid
|
6
|
+
# Mixin for Mongoid document classes that act as Tsuga records.
#
# Including it pulls in the adapter's +Base+ behaviour and the generic
# +Tsuga::Model::Record+ contract, and extends the host class with
# +ScopeMethods+.
module Record
  # Class-level scopes specific to records (none are defined yet).
  module ScopeMethods; end

  # Include hook: wires the host class (+by+) with the mixins listed above.
  def self.included(by)
    [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
    by.extend(ScopeMethods)
  end
end
|
16
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'tsuga/adapter/mongoid/base'
|
2
|
+
require 'tsuga/adapter/mongoid/cluster'
|
3
|
+
require 'tsuga/adapter/mongoid/record'
|
4
|
+
require 'mongoid'
|
5
|
+
require 'ostruct'
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
module Tsuga::Adapter::Mongoid
|
9
|
+
# Test support for the Mongoid adapter: lazily builds a cluster model and a
# record model and exposes them as +Test.clusters+ and +Test.records+.
module Test
  class << self
    extend Forwardable
    def_delegators :models, :records, :clusters

    # Memoised container responding to +clusters+ and +records+.
    def models
      @_models ||= _build_test_models
    end


    private


    # Loads the Mongoid test configuration, creates the indexes of both
    # models, registers them as named constants and returns them wrapped
    # in an OpenStruct.
    def _build_test_models
      ::Mongoid.load!("spec/support/mongoid.yml", :test)
      _cluster_model.create_indexes
      _record_model.create_indexes

      # FIXME: hardly elegant but Mongoid insists on a named class.
      const_set(:Cluster, _cluster_model)
      const_set(:Record, _record_model)

      OpenStruct.new(clusters: _cluster_model, records: _record_model)
    end


    # Anonymous Mongoid document stored in the 'clusters' collection,
    # indexed on +tilecode+ (memoised).
    def _cluster_model
      @_cluster_model ||= Class.new do
        include Mongoid::Document

        [
          :tilecode, :depth, :geohash, :lat, :lng,
          :parent_id, :children_type, :children_ids,
          :sum_lat, :sum_lng, :ssq_lat, :ssq_lng, :weight
        ].each { |attribute| field attribute }

        store_in(collection: 'clusters')
        index(tilecode: 1)

        include Tsuga::Adapter::Mongoid::Cluster
      end
    end


    # Anonymous Mongoid document stored in the 'records' collection,
    # indexed on +geohash+ (memoised).
    def _record_model
      @_record_model ||= Class.new do
        include Mongoid::Document

        [:geohash, :lat, :lng].each { |attribute| field attribute }

        store_in(collection: 'records')
        index(geohash: 1)

        include Tsuga::Adapter::Mongoid::Record
      end
    end
  end
end
|
77
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'tsuga/errors'
|
2
|
+
require 'tsuga/adapter'
|
3
|
+
require 'sequel'
|
4
|
+
require 'delegate'
|
5
|
+
|
6
|
+
module Tsuga::Adapter::Sequel
|
7
|
+
# Behaviour shared by the Sequel-backed Tsuga models.
#
# Instance methods adapt Sequel's API to the names Tsuga calls
# (+persist!+, +new_record?+); +DatasetMethods+ is registered on the host
# class through +dataset_module+.
module Base
  # Include hook: registers the dataset helpers on the host model (+by+).
  def self.included(by)
    by.dataset_module DatasetMethods
  end

  # Primary key, memoised after the first read.
  def id
    @_id ||= super
  end

  # Saves the row (delegates to +save+).
  def persist!
    save
  end

  # True when the row has not been saved yet (delegates to +new?+).
  def new_record?
    new?
  end

  module DatasetMethods
    # Inserts all +new_records+ with a single +multi_insert+ call, using
    # each record's +to_hash+ representation.
    def mass_create(new_records)
      multi_insert(new_records.map(&:to_hash))
    end

    # Saves every record inside one database transaction.
    def mass_update(records)
      db.transaction do
        records.each(&:save)
      end
    end

    # Looks up a single row by +id+.
    def find_by_id(id)
      self[id]
    end

    # Deletes every row of the current dataset.
    #
    # Calls +delete+ directly: going through an argument-less +where+ adds
    # nothing to the scope and is rejected by newer Sequel releases.
    def delete_all
      delete
    end

    # Returns the ids of all rows in the dataset.
    def collect_ids
      map(:id)
    end

    # Yields each row of the dataset; returns an Enumerator when called
    # without a block.
    def find_each
      return enum_for(:find_each) unless block_given?

      # TODO: pagination would be nice to have here, but seems not to
      # work out-of-the-box with Sequel
      # where.extension(:pagination).each_page(2000) do |page|
      #   page.each { |r| yield r }
      # end
      all.each { |r| yield r }
    end
  end
end
|
57
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'tsuga/model/cluster'
|
2
|
+
require 'tsuga/model/tile'
|
3
|
+
require 'tsuga/adapter/sequel/base'
|
4
|
+
require 'tsuga/adapter/shared/cluster'
|
5
|
+
|
6
|
+
module Tsuga::Adapter::Sequel
|
7
|
+
# Sequel flavour of a Tsuga cluster model.
#
# Child ids are persisted in one string column as a comma-separated list
# and exposed to callers as an Array of Integers.
module Cluster
  # Include hook: adds the adapter base, the generic cluster model and the
  # shared cluster helpers to the host class (+by+), then registers +Scopes+.
  def self.included(by)
    by.send :include, Base
    by.send :include, Tsuga::Model::Cluster
    by.send :include, Tsuga::Adapter::Shared::Cluster
    by.dataset_module Scopes
  end

  # Ids parsed from the stored string (memoised); +[]+ when nothing is stored.
  def children_ids
    @_children_ids ||= begin
      stored = super
      stored ? stored.split(',').map(&:to_i) : []
    end
  end

  # Stores +value+ (an Array of ids, or nil for "no children") as a
  # comma-separated string.
  #
  # The column writer is always invoked. The previous comparison against the
  # memoised array missed updates when that same array had been mutated in
  # place before being assigned back, and crashed on nil.
  # NOTE(review): relies on the underlying column setter ignoring unchanged
  # values so that no spurious dirty flag is raised - confirm against Sequel.
  def children_ids=(value)
    @_children_ids = Array(value)
    super(@_children_ids.join(','))
    @_children_ids
  end

  module Scopes
    # Restricts the dataset to clusters at +depth+.
    def at_depth(depth)
      where(depth: depth)
    end

    # Restricts the dataset to clusters whose tilecode matches the prefix of
    # one of +tiles+. Raises ArgumentError when the tiles differ in depth.
    def in_tile(*tiles)
      depths = tiles.map(&:depth).uniq
      if depths.length > 1
        raise ArgumentError, 'all tile must be at same depth'
      end
      where(tilecode: tiles.map(&:prefix))
    end
  end
end
|
43
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'tsuga/model/record'
|
2
|
+
require 'tsuga/adapter/sequel/base'
|
3
|
+
|
4
|
+
module Tsuga::Adapter::Sequel
|
5
|
+
# Mixin for Sequel model classes that act as Tsuga records.
#
# Including it adds the adapter's +Base+ behaviour and the generic
# +Tsuga::Model::Record+ contract, and registers +Scopes+ as a dataset module.
module Record
  # Dataset-level scopes specific to records (none are defined yet).
  module Scopes; end

  # Include hook: wires the host class (+by+) with the mixins listed above.
  def self.included(by)
    [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
    by.dataset_module(Scopes)
  end
end
|
15
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'tsuga/adapter/sequel/base'
|
2
|
+
require 'tsuga/adapter/sequel/cluster'
|
3
|
+
require 'tsuga/adapter/sequel/record'
|
4
|
+
require 'sequel'
|
5
|
+
require 'sqlite3'
|
6
|
+
require 'ostruct'
|
7
|
+
require 'forwardable'
|
8
|
+
|
9
|
+
module Tsuga::Adapter::Sequel
|
10
|
+
# Test support for the Sequel adapter: (re)creates two scratch tables and
# exposes models bound to them as +Test.clusters+ and +Test.records+.
module Test
  class << self
    extend Forwardable
    def_delegators :models, :records, :clusters

    # Memoised container responding to +clusters+ and +records+.
    def models
      @_models ||= _build_test_models
    end

    private

    # Makes sure a connection exists: reuses the first registered Sequel
    # database, otherwise opens one with +Sequel.sqlite+.
    def _db
      return @_db if @_db
      @_db = Sequel::DATABASES.first || Sequel.sqlite
    end

    # Drops +name+ if it exists, then creates it from the +schema+ block.
    def _reset_table(name, &schema)
      _db.drop_table?(name)
      _db.create_table(name, &schema)
    end

    # Recreates the :test_records and :test_clusters tables from scratch.
    def _prepare_tables
      _reset_table(:test_records) do
        primary_key :id
        String :geohash, size: 32
        Float :lat
        Float :lng

        index :geohash
      end

      _reset_table(:test_clusters) do
        primary_key :id
        Integer :depth
        String :geohash, size: 32
        String :tilecode, size: 32
        Float :lat
        Float :lng
        Integer :parent_id
        String :children_type
        String :children_ids # FIXME
        Double :sum_lat
        Double :sum_lng
        Double :ssq_lat
        Double :ssq_lng
        Integer :weight

        index :tilecode
      end
    end

    # Anonymous Sequel model over +table+ with +mixin+ included.
    def _model_on(table, mixin)
      Class.new(Sequel::Model(table)) { include mixin }
    end

    # Prepares the tables, then builds the cluster model followed by the
    # record model and returns both wrapped in an OpenStruct.
    def _build_test_models
      _prepare_tables

      OpenStruct.new(
        clusters: _model_on(:test_clusters, Tsuga::Adapter::Sequel::Cluster),
        records: _model_on(:test_records, Tsuga::Adapter::Sequel::Record)
      )
    end
  end
end
|
73
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'tsuga/adapter/shared'
|
2
|
+
|
3
|
+
# Shared functionality between adapters
|
4
|
+
module Tsuga::Adapter::Shared::Cluster
|
5
|
+
# Resolves +children_ids+ into objects by calling +find_by_id+ on this
# object's class, one id at a time. Returns +[]+ when +children_ids+ is nil.
def children
  if children_ids.nil?
    []
  else
    children_ids.map { |child_id| self.class.find_by_id(child_id) }
  end
end
|
11
|
+
|
12
|
+
# Returns the bottom-most clusters of this subtree.
#
# A cluster is its own leaf when its children are not of its own class
# (compared through +children_type+ and the class name) or when it has no
# child ids. Otherwise the leaves of all children are concatenated, in
# child order.
def leaves
  return [self] if children_type != self.class.name || children_ids.nil? || children_ids.empty?

  # flat_map concatenates in one pass instead of building a new
  # intermediate array per child as map + inject(:+) did.
  children.flat_map(&:leaves)
end
|
19
|
+
end
|
data/lib/tsuga/errors.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'tsuga'
|
2
|
+
require 'tsuga/model/point'
|
3
|
+
|
4
|
+
module Tsuga::Model
|
5
|
+
# Concretions (provided by adapters) have the following accessors:
|
6
|
+
# - :depth
|
7
|
+
# - :parent_id
|
8
|
+
# - :children_type (Record or Cluster)
|
9
|
+
# - :children_ids
|
10
|
+
# - :weight (count of Record in subtree)
|
11
|
+
# - :sum_lat, :sum_lng
|
12
|
+
# - :ssq_lat, :ssq_lng
|
13
|
+
#
|
14
|
+
# Respond to class methods:
|
15
|
+
# - :in_tile(Tile) (scopish, response responds to :find_each)
|
16
|
+
# - :at_depth(depth)
|
17
|
+
# - :delete_all
|
18
|
+
# - :find(id)
|
19
|
+
#
|
20
|
+
# Respond to the following instance methods:
|
21
|
+
# - :destroy
|
22
|
+
module Cluster
|
23
|
+
include Tsuga::Model::PointTrait
|
24
|
+
|
25
|
+
# Runs the host class initialiser, then fills in defaults for any of
# +depth+, +lat+ and +lng+ that are still unset.
def initialize
  super
  self.depth = 1 unless depth
  # default position: equator/greenwich
  self.lat = 0 unless lat
  self.lng = 0 unless lng
end
|
32
|
+
|
33
|
+
# latitude deviation in cluster
|
34
|
+
# Memoised spread of the cluster along the latitude axis, computed as
# sqrt(ssq_lat / weight - (sum_lat / weight)^2).
#
# Divisions use +fdiv+ so that Integer sums and weights are not truncated.
def dlat
  @_dlat ||= begin
    mean = sum_lat.fdiv(weight)
    _safe_sqrt(ssq_lat.fdiv(weight) - mean**2)
  end
end
|
37
|
+
|
38
|
+
# longitude deviation in cluster
|
39
|
+
# Memoised spread of the cluster along the longitude axis, computed as
# sqrt(ssq_lng / weight - (sum_lng / weight)^2).
#
# Divisions use +fdiv+ so that Integer sums and weights are not truncated.
def dlng
  @_dlng ||= begin
    mean = sum_lng.fdiv(weight)
    _safe_sqrt(ssq_lng.fdiv(weight) - mean**2)
  end
end
|
42
|
+
|
43
|
+
# radius of cluster
|
44
|
+
# Memoised cluster radius: the Euclidean norm of (+dlat+, +dlng+).
def radius
  return @_radius if @_radius

  @_radius = Math.sqrt(dlat**2 + dlng**2)
end
|
47
|
+
|
48
|
+
# density (weight per unit area)
|
49
|
+
# Memoised density: +weight+ divided by the squared radius.
def density
  return @_density if @_density

  # min. radius 1.4e-4 (about 15m at european latitudes)
  # for 1-point clusters where density would otherwise be infinite
  clamped_radius = [radius, 1.4e-4].max
  # (log2 variant kept for reference)
  # Math.log(weight / (clamped_radius ** 2)) / Math.log(2)
  @_density = weight / (clamped_radius**2)
end
|
58
|
+
|
59
|
+
# Writes the geohash through the host class setter, refreshes the derived
# tilecode, and returns the geohash as read back from the reader.
def geohash=(value)
  super(value)
  _update_tilecode
  geohash
end
|
64
|
+
|
65
|
+
# Writes the depth through the host class setter, refreshes the derived
# tilecode, and returns the depth as read back from the reader.
def depth=(value)
  super(value)
  _update_tilecode
  depth
end
|
70
|
+
|
71
|
+
|
72
|
+
# Absorbs +other+ into this cluster.
#
# Adds the other cluster's weight, sums, sums of squares and child ids to
# this one, then recomputes +lat+/+lng+ as the weighted centre
# (sum / weight). The centre is computed with +fdiv+ so Integer sums are
# not truncated.
#
# Raises ArgumentError when the clusters differ in depth or parent.
def merge(other)
  raise ArgumentError, 'not same depth' unless depth == other.depth
  raise ArgumentError, 'not same parent' unless parent_id == other.parent_id

  self.weight += other.weight
  self.sum_lat += other.sum_lat
  self.sum_lng += other.sum_lng
  self.ssq_lat += other.ssq_lat
  self.ssq_lng += other.ssq_lng
  self.lat = sum_lat.fdiv(weight)
  self.lng = sum_lng.fdiv(weight)
  self.children_ids += other.children_ids

  # dirty calculated values
  @_dlng = @_dlat = @_radius = @_density = nil
end
|
88
|
+
|
89
|
+
|
90
|
+
module ClassMethods
  # Cluster factory: builds a new cluster at +depth+ whose single child is
  # +other+, which is either a Cluster or a Record.
  #
  # Position, child id and child type are copied from +other+. The
  # statistics are copied as-is from a Cluster; for anything else they are
  # seeded for a single point (weight 1, sums equal to the coordinates,
  # sums of squares equal to their squares).
  #
  # FIXME: there's a potential for overflow here on large datasets on the sum-
  # and sum-of-squares fields. it can be mitigated by using double-precision
  # fields, or calculating sums only on the children (instead of the subtree)
  def build_from(depth, other)
    cluster = new
    cluster.depth = depth

    cluster.lat = other.lat
    cluster.lng = other.lng
    cluster.children_ids = [other.id]
    cluster.children_type = other.class.name

    weight, sum_lng, sum_lat, ssq_lng, ssq_lat =
      case other
      when Cluster
        [other.weight, other.sum_lng, other.sum_lat, other.ssq_lng, other.ssq_lat]
      else
        [1, other.lng, other.lat, other.lng**2, other.lat**2]
      end

    cluster.weight = weight
    cluster.sum_lng = sum_lng
    cluster.sum_lat = sum_lat
    cluster.ssq_lng = ssq_lng
    cluster.ssq_lat = ssq_lat

    cluster.geohash # force geohash calculation
    cluster
  end
end
|
125
|
+
|
126
|
+
# Include hook: gives the host class (+by+) the +ClassMethods+ factory.
def self.included(by)
  by.extend ClassMethods
end
|
129
|
+
|
130
|
+
|
131
|
+
private
|
132
|
+
|
133
|
+
|
134
|
+
# Square root that yields 0 for negative input instead of raising.
def _safe_sqrt(value)
  return 0 if value < 0

  Math.sqrt(value)
end
|
137
|
+
|
138
|
+
|
139
|
+
# Recomputes +tilecode+ as +prefix(depth)+ when both geohash and depth are
# set; clears it otherwise.
def _update_tilecode
  self.tilecode = (geohash && depth) ? prefix(depth) : nil
end
|
146
|
+
end
|
147
|
+
end
|