tsuga 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +16 -0
  7. data/Gemfile.lock +146 -0
  8. data/Guardfile +8 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +161 -0
  11. data/Rakefile +1 -0
  12. data/lib/tsuga.rb +11 -0
  13. data/lib/tsuga/adapter.rb +4 -0
  14. data/lib/tsuga/adapter/active_record/base.rb +61 -0
  15. data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
  16. data/lib/tsuga/adapter/active_record/migration.rb +50 -0
  17. data/lib/tsuga/adapter/active_record/record.rb +15 -0
  18. data/lib/tsuga/adapter/active_record/test.rb +73 -0
  19. data/lib/tsuga/adapter/memory/base.rb +146 -0
  20. data/lib/tsuga/adapter/memory/cluster.rb +32 -0
  21. data/lib/tsuga/adapter/memory/test.rb +27 -0
  22. data/lib/tsuga/adapter/mongoid/base.rb +41 -0
  23. data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
  24. data/lib/tsuga/adapter/mongoid/record.rb +16 -0
  25. data/lib/tsuga/adapter/mongoid/test.rb +77 -0
  26. data/lib/tsuga/adapter/sequel/base.rb +57 -0
  27. data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
  28. data/lib/tsuga/adapter/sequel/record.rb +15 -0
  29. data/lib/tsuga/adapter/sequel/test.rb +73 -0
  30. data/lib/tsuga/adapter/shared.rb +4 -0
  31. data/lib/tsuga/adapter/shared/cluster.rb +19 -0
  32. data/lib/tsuga/errors.rb +3 -0
  33. data/lib/tsuga/model/cluster.rb +147 -0
  34. data/lib/tsuga/model/point.rb +206 -0
  35. data/lib/tsuga/model/record.rb +20 -0
  36. data/lib/tsuga/model/tile.rb +136 -0
  37. data/lib/tsuga/service/aggregator.rb +175 -0
  38. data/lib/tsuga/service/clusterer.rb +260 -0
  39. data/lib/tsuga/service/labeler.rb +20 -0
  40. data/lib/tsuga/version.rb +3 -0
  41. data/script/benchmark-aggregator.rb +72 -0
  42. data/script/benchmark-clusterer.rb +102 -0
  43. data/spec/adapter/memory/base_spec.rb +174 -0
  44. data/spec/adapter/memory/cluster_spec.rb +39 -0
  45. data/spec/adapter/shared/cluster_spec.rb +56 -0
  46. data/spec/integration/active_record_spec.rb +10 -0
  47. data/spec/integration/memory_spec.rb +10 -0
  48. data/spec/integration/mongoid_spec.rb +10 -0
  49. data/spec/integration/sequel_spec.rb +10 -0
  50. data/spec/integration/shared.rb +50 -0
  51. data/spec/model/point_spec.rb +102 -0
  52. data/spec/model/tile_spec.rb +116 -0
  53. data/spec/service/aggregator_spec.rb +143 -0
  54. data/spec/service/clusterer_spec.rb +84 -0
  55. data/spec/spec_helper.rb +26 -0
  56. data/spec/support/mongoid.yml +17 -0
  57. data/tsuga.gemspec +29 -0
  58. metadata +226 -0
@@ -0,0 +1,16 @@
1
+ require 'tsuga/model/record'
2
+ require 'tsuga/adapter/mongoid/base'
3
+ require 'mongoid'
4
+
5
module Tsuga::Adapter::Mongoid
  # Mixin turning a Mongoid document class into a Tsuga record model.
  module Record
    # Class-level scope methods for records (none defined yet).
    module ScopeMethods; end

    # Hook run when the module is included: pulls in the adapter base and
    # the generic record behaviour, then extends the host with the scopes.
    def self.included(by)
      [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
      by.extend(ScopeMethods)
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'tsuga/adapter/mongoid/base'
2
+ require 'tsuga/adapter/mongoid/cluster'
3
+ require 'tsuga/adapter/mongoid/record'
4
+ require 'mongoid'
5
+ require 'ostruct'
6
+ require 'forwardable'
7
+
8
module Tsuga::Adapter::Mongoid
  # Test support: lazily builds Mongoid-backed cluster and record models
  # and exposes them through +records+ and +clusters+.
  module Test
    class << self
      extend Forwardable
      def_delegators :models, :records, :clusters

      # Memoized struct holding the +clusters+ and +records+ model classes.
      def models
        @_models ||= _build_test_models
      end

      private

      # Loads the Mongoid test configuration, creates the indexes of both
      # models, registers them as constants of this module and returns them
      # wrapped in an OpenStruct.
      def _build_test_models
        ::Mongoid.load!("spec/support/mongoid.yml", :test)

        cluster_class = _cluster_model
        cluster_class.create_indexes
        record_class = _record_model
        record_class.create_indexes

        # FIXME: hardly elegant but Mongoid insists on a named class.
        const_set(:Cluster, cluster_class)
        const_set(:Record, record_class)

        OpenStruct.new(clusters: cluster_class, records: record_class)
      end

      # Anonymous document class stored in the 'clusters' collection.
      def _cluster_model
        @_cluster_model ||= Class.new do
          include Mongoid::Document

          [
            :tilecode, :depth, :geohash, :lat, :lng,
            :parent_id, :children_type, :children_ids,
            :sum_lat, :sum_lng, :ssq_lat, :ssq_lng, :weight
          ].each { |attribute| field attribute }

          store_in collection: 'clusters'
          index tilecode: 1

          include Tsuga::Adapter::Mongoid::Cluster
        end
      end

      # Anonymous document class stored in the 'records' collection.
      def _record_model
        @_record_model ||= Class.new do
          include Mongoid::Document

          [:geohash, :lat, :lng].each { |attribute| field attribute }

          store_in collection: 'records'
          index geohash: 1

          include Tsuga::Adapter::Mongoid::Record
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'tsuga/errors'
2
+ require 'tsuga/adapter'
3
+ require 'sequel'
4
+ require 'delegate'
5
+
6
module Tsuga::Adapter::Sequel
  # Behaviour common to every Sequel-backed Tsuga model. Including it
  # registers DatasetMethods on the host model's dataset.
  module Base
    def self.included(by)
      by.dataset_module DatasetMethods
    end

    # Primary key, memoized after the first read.
    def id
      @_id ||= super
    end

    # Saves the model through Sequel's +save+.
    def persist!
      save
    end

    # True when the model has not been saved yet (Sequel's +new?+).
    def new_record?
      new?
    end

    # Dataset-level helpers; +self+ is the dataset in all of these.
    module DatasetMethods
      # Inserts all +new_records+ with a single multi_insert call.
      def mass_create(new_records)
        multi_insert(new_records.map(&:to_hash))
      end

      # Saves every record inside one database transaction.
      def mass_update(records)
        db.transaction do
          records.each(&:save)
        end
      end

      # Looks a row up by primary key.
      def find_by_id(id)
        self[id]
      end

      # Deletes every row of the dataset.
      #
      # Calls +delete+ directly instead of +where.delete+: a bare +where+
      # adds no filter, and newer Sequel releases reject filtering methods
      # called without an argument or block.
      def delete_all
        delete
      end

      # Returns the ids of all rows in the dataset.
      def collect_ids
        map(:id)
      end

      # Yields each row of the dataset to the given block.
      def find_each
        # TODO: pagination would be nice to have here, but seems not to
        # work out-of-the-box with Sequel
        # where.extension(:pagination).each_page(2000) do |page|
        #   page.each { |r| yield r }
        # end
        all.each { |r| yield r }
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'tsuga/model/cluster'
2
+ require 'tsuga/model/tile'
3
+ require 'tsuga/adapter/sequel/base'
4
+ require 'tsuga/adapter/shared/cluster'
5
+
6
module Tsuga::Adapter::Sequel
  # Mixin turning a Sequel model into a Tsuga cluster model.
  # Children ids are kept in the underlying column as a comma-separated
  # string and exposed to callers as an array of integers.
  module Cluster
    def self.included(by)
      by.send :include, Base
      by.send :include, Tsuga::Model::Cluster
      by.send :include, Tsuga::Adapter::Shared::Cluster
      by.dataset_module Scopes
    end

    # Children ids as an array of integers (empty when nothing is stored).
    # The parsed array is memoized.
    def children_ids
      @_children_ids ||= begin
        stored = super
        stored ? stored.split(',').map(&:to_i) : []
      end
    end

    # Assigns the children ids and serializes them into the column when they
    # differ from the memoized value.
    #
    # +value+ may be nil: the column is then cleared instead of raising
    # NoMethodError on +nil.join+, and the next read yields an empty array.
    def children_ids=(value)
      changed = (@_children_ids != value)
      @_children_ids = value
      super(value && value.join(',')) if changed
      @_children_ids
    end

    # Dataset scopes for clusters.
    module Scopes
      # Clusters at the given +depth+.
      def at_depth(depth)
        where(depth: depth)
      end

      # Clusters whose tilecode matches the prefix of any of +tiles+.
      # Raises ArgumentError when the tiles are not all at the same depth.
      def in_tile(*tiles)
        depths = tiles.map(&:depth).uniq
        if depths.length > 1
          raise ArgumentError, 'all tile must be at same depth'
        end
        where(tilecode: tiles.map(&:prefix))
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'tsuga/model/record'
2
+ require 'tsuga/adapter/sequel/base'
3
+
4
module Tsuga::Adapter::Sequel
  # Mixin turning a Sequel model into a Tsuga record model.
  module Record
    # Dataset scopes for records (none defined yet).
    module Scopes; end

    # Hook run when the module is included: pulls in the adapter base and
    # the generic record behaviour, then registers the dataset scopes.
    def self.included(by)
      [Base, Tsuga::Model::Record].each { |mixin| by.send(:include, mixin) }
      by.dataset_module(Scopes)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require 'tsuga/adapter/sequel/base'
2
+ require 'tsuga/adapter/sequel/cluster'
3
+ require 'tsuga/adapter/sequel/record'
4
+ require 'sequel'
5
+ require 'sqlite3'
6
+ require 'ostruct'
7
+ require 'forwardable'
8
+
9
module Tsuga::Adapter::Sequel
  # Test support: (re)creates the test tables and builds Sequel models for
  # them, exposed through +records+ and +clusters+.
  module Test
    class << self
      extend Forwardable
      def_delegators :models, :records, :clusters

      # Memoized struct holding the +clusters+ and +records+ model classes.
      def models
        @_models ||= _build_test_models
      end

      private

      # Makes sure a connection exists
      def _db
        @_db ||= Sequel::DATABASES.first || Sequel.sqlite
      end

      # Drops the table +name+ if present, then creates it from +schema+.
      def _recreate_table(name, &schema)
        _db.drop_table?(name)
        _db.create_table(name, &schema)
      end

      # Rebuilds the records table, then the clusters table.
      def _prepare_tables
        _recreate_table(:test_records) do
          primary_key :id
          String :geohash, size: 32
          Float :lat
          Float :lng

          index :geohash
        end

        _recreate_table(:test_clusters) do
          primary_key :id
          Integer :depth
          String :geohash, size: 32
          String :tilecode, size: 32
          Float :lat
          Float :lng
          Integer :parent_id
          String :children_type
          String :children_ids # FIXME
          Double :sum_lat
          Double :sum_lng
          Double :ssq_lat
          Double :ssq_lng
          Integer :weight

          index :tilecode
        end
      end

      # Prepares the tables, then wraps one anonymous model per table in an
      # OpenStruct.
      def _build_test_models
        _prepare_tables

        clusters = Class.new(Sequel::Model(:test_clusters)) { include Tsuga::Adapter::Sequel::Cluster }
        records = Class.new(Sequel::Model(:test_records)) { include Tsuga::Adapter::Sequel::Record }

        OpenStruct.new(clusters: clusters, records: records)
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'tsuga/adapter'
2
+
3
# Namespace for functionality shared between adapters.
module Tsuga::Adapter::Shared; end
@@ -0,0 +1,19 @@
1
+ require 'tsuga/adapter/shared'
2
+
3
+ # Shared functionality between adapters
4
module Tsuga::Adapter::Shared::Cluster
  # Direct children of this cluster, looked up one by one through the
  # class-level +find_by_id+. Empty when +children_ids+ is nil.
  def children
    ids = children_ids
    return [] if ids.nil?

    ids.map { |child_id| self.class.find_by_id(child_id) }
  end

  # Bottom-most clusters of the subtree rooted here. A cluster is its own
  # leaf when its children are not of its own class or it has no children.
  def leaves
    return [self] unless children_type == self.class.name

    ids = children_ids
    return [self] if ids.nil? || ids.empty?

    children.flat_map(&:leaves)
  end
end
@@ -0,0 +1,3 @@
1
module Tsuga
  # Error class for missing records; a RuntimeError subclass.
  # NOTE(review): no raise site is visible in this part of the gem.
  class RecordNotFound < RuntimeError; end
end
@@ -0,0 +1,147 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
module Tsuga::Model
  # Concretions (provided by adapters) have the following accessors:
  # - :depth
  # - :parent_id
  # - :children_type (Record or Cluster)
  # - :children_ids
  # - :weight (count of Record in subtree)
  # - :sum_lat, :sum_lng
  # - :ssq_lat, :ssq_lng
  #
  # Respond to class methods:
  # - :in_tile(Tile) (scopish, response responds to :find_each)
  # - :at_depth(depth)
  # - :delete_all
  # - :find(id)
  #
  # Respond to the following instance methods:
  # - :destroy
  module Cluster
    include Tsuga::Model::PointTrait

    # Defaults the depth to 1 and the position to lat/lng 0 when unset.
    def initialize
      super
      self.depth ||= 1
      # equator/greenwich
      self.lat ||= 0
      self.lng ||= 0
    end

    # latitude deviation in cluster (memoized; reset by #merge)
    def dlat
      @_dlat ||= _safe_sqrt(_per_weight(ssq_lat) - _per_weight(sum_lat)**2)
    end

    # longitude deviation in cluster (memoized; reset by #merge)
    def dlng
      @_dlng ||= _safe_sqrt(_per_weight(ssq_lng) - _per_weight(sum_lng)**2)
    end

    # radius of cluster
    def radius
      @_radius ||= Math.sqrt(dlat ** 2 + dlng ** 2)
    end

    # density (weight per unit area)
    def density
      @_density ||= begin
        # min. radius 1.4e-4 (about 15m at european latitudes)
        # for 1-point clusters where density would otherwise be infinite
        our_radius = [radius, 1.4e-4].max
        # Math.log(weight / (our_radius ** 2)) / Math.log(2)
        weight / (our_radius ** 2)
      end
    end

    # Sets the geohash and refreshes the derived tilecode.
    def geohash=(value)
      super(value)
      _update_tilecode
      geohash
    end

    # Sets the depth and refreshes the derived tilecode.
    def depth=(value)
      super(value)
      _update_tilecode
      depth
    end

    # Folds +other+ into this cluster: sums the weights and the sum /
    # sum-of-squares accumulators, moves lat/lng to the new mean and appends
    # the other cluster's children ids.
    #
    # Raises ArgumentError unless both clusters share depth and parent.
    def merge(other)
      raise ArgumentError, 'not same depth' unless depth == other.depth
      raise ArgumentError, 'not same parent' unless parent_id == other.parent_id

      self.weight += other.weight
      self.sum_lat += other.sum_lat
      self.sum_lng += other.sum_lng
      self.ssq_lat += other.ssq_lat
      self.ssq_lng += other.ssq_lng
      self.lat = _per_weight(sum_lat)
      self.lng = _per_weight(sum_lng)
      self.children_ids += other.children_ids

      # dirty calculated values
      @_dlng = @_dlat = @_radius = @_density = nil
    end

    module ClassMethods
      # Cluster factory.
      # +other+ is either a Cluster or a Record
      #
      # FIXME: there's a potential for overflow here on large datasets on the sum-
      # and sum-of-squares fields. it can be mitigated by using double-precision
      # fields, or calculating sums only on the children (instead of the subtree)
      def build_from(depth, other)
        c = new
        c.depth = depth

        c.lat = other.lat
        c.lng = other.lng
        c.children_ids = [other.id]
        c.children_type = other.class.name

        case other
        when Cluster
          # copy the accumulated statistics of the wrapped cluster
          c.weight = other.weight
          c.sum_lng = other.sum_lng
          c.sum_lat = other.sum_lat
          c.ssq_lng = other.ssq_lng
          c.ssq_lat = other.ssq_lat
        else
          # a single record: weight 1, sums seeded from its coordinates
          c.weight = 1
          c.sum_lng = other.lng
          c.sum_lat = other.lat
          c.ssq_lng = other.lng ** 2
          c.ssq_lat = other.lat ** 2
        end

        c.geohash # force geohash calculation
        c
      end
    end

    def self.included(by)
      by.extend(ClassMethods)
    end

    private

    # Square root that treats negative inputs as 0.
    def _safe_sqrt(value)
      (value < 0) ? 0 : Math.sqrt(value)
    end

    # +total+ divided by the cluster weight. Integer totals are divided as
    # floats so means and deviations are not truncated by integer division;
    # any other numeric type keeps its own division.
    def _per_weight(total)
      total.is_a?(Integer) ? total.fdiv(weight) : total / weight
    end

    # Recomputes the tilecode from +prefix(depth)+ (not defined in this
    # module), or clears it when geohash or depth is missing.
    def _update_tilecode
      self.tilecode = (geohash && depth) ? prefix(depth) : nil
    end
  end
end