tsuga 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +16 -0
  7. data/Gemfile.lock +146 -0
  8. data/Guardfile +8 -0
  9. data/LICENSE.txt +22 -0
  10. data/README.md +161 -0
  11. data/Rakefile +1 -0
  12. data/lib/tsuga.rb +11 -0
  13. data/lib/tsuga/adapter.rb +4 -0
  14. data/lib/tsuga/adapter/active_record/base.rb +61 -0
  15. data/lib/tsuga/adapter/active_record/cluster.rb +52 -0
  16. data/lib/tsuga/adapter/active_record/migration.rb +50 -0
  17. data/lib/tsuga/adapter/active_record/record.rb +15 -0
  18. data/lib/tsuga/adapter/active_record/test.rb +73 -0
  19. data/lib/tsuga/adapter/memory/base.rb +146 -0
  20. data/lib/tsuga/adapter/memory/cluster.rb +32 -0
  21. data/lib/tsuga/adapter/memory/test.rb +27 -0
  22. data/lib/tsuga/adapter/mongoid/base.rb +41 -0
  23. data/lib/tsuga/adapter/mongoid/cluster.rb +29 -0
  24. data/lib/tsuga/adapter/mongoid/record.rb +16 -0
  25. data/lib/tsuga/adapter/mongoid/test.rb +77 -0
  26. data/lib/tsuga/adapter/sequel/base.rb +57 -0
  27. data/lib/tsuga/adapter/sequel/cluster.rb +43 -0
  28. data/lib/tsuga/adapter/sequel/record.rb +15 -0
  29. data/lib/tsuga/adapter/sequel/test.rb +73 -0
  30. data/lib/tsuga/adapter/shared.rb +4 -0
  31. data/lib/tsuga/adapter/shared/cluster.rb +19 -0
  32. data/lib/tsuga/errors.rb +3 -0
  33. data/lib/tsuga/model/cluster.rb +147 -0
  34. data/lib/tsuga/model/point.rb +206 -0
  35. data/lib/tsuga/model/record.rb +20 -0
  36. data/lib/tsuga/model/tile.rb +136 -0
  37. data/lib/tsuga/service/aggregator.rb +175 -0
  38. data/lib/tsuga/service/clusterer.rb +260 -0
  39. data/lib/tsuga/service/labeler.rb +20 -0
  40. data/lib/tsuga/version.rb +3 -0
  41. data/script/benchmark-aggregator.rb +72 -0
  42. data/script/benchmark-clusterer.rb +102 -0
  43. data/spec/adapter/memory/base_spec.rb +174 -0
  44. data/spec/adapter/memory/cluster_spec.rb +39 -0
  45. data/spec/adapter/shared/cluster_spec.rb +56 -0
  46. data/spec/integration/active_record_spec.rb +10 -0
  47. data/spec/integration/memory_spec.rb +10 -0
  48. data/spec/integration/mongoid_spec.rb +10 -0
  49. data/spec/integration/sequel_spec.rb +10 -0
  50. data/spec/integration/shared.rb +50 -0
  51. data/spec/model/point_spec.rb +102 -0
  52. data/spec/model/tile_spec.rb +116 -0
  53. data/spec/service/aggregator_spec.rb +143 -0
  54. data/spec/service/clusterer_spec.rb +84 -0
  55. data/spec/spec_helper.rb +26 -0
  56. data/spec/support/mongoid.yml +17 -0
  57. data/tsuga.gemspec +29 -0
  58. metadata +226 -0
@@ -0,0 +1,16 @@
1
+ require 'tsuga/model/record'
2
+ require 'tsuga/adapter/mongoid/base'
3
+ require 'mongoid'
4
+
5
module Tsuga::Adapter::Mongoid
  # Mixin for Mongoid documents that play the role of Tsuga records.
  module Record
    # Class-level scope helpers extended onto the including class
    # (currently empty).
    module ScopeMethods
    end

    # Inclusion hook: mixes the Mongoid adapter Base and the generic record
    # model into the host class (in that order), then extends the host with
    # ScopeMethods.
    def self.included(host)
      host.send(:include, Base)
      host.send(:include, Tsuga::Model::Record)
      host.extend(ScopeMethods)
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'tsuga/adapter/mongoid/base'
2
+ require 'tsuga/adapter/mongoid/cluster'
3
+ require 'tsuga/adapter/mongoid/record'
4
+ require 'mongoid'
5
+ require 'ostruct'
6
+ require 'forwardable'
7
+
8
module Tsuga::Adapter::Mongoid
  # Test support: lazily builds Mongoid-backed cluster and record models.
  module Test
    class << self
      extend Forwardable
      # Test.records and Test.clusters are forwarded to the struct returned
      # by +models+.
      def_delegators :models, :records, :clusters

      # Memoized OpenStruct responding to :clusters and :records.
      def models
        @_models ||= _build_test_models
      end

      private

      # Loads the Mongoid test configuration, creates the indexes of both
      # models, registers the models as constants and wraps them in an
      # OpenStruct.
      def _build_test_models
        ::Mongoid.load!("spec/support/mongoid.yml", :test)
        _cluster_model.create_indexes
        _record_model.create_indexes

        # FIXME: hardly elegant but Mongoid insists on a named class.
        const_set(:Cluster, _cluster_model)
        const_set(:Record, _record_model)

        OpenStruct.new(:clusters => _cluster_model, :records => _record_model)
      end

      # Memoized anonymous Mongoid document stored in the 'clusters'
      # collection, indexed on tilecode.
      def _cluster_model
        @_cluster_model ||= Class.new do
          include Mongoid::Document

          [
            :tilecode, :depth, :geohash, :lat, :lng,
            :parent_id, :children_type, :children_ids,
            :sum_lat, :sum_lng, :ssq_lat, :ssq_lng, :weight
          ].each do |attribute|
            field attribute
          end

          store_in :collection => 'clusters'
          index :tilecode => 1

          include Tsuga::Adapter::Mongoid::Cluster
        end
      end

      # Memoized anonymous Mongoid document stored in the 'records'
      # collection, indexed on geohash.
      def _record_model
        @_record_model ||= Class.new do
          include Mongoid::Document

          [:geohash, :lat, :lng].each do |attribute|
            field attribute
          end

          store_in :collection => 'records'
          index :geohash => 1

          include Tsuga::Adapter::Mongoid::Record
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'tsuga/errors'
2
+ require 'tsuga/adapter'
3
+ require 'sequel'
4
+ require 'delegate'
5
+
6
module Tsuga::Adapter::Sequel
  # Persistence glue mixed into Sequel models used by Tsuga.
  module Base
    # Inclusion hook: registers DatasetMethods on the including model's
    # dataset.
    def self.included(by)
      by.dataset_module DatasetMethods
    end

    # Identifier returned by the underlying +id+ accessor, memoized in @_id
    # once it is truthy.
    def id
      @_id ||= super
    end

    # Persists the model by calling +save+.
    def persist!
      save
    end

    # Whether the model is unsaved, as reported by +new?+.
    def new_record?
      new?
    end

    # Dataset-level operations.
    module DatasetMethods
      # Inserts all +new_records+ with a single +multi_insert+ call, using
      # each record's +to_hash+ as the row.
      def mass_create(new_records)
        multi_insert(new_records.map(&:to_hash))
      end

      # Saves every record inside one database transaction.
      def mass_update(records)
        db.transaction do
          records.each(&:save)
        end
      end

      # Looks a row up through the dataset's +[]+ accessor.
      def find_by_id(id)
        self[id]
      end

      # Deletes every row of the current dataset.
      #
      # Calls +delete+ directly instead of going through an argument-less
      # +where+: the empty filter added nothing to the query.
      # NOTE(review): newer Sequel releases are understood to raise when a
      # filter method receives neither argument nor block - confirm against
      # the Sequel version in use.
      def delete_all
        delete
      end

      # Returns the :id value of each row in the dataset.
      def collect_ids
        map(:id)
      end

      # Yields each row of the dataset to the given block.
      def find_each
        # TODO: pagination would be nice to have here, but seems not to
        # work out-of-the-box with Sequel
        # where.extension(:pagination).each_page(2000) do |page|
        #   page.each { |r| yield r }
        # end
        all.each { |r| yield r }
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'tsuga/model/cluster'
2
+ require 'tsuga/model/tile'
3
+ require 'tsuga/adapter/sequel/base'
4
+ require 'tsuga/adapter/shared/cluster'
5
+
6
module Tsuga::Adapter::Sequel
  # Mixin turning a Sequel model into a Tsuga cluster.
  module Cluster
    # Dataset scopes registered on the including model.
    module Scopes
      # Restricts the dataset to rows whose depth equals +depth+.
      def at_depth(depth)
        where(:depth => depth)
      end

      # Restricts the dataset to rows whose tilecode is the prefix of one of
      # the given tiles. Raises ArgumentError when the tiles do not all share
      # one depth.
      def in_tile(*tiles)
        distinct_depths = tiles.map { |tile| tile.depth }.uniq
        raise ArgumentError, 'all tile must be at same depth' if distinct_depths.length > 1

        where(:tilecode => tiles.map { |tile| tile.prefix })
      end
    end

    # Inclusion hook: mixes in the adapter Base, the cluster model and the
    # shared cluster behaviour (in that order), then registers Scopes on the
    # dataset.
    def self.included(host)
      host.send(:include, Base)
      host.send(:include, Tsuga::Model::Cluster)
      host.send(:include, Tsuga::Adapter::Shared::Cluster)
      host.dataset_module(Scopes)
    end

    # Children ids as an array of integers. The stored value is a
    # comma-separated string; the parsed array is memoized in @_children_ids.
    def children_ids
      return @_children_ids if @_children_ids

      raw = super
      @_children_ids =
        if raw
          raw.split(',').map { |token| token.to_i }
        else
          []
        end
    end

    # Replaces the children ids. The comma-joined string is only written to
    # the underlying column when +value+ differs from the memoized array.
    def children_ids=(value)
      unchanged = (@_children_ids == value)
      @_children_ids = value
      super(value.join(',')) unless unchanged
      @_children_ids
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'tsuga/model/record'
2
+ require 'tsuga/adapter/sequel/base'
3
+
4
module Tsuga::Adapter::Sequel
  # Mixin turning a Sequel model into a Tsuga record.
  module Record
    # Dataset scopes registered on the including model (currently empty).
    module Scopes
    end

    # Inclusion hook: mixes in the adapter Base and the record model (in that
    # order), then registers Scopes on the dataset.
    def self.included(host)
      host.send(:include, Base)
      host.send(:include, Tsuga::Model::Record)
      host.dataset_module(Scopes)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require 'tsuga/adapter/sequel/base'
2
+ require 'tsuga/adapter/sequel/cluster'
3
+ require 'tsuga/adapter/sequel/record'
4
+ require 'sequel'
5
+ require 'sqlite3'
6
+ require 'ostruct'
7
+ require 'forwardable'
8
+
9
module Tsuga::Adapter::Sequel
  # Test support: lazily builds Sequel-backed cluster and record models on
  # freshly created tables.
  module Test
    class << self
      extend Forwardable
      # Test.records and Test.clusters are forwarded to the struct returned
      # by +models+.
      def_delegators :models, :records, :clusters

      # Memoized OpenStruct responding to :clusters and :records.
      def models
        @_models ||= _build_test_models
      end

      private

      # Makes sure a connection exists: reuses the first registered Sequel
      # database, otherwise opens one with Sequel.sqlite.
      def _db
        return @_db if @_db

        @_db = Sequel::DATABASES.first || Sequel.sqlite
      end

      # Drops (if present) and recreates both test tables, records first.
      def _prepare_tables
        _recreate_records_table
        _recreate_clusters_table
      end

      # Rebuilds the :test_records table.
      def _recreate_records_table
        _db.drop_table?(:test_records)
        _db.create_table(:test_records) do
          primary_key :id
          String      :geohash, :size => 32
          Float       :lat
          Float       :lng

          index :geohash
        end
      end

      # Rebuilds the :test_clusters table.
      def _recreate_clusters_table
        _db.drop_table?(:test_clusters)
        _db.create_table(:test_clusters) do
          primary_key :id
          Integer     :depth
          String      :geohash, :size => 32
          String      :tilecode, :size => 32
          Float       :lat
          Float       :lng
          Integer     :parent_id
          String      :children_type
          String      :children_ids # FIXME
          Double      :sum_lat
          Double      :sum_lng
          Double      :ssq_lat
          Double      :ssq_lng
          Integer     :weight

          index :tilecode
        end
      end

      # Prepares the tables, then defines one anonymous Sequel model per
      # table and returns both wrapped in an OpenStruct.
      def _build_test_models
        _prepare_tables

        clusters = Class.new(Sequel::Model(:test_clusters)) { include Tsuga::Adapter::Sequel::Cluster }
        records  = Class.new(Sequel::Model(:test_records))  { include Tsuga::Adapter::Sequel::Record }

        OpenStruct.new(:clusters => clusters, :records => records)
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'tsuga/adapter'
2
+
3
# Namespace for code shared between adapters.
module Tsuga::Adapter::Shared; end
@@ -0,0 +1,19 @@
1
+ require 'tsuga/adapter/shared'
2
+
3
+ # Shared functionality between adapters
4
module Tsuga::Adapter::Shared::Cluster
  # Direct children of this cluster, fetched one id at a time through the
  # class-level +find_by_id+. Returns an empty array when +children_ids+ is
  # nil.
  def children
    return [] if children_ids.nil?
    children_ids.map do |_id|
      self.class.find_by_id(_id)
    end
  end

  # Leaf clusters of the subtree rooted at this cluster.
  #
  # A cluster is its own single leaf when its +children_type+ is not the name
  # of its own class, or when it has no children ids. Otherwise the leaves of
  # all children are concatenated into one flat array.
  def leaves
    if children_type != self.class.name || children_ids.nil? || children_ids.empty?
      [self]
    else
      # flat_map appends into a single result array instead of allocating a
      # new intermediate array for every child, as map + inject(:+) did.
      children.flat_map(&:leaves)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Tsuga
  # RuntimeError subclass; judging by its name it signals a record lookup
  # that found nothing (raise sites are not part of this file).
  class RecordNotFound < RuntimeError
  end
end
@@ -0,0 +1,147 @@
1
+ require 'tsuga'
2
+ require 'tsuga/model/point'
3
+
4
module Tsuga::Model
  # Concretions (provided by adapters) have the following accessors:
  # - :depth
  # - :parent_id
  # - :children_type (Record or Cluster)
  # - :children_ids
  # - :weight (count of Record in subtree)
  # - :sum_lat, :sum_lng
  # - :ssq_lat, :ssq_lng
  #
  # Respond to class methods:
  # - :in_tile(Tile) (scopish, response responds to :find_each)
  # - :at_depth(depth)
  # - :delete_all
  # - :find(id)
  #   NOTE(review): Tsuga::Adapter::Shared::Cluster looks children up with
  #   find_by_id rather than find - confirm which one concretions must offer.
  #
  # Respond to the following instance methods:
  # - :destroy
  module Cluster
    include Tsuga::Model::PointTrait

    # Runs the concretion's initializer, then defaults depth to 1 and the
    # position to latitude/longitude 0 when they are unset.
    def initialize
      super
      self.depth ||= 1
      # equator/greenwich
      self.lat   ||= 0
      self.lng   ||= 0
    end

    # latitude deviation in cluster:
    # sqrt(mean of squared latitudes - squared mean latitude), memoized.
    def dlat
      @_dlat ||= _safe_sqrt(_mean(ssq_lat) - _mean(sum_lat)**2)
    end

    # longitude deviation in cluster (same formula as +dlat+), memoized.
    def dlng
      @_dlng ||= _safe_sqrt(_mean(ssq_lng) - _mean(sum_lng)**2)
    end

    # radius of cluster: the two deviations combined in quadrature, memoized.
    def radius
      @_radius ||= Math.sqrt(dlat ** 2 + dlng ** 2)
    end

    # density (weight per unit area), memoized.
    def density
      @_density ||= begin
        # min. radius 1.4e-4 (about 15m at european latitudes)
        # for 1-point clusters where density would otherwise be infinite
        our_radius = [radius, 1.4e-4].max
        # Math.log(weight / (our_radius ** 2)) / Math.log(2)
        weight / (our_radius ** 2)
      end
    end

    # Sets the geohash through the underlying writer, then refreshes the
    # tilecode derived from it.
    def geohash=(value)
      super(value)
      _update_tilecode
      geohash
    end

    # Sets the depth through the underlying writer, then refreshes the
    # tilecode derived from it.
    def depth=(value)
      super(value)
      _update_tilecode
      depth
    end

    # Absorbs +other+ into this cluster: weights, sums, sums of squares and
    # children ids are added up, and the position becomes the new mean.
    #
    # Raises ArgumentError unless both clusters have the same depth and the
    # same parent_id.
    def merge(other)
      raise ArgumentError, 'not same depth'  unless depth == other.depth
      raise ArgumentError, 'not same parent' unless parent_id == other.parent_id

      self.weight  += other.weight
      self.sum_lat += other.sum_lat
      self.sum_lng += other.sum_lng
      self.ssq_lat += other.ssq_lat
      self.ssq_lng += other.ssq_lng
      self.lat      = _mean(sum_lat)
      self.lng      = _mean(sum_lng)
      self.children_ids += other.children_ids

      # dirty calculated values
      @_dlng = @_dlat = @_radius = @_density = nil
    end

    module ClassMethods
      # Cluster factory.
      # +other+ is either a Cluster or a Record
      #
      # Returns a new cluster at +depth+, positioned at +other+, with +other+
      # as its single child. Statistics are copied from +other+ when it is a
      # Cluster, and seeded from its position (weight 1) otherwise.
      #
      # FIXME: there's a potential for overflow here on large datasets on the sum-
      # and sum-of-squares fields. it can be mitigated by using double-precision
      # fields, or calculating sums only on the children (instead of the subtree)
      def build_from(depth, other)
        c = new()
        c.depth = depth

        c.lat           = other.lat
        c.lng           = other.lng
        c.children_ids  = [other.id]
        c.children_type = other.class.name

        case other
        when Cluster
          c.weight      = other.weight
          c.sum_lng     = other.sum_lng
          c.sum_lat     = other.sum_lat
          c.ssq_lng     = other.ssq_lng
          c.ssq_lat     = other.ssq_lat
        else
          c.weight      = 1
          c.sum_lng     = other.lng
          c.sum_lat     = other.lat
          c.ssq_lng     = other.lng ** 2
          c.ssq_lat     = other.lat ** 2
        end

        c.geohash # force geohash calculation
        return c
      end
    end

    # Inclusion hook: exposes ClassMethods (the +build_from+ factory) on the
    # including class.
    def self.included(by)
      by.extend(ClassMethods)
    end


    private


    # Square root that returns 0 for negative input instead of raising.
    # Presumably guards against slightly negative variances caused by
    # floating-point rounding.
    def _safe_sqrt(value)
      (value < 0) ? 0 : Math.sqrt(value)
    end

    # Divides an accumulated +total+ by the cluster weight. The divisor is
    # converted with Float() so that Integer totals are not truncated by
    # integer division; Float totals give the same result as plain division.
    def _mean(total)
      total / Float(weight)
    end

    # Recomputes the tilecode as the geohash prefix at the current depth, or
    # clears it when either geohash or depth is missing.
    def _update_tilecode
      if geohash && depth
        self.tilecode = prefix(depth)
      else
        self.tilecode = nil
      end
    end
  end
end