geotree 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.txt +3 -0
- data/README.txt +6 -0
- data/lib/geotree.rb +1 -0
- data/lib/geotree/blockfile.rb +453 -0
- data/lib/geotree/bounds.rb +81 -0
- data/lib/geotree/datapoint.rb +68 -0
- data/lib/geotree/diskblockfile.rb +64 -0
- data/lib/geotree/externalsort.rb +369 -0
- data/lib/geotree/geotree.rb +980 -0
- data/lib/geotree/loc.rb +76 -0
- data/lib/geotree/multitree.rb +190 -0
- data/lib/geotree/node.rb +252 -0
- data/lib/geotree/pswriter.rb +471 -0
- data/lib/geotree/ptbuffer.rb +120 -0
- data/lib/geotree/tools.rb +626 -0
- data/test/test_blockfile.rb +153 -0
- data/test/test_externalsort.rb +139 -0
- data/test/test_geotree.rb +432 -0
- data/test/test_ps.rb +56 -0
- metadata +76 -0
data/lib/geotree/loc.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
|
3
|
+
module GeoTreeModule

  # Integer coordinate range used for locations.
  # The 'else' branch is a developer toggle that shrinks the coordinate
  # space for manual testing; it is disabled in normal use.
  if true
    LOC_MAX = ((1 << 30)-1)
    LOC_MIN = -LOC_MAX
  else
    warn("using very small bounds for test purposes")
    LOC_MAX = 1000
    LOC_MIN = 0
  end

  # Factor for converting integer locations to latitude/longitudes
  # (LOC_MAX integer units correspond to 180 degrees)
  LAT_LONG_FACTOR_ = 180.0 / LOC_MAX

  # Represents an x,y location.
  # Each coordinate is stored internally as an integer, but may be
  # referred to as a latitude and longitude as well.
  #
  class Loc

    attr_accessor :x,:y

    # Convert a latitude or longitude (in degrees) to the nearest
    # integer coordinate.
    #
    # @param n angle in degrees
    # @return the integer coordinate, within LOC_MIN..LOC_MAX
    # @raise ArgumentError if the converted value lies outside LOC_MIN..LOC_MAX
    #
    def self.cvt_latlong_to_int(n)
      # Round to nearest; adding 0.5 and truncating with to_i is only
      # correct for non-negative values, and (for example) mapped
      # -180.0 to LOC_MIN + 1 instead of LOC_MIN.
      m = (n / LAT_LONG_FACTOR_).round
      raise ArgumentError,"Converting lat/long #{n} is out of range" if m < LOC_MIN || m > LOC_MAX
      m
    end

    # Construct a point.
    # If x is a Float, it assumes that x and y are
    # longitude and latitude respectively, and converts them
    # to integer values.  Note that only x is examined; y is
    # converted whenever x is a Float.
    #
    # @raise ArgumentError if a converted coordinate is out of range
    #
    def initialize(x = 0, y = 0)
      if x.is_a? Float
        x = Loc.cvt_latlong_to_int(x)
        y = Loc.cvt_latlong_to_int(y)
      end
      @x = x
      @y = y
    end

    # Get x as a longitudinal coordinate (degrees)
    #
    def longit
      @x * LAT_LONG_FACTOR_
    end

    # Get y as a latitudinal coordinate (degrees)
    #
    def latit
      @y * LAT_LONG_FACTOR_
    end

    # Render as "(x,y)", using the stored coordinate values
    #
    def to_s
      "(#{x},#{y})"
    end

    # Same text as to_s
    #
    def inspect
      to_s
    end

    # Copy the coordinates of another location into this one.
    # @param src object responding to x and y
    #
    def set_to(src)
      @x = src.x
      @y = src.y
    end

    # Return a version of the point with the coordinates exchanged
    #
    def flip
      Loc.new(@y,@x)
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
req 'geotree'
|
3
|
+
|
4
|
+
module GeoTreeModule

  # A variant of GeoTree that supports queries at multiple resolutions.
  #
  # For example, a map application should return approximately the same number of
  # datapoints when the screen displays the entire state of California, as well as
  # when it is 'zoomed in' on a particular section of the Los Angeles area.
  #
  # To accomplish this, a MultiTree maintains several GeoTrees, each for a different
  # level of detail.  The highest detail tree contains every datapoint that has been
  # added to the tree, and lower detail trees will have progressively fewer points.
  #
  # When querying a MultiTree, the user must specify which level of detail (i.e.,
  # which of the contained trees) is to be examined.
  #
  class MultiTree

    attr_reader :num_trees

    # Construct MultiTree
    # @param path directory to store trees within (created if it doesn't exist)
    # @param num_trees the number of trees to maintain (equivalently, the number of
    #   levels of detail to support); must be a positive integer
    # @raise ArgumentError if num_trees is not a positive integer, or if path
    #   refers to an existing regular file
    #
    def initialize(path,num_trees)
      # Validate arguments before creating anything on disk
      unless num_trees.is_a?(Integer) && num_trees > 0
        raise ArgumentError,"num_trees must be a positive integer (got #{num_trees.inspect})"
      end
      raise ArgumentError,"path is an existing file, not a directory: #{path}" if File.file?(path)

      @buffer = PtBuffer.new(self)
      @num_trees = num_trees
      @trees = []

      Dir.mkdir(path) unless File.directory?(path)

      # Construct trees within this directory;
      # @trees[0] is the highest detail tree
      num_trees.times do |i|
        @trees << GeoTree.open(File.join(path,"tree_#{i}.bin"))
      end

      prepare_details
    end

    # @return the buffer's 'active' flag
    def buffering
      @buffer.active
    end

    # Set the buffer's 'active' flag
    # @raise IllegalStateException if the tree has been closed
    def buffering=(val)
      raise IllegalStateException if !open?
      @buffer.active = val
    end

    # @return true until close has been called
    def open?
      !@trees.nil?
    end

    # Close every contained tree; the MultiTree cannot be used afterwards.
    # @raise IllegalStateException if already closed
    def close
      raise IllegalStateException if !open?

      # Stop buffering, in case we were, to flush points to tree
      @buffer.active = false

      @trees.each{|t| t.close}
      @trees = nil
    end

    # Add a datapoint to the trees.
    # Does not check whether a datapoint with this name already exists in the
    # tree, even if it has the same location.
    # @raise IllegalStateException if the tree has been closed
    #
    def add(data_point)
      raise IllegalStateException if !open?
      @buffer.add(data_point)
    end

    # Remove a datapoint.  Returns the value returned by the highest detail
    # tree's remove (expected to be the removed datapoint) if it was found
    # and removed, otherwise nil.
    # A datapoint will be removed iff both its name and location match
    # the sought point; the weight is ignored.
    # @raise IllegalStateException if the tree is closed, or buffering is active
    #
    def remove(data_point)
      raise IllegalStateException if (!open? || @buffer.active)

      removed = nil

      # Start with highest-detail tree, and continue to remove the
      # same point until we reach a tree that doesn't contain it
      @trees.each do |t|
        rem = t.remove(data_point)
        break unless rem  # assume it's not in any lower detail tree
        removed ||= rem
      end
      removed
    end

    # Find all points intersecting a rectangle.
    # @param rect query rectangle
    # @param detail level of detail, 0...num_trees-1
    # @raise IllegalStateException if the tree is closed, or buffering is active
    # @raise ArgumentError if detail is out of range
    #
    def find(rect, detail)
      raise IllegalStateException if (!open? || @buffer.active)
      tree(detail).find(rect)
    end

    # Determine if a particular datapoint lies in the tree
    # @param df the datapoint to look for
    # @param detail level of detail, 0...num_trees-1
    # @raise IllegalStateException if the tree is closed, or buffering is active
    # @raise ArgumentError if detail is out of range
    #
    def find_point(df, detail)
      raise IllegalStateException if (!open? || @buffer.active)
      # Previously this called find(rect) with an undefined 'rect' and so
      # always raised NameError.
      # NOTE(review): assumes GeoTree responds to find_point(df) -- confirm
      tree(detail).find_point(df)
    end

    # Store a point in the highest detail tree, and in as many lower
    # detail trees as its (randomly perturbed) weight allows.
    # Called with each point passed to add, via the point buffer
    # (presumably; PtBuffer is defined elsewhere).
    #
    def add_buffered_point(data_point)
      # Determine which is the lowest detail level at which
      # this point is to be found

      stretch = 1.5
      contract = 0.5
      rf = rand() - contract

      wt = data_point.weight & (MAX_POINT_WEIGHT-1)

      randval = (wt + stretch*rf) / MAX_POINT_WEIGHT

      num_trees.times do |ti|
        di = num_trees - 1 - ti

        # The highest detail tree (ti == 0) receives every point; stop at
        # the first lower detail level whose cutoff exceeds the value
        break if ti > 0 && randval < @cutoffs[di]

        tree(di).add_buffered_point(data_point)
      end
    end

    private

    # Build the table of cutoff values
    def prepare_details

      # Cutoffs are indexed by detail level;
      # if the adjusted data point weights are less than the
      # cutoff value, then the point will not appear in that level's tree

      cmin = -0.3
      m = (1.0) / num_trees

      @cutoffs = Array.new(num_trees) { |ti| (num_trees - ti) * m + cmin }
    end

    # Get the tree for a level of detail
    # @param detail level of detail, 0...num_trees-1 (num_trees-1 is the highest)
    # @raise ArgumentError if detail is out of range; without this check a
    #   too-large value would wrap around to a different tree via
    #   negative array indexing
    def tree(detail)
      unless (0...num_trees).cover?(detail)
        raise ArgumentError,"detail must be in 0...#{num_trees} (got #{detail.inspect})"
      end
      @trees[num_trees - 1 - detail]
    end
  end
end
|
177
|
+
|
178
|
+
# Manual smoke test, performed only if main?(__FILE__) is true
# (main?, assert! and remove_file_or_dir are not defined in this file;
# presumably they come from 'tools').
# Builds a three-level MultiTree in test/workdir and adds 100 random points.
if main?(__FILE__)
  include GeoTreeModule
  newdir = File.join(File.dirname(__FILE__),"../../test/workdir")
  assert!(File.directory?(newdir))
  Dir.chdir(newdir)
  remove_file_or_dir("_multitree_")

  mt = MultiTree.new("_multitree_",3)
  begin
    pts = DataPoint.rnd_many(100)
    pts.each{|x| mt.add(x)}
  ensure
    # Close the trees even if adding fails, so they are not left open
    mt.close
  end
end
|
data/lib/geotree/node.rb
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
require_relative 'datapoint'
|
2
|
+
|
3
|
+
module GeoTreeModule

  # Number of ints stored per data point / per partition within a block,
  # and the size of an int in bytes
  DATAPOINT_INTS = 4
  PARTITION_INTS = 2
  INT_BYTES = 4
  DATAPOINT_BYTES = DATAPOINT_INTS * INT_BYTES

  # Block size (bytes), and the resulting child count of an internal node
  # and point capacity of a leaf node.
  # The 'else' branch is a developer toggle for unusually small nodes.
  if true
    KDTREE_BLOCKSIZE = 256
    NODEI_CHILDREN = (((KDTREE_BLOCKSIZE/INT_BYTES) - 6)/2)
    NODEL_CAPACITY = (((KDTREE_BLOCKSIZE/INT_BYTES) - 4)/4)
  else
    KDTREE_BLOCKSIZE = 64
    NODEI_CHILDREN = [(((KDTREE_BLOCKSIZE/INT_BYTES) - 6)/2),3].min
    NODEL_CAPACITY = (((KDTREE_BLOCKSIZE/INT_BYTES) - 4)/4)
    warn("using unusually small nodes; children=#{NODEI_CHILDREN}, capacity=#{NODEL_CAPACITY}")
  end

  # The maximum population of a leaf node (+ overflow nodes) without splitting
  # (although splitting is disabled if the leaf bounds gets too small)
  SPLIT_SIZE = (NODEL_CAPACITY * 3)

  # The size below which a bounds cannot be further subdivided
  # (to convert a leaf node that's at capacity to an internal node)
  SPLITTABLE_LINEAR_SIZE = 2

  # Developer toggle: the first branch selects a very small cache
  if false
    warn("setting cache very small")
    KD_CACHE_SIZE = 5
  else
    KD_CACHE_SIZE = (100000/KDTREE_BLOCKSIZE)
  end

  # Block fields for Node base class (each is an int)
  HDR_FLAGS = 0
  HDR_INTS = 1

  # Block fields for NodeI subclass
  IFLD_POPULATION = HDR_INTS
  IFLD_PARTITIONS = IFLD_POPULATION + 1
  IFLD_INTS = IFLD_PARTITIONS + NODEI_CHILDREN * PARTITION_INTS

  # Block fields for NodeL subclass
  LFLD_OVERFLOW = HDR_INTS
  LFLD_USED = LFLD_OVERFLOW+1
  LFLD_DATAPOINTS = LFLD_USED+1
  LFLD_INTS = (LFLD_DATAPOINTS + NODEL_CAPACITY * DATAPOINT_INTS)

  # One slot of an internal node: the position at which the slot starts,
  # and the name of the child node occupying it (0 if none)
  class Partition
    attr_accessor :start_position, :child_name

    def initialize(pos=0,child_name=0)
      @start_position = pos
      @child_name = child_name
    end
  end

  # Base class for KDTree nodes
  #
  class Node

    attr_accessor :leaf
    attr_accessor :name
    # If true, the slabs are stacked vertically; otherwise, they're arranged
    # horizontally
    attr_accessor :vertical
    attr_accessor :prev_node, :next_node, :bounds
    attr_accessor :modified

    # @param name node name
    # @param leaf true if this is a leaf node
    # @param vertical orientation flag (see the vertical attribute)
    # @param bounds bounds of the node
    def initialize(name,leaf,vertical,bounds)
      @name = name
      @leaf = leaf
      @vertical = vertical
      @bounds = bounds
      @modified = false
    end

    # @return true if the larger dimension of the bounds is at least
    #   SPLITTABLE_LINEAR_SIZE
    def splittable
      [@bounds.w,@bounds.h].max >= SPLITTABLE_LINEAR_SIZE
    end
  end

  # Leaf node; holds a list of data points
  #
  class NodeL < Node
    # name of overflow block (or zero)
    attr_accessor :overflow

    def initialize(name,vertical,bounds)
      super(name,true,vertical,bounds)
      @data_pts = []
      @overflow = 0
    end

    # Number of data points stored in this node
    def used
      @data_pts.size
    end

    # The array of data points (the live array, not a copy)
    def pts
      @data_pts
    end

    def set_data_point(index, dp)
      @data_pts[index] = dp
    end

    def data_point(index)
      @data_pts[index]
    end

    # Append a data point
    def add_data_point(dp)
      @data_pts << dp
    end

    # Remove and return the last data point (nil if there are none)
    def pop_last_point
      @data_pts.pop
    end

    # Find position of a point, using DataPoint.match to compare each
    # stored point with pt; returns -1 if not found
    def find_point(pt)
      @data_pts.index { |dp| DataPoint.match(dp,pt) } || -1
    end

    def to_s
      names = @data_pts.map { |dp| " #{dp.name}" }.join
      "LEAF=> ##{name} us=#{used} ov=#{overflow} [#{names}]"
    end

    def inspect
      to_s
    end
  end

  # Internal node; holds NODEI_CHILDREN partitions (slots)
  #
  class NodeI < Node

    attr_accessor :population

    def initialize(name, vertical, bounds)
      super(name,false,vertical,bounds)
      @population = 0
      @p = Array.new(NODEI_CHILDREN) { Partition.new }
    end

    # Add amt (which may be negative) to the population
    def adjust_population(amt)
      @population += amt
    end

    def slot(slot_index)
      @p[slot_index]
    end

    def set_slot(slot, p)
      @p[slot] = p
    end

    # Determine which slot intersects a line perpendicular to the bounds
    # > linePosition if node is horizontal, the x coordinate of the line; else, the y coordinate
    # < slot index (-1 if the line lies before the first slot's start position)
    def slot_intersecting_line(line_position)
      # Binary search for the first slot starting beyond the line; this
      # relies on the start positions being in nondecreasing order
      s0 = 0
      s1 = NODEI_CHILDREN
      while s0 < s1
        s = (s0 + s1) / 2
        if @p[s].start_position > line_position
          s1 = s
        else
          s0 = s + 1
        end
      end
      s0 - 1
    end

    # Determine which slot contains a particular point
    # (assumes point lies within the bounds of some slot)
    def slot_containing_point(loc)
      slot_intersecting_line(vertical ? loc.y : loc.x)
    end

    def set_slot_child(slot, child_name)
      @p[slot].child_name = child_name
    end

    def slot_child(slot)
      @p[slot].child_name
    end

    # Calculate the bounds of a slot: it extends from its start position to
    # the start of the next slot (or, for the last slot, to the far edge of
    # the node's bounds)
    def slot_bounds(slot)
      # Work in flipped coordinates if vertical, so the partition axis is x
      nb = vertical ? bounds.flip : bounds

      x = @p[slot].start_position
      x2 = (slot+1 < NODEI_CHILDREN) ? @p[slot+1].start_position : nb.x2

      b = Bounds.new(x,nb.y,x2-x,nb.h)
      vertical ? b.flip : b
    end

    # Clear (set to zero) the child name of every slot referring to name
    def remove_child_named(name)
      @p.each do |p|
        p.child_name = 0 if p.child_name == name
      end
    end

    def to_s
      s = "INTR=> ##{name} #{vertical ? 'V' : 'H'} pop=#{population} bnds #{bounds} "

      NODEI_CHILDREN.times do |i|
        pt = slot(i)

        # Flip so that x/x2 report the extent along the partition axis
        b = slot_bounds(i)
        b = b.flip if vertical

        s << "#{b.x}/#{b.x2}--> #{pt.child_name} "
      end
      s
    end

    def inspect
      to_s
    end

  end

end
|