geomodel 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,467 @@
1
+ module Geomodel
2
+
3
+ # Defines the notion of 'geocells' and exposes methods to operate on them.
4
+ #
5
+ # A geocell is a hexadecimal string that defines a two dimensional rectangular
6
+ # region inside the [-90,90] x [-180,180] latitude/longitude space. A geocell's
7
+ # 'resolution' is its length. For most practical purposes, at high resolutions,
8
+ # geocells can be treated as single points.
9
+ #
10
+ # Much like geohashes [see http://en.wikipedia.org/wiki/Geohash], geocells are
11
+ # hierarchical, in that any prefix of a geocell is considered its ancestor, with
12
+ # geocell[0...-1] being geocell's immediate parent cell.
13
+ #
14
+ # To calculate the rectangle of a given geocell string, first divide the
15
+ # [-90,90] x [-180,180] latitude/longitude space evenly into a 4x4 grid like so:
16
+ #
17
+ # +---+---+---+---+ [90, 180]
18
+ # | a | b | e | f |
19
+ # +---+---+---+---+
20
+ # | 8 | 9 | c | d |
21
+ # +---+---+---+---+
22
+ # | 2 | 3 | 6 | 7 |
23
+ # +---+---+---+---+
24
+ # | 0 | 1 | 4 | 5 |
25
+ # [-90,-180] +---+---+---+---+
26
+ #
27
+ # NOTE: The point [0, 0] is at the intersection of grid cells 3, 6, 9 and c. And,
28
+ # for example, cell 7 should be the sub-rectangle from
29
+ # [-45, 90] to [0, 180].
30
+ #
31
+ # Calculate the sub-rectangle for the first character of the geocell string and
32
+ # re-divide this sub-rectangle into another 4x4 grid. For example, if the geocell
33
+ # string is '78a', we will re-divide the sub-rectangle like so:
34
+ #
35
+ # . .
36
+ # . .
37
+ # . . +----+----+----+----+ [0, 180]
38
+ # | 7a | 7b | 7e | 7f |
39
+ # +----+----+----+----+
40
+ # | 78 | 79 | 7c | 7d |
41
+ # +----+----+----+----+
42
+ # | 72 | 73 | 76 | 77 |
43
+ # +----+----+----+----+
44
+ # | 70 | 71 | 74 | 75 |
45
+ # . . [-45,90] +----+----+----+----+
46
+ # . .
47
+ # . .
48
+ #
49
+ # Continue to re-divide into sub-rectangles and 4x4 grids until the entire
50
+ # geocell string has been exhausted. The final sub-rectangle is the rectangular
51
+ # region for the geocell.
52
+ #
53
+ module GeoCell
54
# Geocell algorithm constants.
#
# Every geocell character selects one cell of a GEOCELL_GRID_SIZE x
# GEOCELL_GRID_SIZE grid; GEOCELL_ALPHABET supplies the characters.
# Frozen so that shared constant data cannot be mutated by accident.
GEOCELL_GRID_SIZE = 4
GEOCELL_ALPHABET = '0123456789abcdef'.freeze

# The maximum *practical* geocell resolution.
MAX_GEOCELL_RESOLUTION = 13

# The maximum number of geocells to consider for a bounding box search.
MAX_FEASIBLE_BBOX_SEARCH_CELLS = 300

# Direction enumerations, written as [x, y] pairs: x is -1 for West and
# 1 for East, y is -1 for South and 1 for North.
NORTHWEST = [-1, 1].freeze
NORTH = [0, 1].freeze
NORTHEAST = [1, 1].freeze
EAST = [1, 0].freeze
SOUTHEAST = [1, -1].freeze
SOUTH = [0, -1].freeze
SOUTHWEST = [-1, -1].freeze
WEST = [-1, 0].freeze
73
+
74
# Returns the geocells containing the given point at every resolution from 1
# up to MAX_GEOCELL_RESOLUTION.
#
# Args:
#   point: The point to generate geocells for; passed unchanged to compute.
#
# Returns:
#   A list of MAX_GEOCELL_RESOLUTION geocell strings, ordered from the
#   lowest resolution (1 character) to the highest.
#
def self.generate_geocells(point)
  # compute builds a cell one character at a time, so the cell at a lower
  # resolution is simply a prefix of the maximum-resolution cell. Compute
  # once and slice instead of recomputing for every resolution.
  geocell_max = compute(point, MAX_GEOCELL_RESOLUTION)

  (1..MAX_GEOCELL_RESOLUTION).map { |resolution| geocell_max[0, resolution] }
end
81
+
82
# Returns an efficient set of geocells to search in a bounding box query.
#
# This method is guaranteed to return a set of geocells having the same
# resolution.
#
# Args:
#   bbox: The bounding box being searched; must respond to north_east and
#         south_west, whose values are passed to compute.
#   cost_function: A callable (responds to call) that accepts two arguments:
#       * num_cells: the number of cells to search
#       * resolution: the resolution of each cell to search
#     and returns the 'cost' of querying against this number of cells
#     at the given resolution.
#
# Returns:
#   A sorted list of geocell strings that contain the given box, or nil when
#   no examined resolution produced a feasible cell set.
#
def self.best_bbox_search_cells(bbox, cost_function)
  cell_ne = compute(bbox.north_east, MAX_GEOCELL_RESOLUTION)
  cell_sw = compute(bbox.south_west, MAX_GEOCELL_RESOLUTION)

  # The current lowest BBOX-search cost found; start with infinity.
  min_cost = Float::INFINITY

  # The set of cells having the lowest calculated BBOX-search cost.
  min_cost_cell_set = nil

  # First find the common prefix, if there is one. Its length is the base
  # resolution, i.e. there is no need to look at any coarser cells.
  min_resolution = common_prefix([cell_sw, cell_ne]).size

  # Iteratively calculate all possible sets of cells that wholly contain
  # the requested bounding box. The upper bound is MAX_GEOCELL_RESOLUTION
  # itself: the corner cells have no more characters than that, so a further
  # step would only re-evaluate the same cells under a bogus resolution.
  (min_resolution..MAX_GEOCELL_RESOLUTION).each do |cur_resolution|
    cur_ne = cell_ne[0...cur_resolution]
    cur_sw = cell_sw[0...cur_resolution]

    # Skip resolutions whose grid would be too large to search.
    next if interpolation_count(cur_ne, cur_sw) > MAX_FEASIBLE_BBOX_SEARCH_CELLS

    cell_set = interpolate(cur_ne, cur_sw).sort
    cost = cost_function.call(cell_set.size, cur_resolution)

    # TODO(romannurik): See if this resolution is even possible, as in the
    # future cells at certain resolutions may not be stored.
    if cost <= min_cost
      min_cost = cost
      min_cost_cell_set = cell_set
    else
      # Once the cost starts rising, we won't be able to do better, so abort.
      break
    end
  end

  min_cost_cell_set
end
140
+
141
# Determines whether the given cells are collinear along a dimension.
#
# Only the leading characters that both cells have in common (up to the
# length of the shorter cell) are compared.
#
# Args:
#   cell1: The first geocell string.
#   cell2: The second geocell string.
#   column_test: A boolean; a falsy value runs the row test (the cells share
#                the same y at every level) and a truthy value runs the
#                column test (the cells share the same x at every level).
#
# Returns:
#   true if the cells are collinear in the requested dimension, else false.
#
def self.collinear(cell1, cell2, column_test)
  shared_length = [cell1.size, cell2.size].min

  shared_length.times.all? do |pos|
    col1, row1 = subdiv_xy(cell1[pos])
    col2, row2 = subdiv_xy(cell2[pos])

    column_test ? col1 == col2 : row1 == row2
  end
end
172
+
173
# Calculates the grid of cells formed between the two given cells.
#
# Generates the set of cells in the grid created by interpolating from the
# given Northeast geocell to the given Southwest geocell.
#
# Assumes the Northeast geocell is actually Northeast of Southwest geocell.
# If that assumption does not hold and adjacent reports that there is no
# further cell (nil), interpolation stops and the cells gathered so far are
# returned instead of looping forever.
#
# Arguments:
#   cell_ne: The Northeast geocell string.
#   cell_sw: The Southwest geocell string.
#
# Returns:
#   A list of geocell strings in the interpolation, row by row starting
#   from the Southwest cell.
#
def self.interpolate(cell_ne, cell_sw)
  # 2D array (list of rows), flattened before returning.
  cell_set = [[cell_sw]]

  # First get adjacent geocells across until Southeast: being in the same
  # column as Northeast means we're at Southeast.
  until collinear(cell_set.first.last, cell_ne, true)
    cell_tmp = adjacent(cell_set.first.last, [1, 0])
    # Without this break a nil neighbour would leave the row unchanged and
    # the loop condition would never become true.
    break if cell_tmp.nil?

    cell_set.first << cell_tmp
  end

  # Then get adjacent geocells upwards, one whole row at a time.
  until cell_set.last.last == cell_ne
    cell_tmp_row = cell_set.last.map { |g| adjacent(g, [0, 1]) }
    # A nil neighbour means there is no row above; stop rather than retry
    # the same row forever.
    break if cell_tmp_row.first.nil?

    cell_set << cell_tmp_row
  end

  # Flatten cell_set, since it's currently a 2D array.
  cell_set.flatten
end
207
+
208
# Computes the number of cells in the grid formed between two given cells.
#
# The grid is the one created by interpolating from the given Northeast
# geocell to the given Southwest geocell. Assumes the Northeast geocell is
# actually Northeast of the Southwest geocell. The size of a single cell is
# taken from the Southwest cell's bounding box.
#
# Arguments:
#   cell_ne: The Northeast geocell string.
#   cell_sw: The Southwest geocell string.
#
# Returns:
#   An int, indicating the number of geocells in the interpolation.
#
def self.interpolation_count(cell_ne, cell_sw)
  ne_box = compute_box(cell_ne)
  sw_box = compute_box(cell_sw)

  # Height and width of one cell.
  lat_step = sw_box.north - sw_box.south
  lon_step = sw_box.east - sw_box.west

  # How many cell widths / heights fit between the outer edges.
  columns = ((ne_box.east - sw_box.west) / lon_step).to_i
  rows = ((ne_box.north - sw_box.south) / lat_step).to_i

  columns * rows
end
234
+
235
# Calculates all of the given geocell's adjacent geocells.
#
# Args:
#   cell: The geocell string for which to calculate adjacent/neighboring cells.
#
# Returns:
#   A list of 8 entries, one per direction in the order NORTHWEST, NORTH,
#   NORTHEAST, EAST, SOUTHEAST, SOUTH, SOUTHWEST, WEST. Each entry is
#   whatever adjacent returns for that direction (a geocell string or nil).
#
def self.all_adjacents(cell)
  directions = [NORTHWEST, NORTH, NORTHEAST, EAST, SOUTHEAST, SOUTH, SOUTHWEST, WEST]

  directions.map do |direction|
    adjacent(cell, direction)
  end
end
246
+
247
# Calculates the geocell adjacent to the given cell in the given direction.
#
# Args:
#   cell: The geocell string whose neighbor is being calculated.
#   dir: An [x, y] pair indicating direction, where x and y can be -1, 0, or 1.
#        -1 corresponds to West for x and South for y, and
#        1 corresponds to East for x and North for y.
#        Available helper constants are NORTH, EAST, SOUTH, WEST,
#        NORTHEAST, NORTHWEST, SOUTHEAST, and SOUTHWEST.
#
# Returns:
#   The geocell adjacent to the given cell in the given direction, or nil if
#   the cell is nil or the move would wrap vertically.
#
def self.adjacent(cell, dir)
  return nil if cell.nil?

  step_x = dir[0]
  step_y = dir[1]
  far_edge = GEOCELL_GRID_SIZE - 1

  # Moves one coordinate by one step inside its parent grid. Returns the new
  # coordinate plus the step still outstanding: the step is cleared (0) when
  # the move stayed inside the parent, and kept when the coordinate wrapped
  # to the opposite edge so the parent level has to move as well.
  shift = lambda do |coord, step|
    if step == -1
      coord == 0 ? [far_edge, step] : [coord - 1, 0]
    elsif step == 1
      coord == far_edge ? [0, step] : [coord + 1, 0]
    else
      [coord, step]
    end
  end

  # Work on the individual characters, from the last (finest) level upwards.
  chars = cell.split(//)

  (chars.size - 1).downto(0) do |pos|
    break if step_x == 0 && step_y == 0 # Nothing left to carry upwards.

    col, row = subdiv_xy(chars[pos])

    col, step_x = shift.call(col, step_x) # Horizontal adjacency.
    row, step_y = shift.call(row, step_y) # Vertical adjacency.

    chars[pos] = subdiv_char([col, row])
  end

  # If we're not done with y then it's trying to wrap vertically,
  # which is a failure.
  return nil if step_y != 0

  # At this point, horizontal wrapping is done inherently.
  chars.join('')
end
318
+
319
# Returns whether or not the given cell contains the given point.
#
# The point's geocell is computed at the same resolution as the given cell
# (its length) and compared with it.
def self.contains_point(cell, point)
  resolution = cell.size
  cell_for_point = compute(point, resolution)

  cell_for_point == cell
end
323
+
324
# Returns the shortest distance between a point and a geocell bounding box.
#
# If the point is inside the cell, the shortest distance is always to an
# 'edge' of the cell rectangle. If the point is outside the cell, the
# shortest distance will be to either an 'edge' or a 'corner' of the cell
# rectangle.
#
# Args:
#   cell: The geocell string whose bounding box is measured against.
#   point: The point to measure from; must respond to lat and lon.
#
# Returns:
#   The shortest distance from the point to the geocell's rectangle, as
#   computed by Geocoder::GeoMath.distance (documented here as meters).
#
# NOTE(review): this method reads point.lat / point.lon while compute reads
# point.latitude / point.longitude -- confirm the point type offers both.
#
def self.point_distance(cell, point)
  bbox = compute_box(cell)

  between_w_e = bbox.west <= point.lon && point.lon <= bbox.east
  between_n_s = bbox.south <= point.lat && point.lat <= bbox.north

  # Candidate [lat, lon] locations on the rectangle; the answer is the
  # distance to the closest of them.
  candidates =
    if between_w_e && between_n_s
      # Inside the geocell: project onto all four edges.
      [[bbox.south, point.lon], [bbox.north, point.lon],
       [point.lat, bbox.east], [point.lat, bbox.west]]
    elsif between_w_e
      # Directly above or below: project onto the south and north edges.
      [[bbox.south, point.lon], [bbox.north, point.lon]]
    elsif between_n_s
      # Directly left or right: project onto the east and west edges.
      [[point.lat, bbox.east], [point.lat, bbox.west]]
    else
      # Diagonal: measure to the four corners.
      # TODO(romannurik): optimize
      [[bbox.south, bbox.east], [bbox.north, bbox.east],
       [bbox.south, bbox.west], [bbox.north, bbox.west]]
    end

  # Every branch must reduce to a single distance; previously the last two
  # cases returned the whole list of distances.
  candidates.map do |lat, lon|
    Geocoder::GeoMath.distance(point, Geomodel::Types::Point.new(lat, lon))
  end.min
end
363
+
364
# Computes the geocell containing the given point to the given resolution.
#
# This is a simple 16-tree lookup to an arbitrary depth (resolution): the
# current rectangle starts as the whole [-90,90] x [-180,180] space and is
# narrowed to the sub-cell holding the point once per character.
#
# Args:
#   point: The point to compute the cell for; must respond to latitude and
#          longitude.
#   resolution: An int indicating the resolution of the cell to compute.
#
# Returns:
#   The geocell string containing the given point, of length <resolution>.
#
def self.compute(point, resolution = MAX_GEOCELL_RESOLUTION)
  south = -90.0
  north = 90.0
  west = -180.0
  east = 180.0
  last_index = GEOCELL_GRID_SIZE - 1

  cell = ''
  until cell.size >= resolution
    lon_step = (east - west) / GEOCELL_GRID_SIZE
    lat_step = (north - south) / GEOCELL_GRID_SIZE

    # Grid position of the point inside the current rectangle; capped so a
    # point lying exactly on the east/north edge stays in the last cell.
    col = (GEOCELL_GRID_SIZE * (point.longitude - west) / (east - west)).to_i
    col = [col, last_index].min
    row = (GEOCELL_GRID_SIZE * (point.latitude - south) / (north - south)).to_i
    row = [row, last_index].min

    cell += subdiv_char([col, row])

    # Narrow the rectangle down to the chosen sub-cell.
    south += lat_step * row
    north = south + lat_step

    west += lon_step * col
    east = west + lon_step
  end

  cell
end
402
+
403
# Computes the rectangular boundaries (bounding box) of the given geocell.
#
# Starts from the whole [-90,90] x [-180,180] space and, for every character
# of the cell, narrows the box to the sub-rectangle that character selects.
#
# Args:
#   cell: The geocell string whose boundaries are to be computed.
#
# Returns:
#   A Geomodel::Types::Box corresponding to the rectangular boundaries of the
#   geocell, or nil if cell is nil.
#
def self.compute_box(cell)
  return nil if cell.nil?

  # Box arguments are given as (north, east, south, west) -- presumably the
  # constructor's order; it matches how the accessors are read below.
  whole_world = Geomodel::Types::Box.new(90.0, 180.0, -90.0, -180.0)

  cell.each_char.inject(whole_world) do |box, char|
    lon_step = (box.east - box.west) / GEOCELL_GRID_SIZE
    lat_step = (box.north - box.south) / GEOCELL_GRID_SIZE

    col, row = subdiv_xy(char)

    Geomodel::Types::Box.new(box.south + lat_step * (row + 1),
                             box.west + lon_step * (col + 1),
                             box.south + lat_step * row,
                             box.west + lon_step * col)
  end
end
434
+
435
# Returns whether or not the given geocell string defines a valid geocell:
# it must be non-nil, non-empty and consist only of characters found in
# GEOCELL_ALPHABET.
def self.is_valid(cell)
  return false if cell.nil?
  return false unless cell.size > 0

  cell.each_char.all? { |char| GEOCELL_ALPHABET.include?(char) }
end
441
+
442
# Calculates the immediate children of the given geocell.
#
# For example, the immediate children of 'a' are 'a0', 'a1', ..., 'af'.
#
# Args:
#   cell: The parent geocell string.
#
# Returns:
#   A list with one geocell string per character of GEOCELL_ALPHABET, each
#   being the parent cell followed by that character.
#
def self.children(cell)
  # String is not Enumerable, so iterate over its characters explicitly.
  GEOCELL_ALPHABET.each_char.map { |chr| cell + chr }
end
449
+
450
# Returns the [x, y] position of the geocell character in the 4x4 alphabet
# grid. The character's index in GEOCELL_ALPHABET interleaves the two
# coordinates: bits 0 and 2 form x, bits 1 and 3 form y.
# NOTE: This only works for grid size 4.
def self.subdiv_xy(char)
  index = GEOCELL_ALPHABET.index(char)

  column = ((index & 4) >> 1) | (index & 1)
  row = ((index & 8) >> 2) | ((index & 2) >> 1)

  [column, row]
end
456
+
457
# Returns the geocell character in the 4x4 alphabet grid at pos. [x, y].
# The low two bits of x and y are interleaved (y3 x2 y1 x0) into an index
# into GEOCELL_ALPHABET.
# NOTE: This only works for grid size 4.
def self.subdiv_char(pos)
  column = pos[0]
  row = pos[1]

  index = ((row & 2) << 2) | ((column & 2) << 1) | ((row & 1) << 1) | (column & 1)

  GEOCELL_ALPHABET[index]
end
462
+
463
# Returns the longest prefix shared by every string in the list.
#
# Args:
#   list: A list of strings.
#
# Returns:
#   A new string holding the common prefix; '' when the list is empty or
#   the strings share no leading characters.
#
def self.common_prefix(list)
  return '' if list.empty?

  # The lexicographically smallest and largest strings differ at the
  # earliest position of any pair, so comparing just those two yields the
  # prefix common to the whole list. Unlike joining the strings with "\n"
  # and matching a backreference, this is correct for any string content.
  lowest, highest = list.minmax

  length = 0
  length += 1 while length < lowest.size && lowest[length] == highest[length]

  lowest[0, length]
end
466
+ end
467
+ end