geotree 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.txt +3 -0
- data/README.txt +6 -0
- data/lib/geotree.rb +1 -0
- data/lib/geotree/blockfile.rb +453 -0
- data/lib/geotree/bounds.rb +81 -0
- data/lib/geotree/datapoint.rb +68 -0
- data/lib/geotree/diskblockfile.rb +64 -0
- data/lib/geotree/externalsort.rb +369 -0
- data/lib/geotree/geotree.rb +980 -0
- data/lib/geotree/loc.rb +76 -0
- data/lib/geotree/multitree.rb +190 -0
- data/lib/geotree/node.rb +252 -0
- data/lib/geotree/pswriter.rb +471 -0
- data/lib/geotree/ptbuffer.rb +120 -0
- data/lib/geotree/tools.rb +626 -0
- data/test/test_blockfile.rb +153 -0
- data/test/test_externalsort.rb +139 -0
- data/test/test_geotree.rb +432 -0
- data/test/test_ps.rb +56 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4e790d44323caacbf4a2bd3ec69bc02bb55dc847
|
4
|
+
data.tar.gz: bdb2927e4dea7857cd9863cbc2b53ee0c089bef7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: eef40796846cbabd4bddd8f32cef9334f7b4e6b0d17fcb851af0247907c8e89b9582808179bed41dc635069965e2c634a8c55e0fa5a1411f0ef09d23f4ca4980
|
7
|
+
data.tar.gz: e92b0f2e40906f8bc9853b21af28af24691a0b6d8e77682fd1fb8a557b36f9e520c33e092aa8b6ae4fe2f47eb78035460d901b8744c85bd56c5d5e4a6be4e329
|
data/CHANGELOG.txt
ADDED
data/README.txt
ADDED
data/lib/geotree.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require_relative 'lib/geotree'
|
@@ -0,0 +1,453 @@
|
|
1
|
+
require_relative 'tools'
|
2
|
+
|
3
|
+
# Block file.
|
4
|
+
#
|
5
|
+
# For storing data in a collection of 'blocks', fixed-length arrays of bytes,
|
6
|
+
# with facility for recycling blocks that are no longer used.
|
7
|
+
#
|
8
|
+
# Base class is in-memory only; subclass to persist blocks, e.g., to a file system.
|
9
|
+
# The DiskBlockFile class does exactly this.
|
10
|
+
#
|
11
|
+
# Each block has a unique 'name'. This is a positive integer. The word 'name' is
|
12
|
+
# chosen instead of 'id' to avoid conflicting with the use of 'id' in some languages.
|
13
|
+
#
|
14
|
+
# Each block file header includes space for four integers, for use by the application.
|
15
|
+
#
|
16
|
+
class BlockFile

  FIRST_BLOCK_ID = 2
  USER_HEADER_INTS = 4

  #---- All constants ending with '_' will be made private

  VERSION_ = 1965

  # Header block fields; each is an integer
  INT_BYTES_ = 4
  HDR_VERSION_ = 0
  HDR_BLOCKSIZE_ = 1
  HDR_MAXINDEX_ = 2
  HDR_RECYCLEINDEX_ = 3
  HDR_USERSTART_ = 4
  HDR_SIZE_BYTES_ = (HDR_USERSTART_ + USER_HEADER_INTS) * INT_BYTES_

  # Fields of a recycle directory block
  RC_PREV_DIR_NAME_ = 0
  RC_ENTRIES_USED_ = 1
  RC_ENTRIES_START_ = 2

  privatize(self)

  #---------------------------------------------

  # Size of blocks, in bytes
  attr_reader :block_size

  # Constructor. Constructs a file, initially closed.
  # @param block_size size of blocks, in bytes; raised to the header size
  #   if smaller, so that the header always fits within a single block
  def initialize(block_size)
    @block_size = [block_size, HDR_SIZE_BYTES_].max

    @header_data = nil
    @recycle_data = nil
    @header_modified = false

    # for in-memory version only:
    @mem_file = nil
  end

  # Determine if file is open
  # @return true if the header block is currently held in memory
  def open?
    !@header_data.nil?
  end

  # Open file. Creates underlying storage if necessary.
  # File must not already be open.
  # @return true if underlying storage already existed
  # @raise IllegalStateException if the file is already open
  # @raise ArgumentError if existing storage has a different version or
  #   block size; in that case the file is left closed
  def open
    raise IllegalStateException if open?

    existed = open_storage
    if existed
      begin
        @header_data = read(0)
        if BlockFile.read_int(@header_data, HDR_VERSION_) != VERSION_
          raise ArgumentError, "bad version"
        end
        if BlockFile.read_int(@header_data, HDR_BLOCKSIZE_) != block_size
          raise ArgumentError, "unexpected block size"
        end
        @recycle_data = read(rdir_head_name)
      rescue StandardError
        # Don't leave a half-opened file behind: without this, open? would
        # report true even though the header failed validation.
        @header_data = nil
        @recycle_data = nil
        close_storage
        raise
      end
    else
      # Fresh storage: block 0 is the header, block 1 the first (empty)
      # recycle directory.
      @header_data = alloc_buffer
      BlockFile.write_int(@header_data, HDR_VERSION_, VERSION_)
      BlockFile.write_int(@header_data, HDR_BLOCKSIZE_, block_size)
      BlockFile.write_int(@header_data, HDR_RECYCLEINDEX_, 1)
      append_or_replace(0, @header_data)

      @recycle_data = alloc_buffer
      append_or_replace(rdir_head_name, @recycle_data)

      aux_flush
    end
    existed
  end

  # Allocate a new block. First block allocated will have name = FIRST_BLOCK_ID.
  # Recycled blocks are reused before the file is extended.
  # @param src block data to write; if nil, allocates and writes zeros
  # @return name of block
  # @raise IllegalStateException if the file is not open
  def alloc(src = nil)
    ensure_open

    src ||= alloc_buffer

    # name of the recycle directory at the head of the list
    dir_name = rdir_head_name

    # any entries remain in this directory?
    used = get_rdir_slots_used

    if used == 0
      prev_dir = get_rdir_next_name
      if prev_dir > 0
        # head directory is empty and not the last one:
        # use the directory itself as the new block
        ret = dir_name
        write_hdr(HDR_RECYCLEINDEX_, prev_dir)
        read(prev_dir, @recycle_data)
      else
        # nothing to recycle; extend the file
        ret = name_max
      end
    else
      # pop the last entry from the head directory
      slot = used - 1
      ret = get_rdir_slot(slot)
      set_rdir_slot(slot, 0)
      set_rdir_slots_used(slot)
      append_or_replace(dir_name, @recycle_data)
    end

    append_or_replace(ret, src)
    ret
  end

  # Free up a block, making its name available to later calls to alloc
  # @param block_name name of block to free; must be a name previously
  #   returned by alloc
  # @raise IllegalStateException if the file is not open
  # @raise ArgumentError if block_name is not in FIRST_BLOCK_ID...name_max
  def free(block_name)
    ensure_open

    # Blocks below FIRST_BLOCK_ID (the header and the initial recycle
    # directory) are never handed out by alloc, so freeing them would
    # corrupt the file.
    if block_name < FIRST_BLOCK_ID || block_name >= name_max
      raise ArgumentError, "no such block: #{block_name}"
    end

    slot = get_rdir_slots_used

    if slot < get_rdir_capacity
      # there is a free slot in the current recycle block; use it
      set_rdir_slot(slot, block_name)
      set_rdir_slots_used(slot + 1)
      append_or_replace(rdir_head_name, @recycle_data)
    else
      # use freed block as next recycle page, linked to the previous head
      old_dir = rdir_head_name
      write_hdr(HDR_RECYCLEINDEX_, block_name)

      # The freed block's old contents are irrelevant, so the buffer is
      # simply zeroed rather than read from storage first.
      BlockFile.clear_block(@recycle_data)

      set_rdir_next_name(old_dir)
      append_or_replace(block_name, @recycle_data)
    end
  end

  # Flush any pending header changes, close underlying storage and
  # return to the closed state.
  # @raise IllegalStateException if the file is not open
  def close
    ensure_open
    aux_flush
    close_storage

    @header_data = nil
    @recycle_data = nil
    @mem_file = nil
  end

  # Read one of the user values from the header
  # @param int_index index of user value (0..3)
  # @raise ArgumentError if int_index is out of range
  def read_user(int_index)
    ensure_open
    check_user_index(int_index)
    BlockFile.read_int(@header_data, HDR_USERSTART_ + int_index)
  end

  # Write a user value
  # @param int_index index of user value (0..3)
  # @param value value to write
  # @raise ArgumentError if int_index is out of range
  def write_user(int_index, value)
    ensure_open
    check_user_index(int_index)
    BlockFile.write_int(@header_data, HDR_USERSTART_ + int_index, value)
    @header_modified = true
  end

  def inspect
    to_s
  end

  # Build a description of this file. If the file is open, the description
  # includes a map of the blocks: 'H' header, 'R' recycle directory,
  # 'r' recycled (free) block, '.' anything else.
  #
  # The map is part of the returned string; nothing is written to stdout.
  # @return description string
  def to_s
    s = "BlockFile blockSize:#{block_size} "
    if open?
      s << "\n name_max=#{name_max}"
      s << "\n rdir_head_name=#{rdir_head_name}"
      s << "\n"
      s << block_map_to_s
    end
    s
  end

  # Display a hex dump of a block (via the hex_dump helper)
  # @param block_name name of block to dump
  def dump(block_name)
    hex_dump(read(block_name), "Block #{block_name}")
  end

  # Create an array of bytes, all zeros, of length equal to this block file's block length
  def alloc_buffer
    zero_bytes(@block_size)
  end

  # Read an integer from a block of bytes.
  # The integer is stored as four bytes, most significant first, and is
  # interpreted as signed.
  # @param block block of bytes
  # @param int_offset offset within block, in integers (not bytes)
  # @return the integer
  def self.read_int(block, int_offset)
    j = int_offset * INT_BYTES_

    # We must treat the most significant byte as a signed byte
    high_byte = block[j].ord
    high_byte -= 256 if high_byte > 127

    (high_byte << 24) | (block[j + 1].ord << 16) | (block[j + 2].ord << 8) | block[j + 3].ord
  end

  # Write an integer into a block of bytes.
  # Only the low 32 bits of the value are stored, most significant byte first.
  # @param block block of bytes
  # @param int_offset offset within block, in integers (not bytes)
  # @param value integer to write
  def self.write_int(block, int_offset, value)
    j = int_offset * INT_BYTES_
    block[j] = ((value >> 24) & 0xff).chr
    block[j + 1] = ((value >> 16) & 0xff).chr
    block[j + 2] = ((value >> 8) & 0xff).chr
    block[j + 3] = (value & 0xff).chr
  end

  # Clear block to zeros (in place)
  def self.clear_block(block)
    block[0..-1] = zero_bytes(block.size)
  end

  # Replace the entire contents of dest (in place) with the contents of src
  def self.copy_block(dest, src)
    dest[0..-1] = src
  end

  # -------------------------------------------------------
  # These methods should be overridden by subclasses
  # for block files that are not to be memory-only (as
  # the default implementations assume)

  # Read block from storage.
  # @param block_name index of block
  # @param dest_buffer where to store data; if nil, you should
  #   call alloc_buffer to create it
  # @return buffer
  # @raise ArgumentError if no such block exists
  def read(block_name, dest_buffer = nil)
    # A negative name must be rejected explicitly; otherwise Ruby's
    # negative array indexing would silently return a block from the end.
    if block_name < 0 || block_name >= @mem_file.size
      raise ArgumentError, "No such block name #{block_name} exists (size=#{@mem_file.size})"
    end

    dest_buffer ||= alloc_buffer
    BlockFile.copy_block(dest_buffer, @mem_file[block_name])
    dest_buffer
  end

  # Write block to storage.
  # Name is either index of existing block, or
  # number of existing blocks (to append to end of existing ones)
  # @param block_name name of block
  # @param src_buffer data to write
  # @raise ArgumentError if block_name is negative or beyond the append position
  def write(block_name, src_buffer)
    if block_name < 0 || block_name > @mem_file.size
      raise ArgumentError, "No such block name #{block_name} exists (size=#{@mem_file.size})"
    end

    @mem_file << alloc_buffer if block_name == @mem_file.size
    BlockFile.copy_block(@mem_file[block_name], src_buffer)
  end

  # Open underlying storage; create it if necessary
  # @return true if underlying storage already existed
  def open_storage
    @mem_file = []
    false
  end

  # Close underlying storage
  #
  def close_storage
    @mem_file = nil
  end

  # Flush underlying storage
  #
  def flush
  end

  # -------------------------------------------------------

  # Get 1 + name of highest block ever created
  def name_max
    BlockFile.read_int(@header_data, HDR_MAXINDEX_)
  end

  private

  # Write the header block back to storage if it has been modified,
  # then flush the underlying storage
  def aux_flush
    if @header_modified
      append_or_replace(0, @header_data)
      @header_modified = false
    end
    flush
  end

  # @raise ArgumentError if name is not less than name_max
  def ensure_block_exists(name)
    raise(ArgumentError, "no such block: #{name} (name_max=#{name_max})") if name >= name_max
  end

  # @raise IllegalStateException if the file is not open
  def ensure_open
    raise IllegalStateException unless open?
  end

  # @raise ArgumentError if int_index is not a valid user header index
  def check_user_index(int_index)
    return if (0...USER_HEADER_INTS).cover?(int_index)

    raise ArgumentError, "user value index #{int_index} out of range (0...#{USER_HEADER_INTS})"
  end

  # Build the block map portion of to_s: a banner, then rows of 64 blocks
  # (each prefixed by the hex name of its first block, with a space between
  # every group of four), then a closing banner.
  # @return map as a newline-terminated string
  def block_map_to_s
    usage = { 0 => 'H' }

    # Walk the singly-linked list of recycle directories
    dir = rdir_head_name
    while dir != 0
      usage[dir] = 'R'
      data = read(dir)
      BlockFile.read_int(data, RC_ENTRIES_USED_).times do |i|
        usage[BlockFile.read_int(data, RC_ENTRIES_START_ + i)] = 'r'
      end
      dir = BlockFile.read_int(data, RC_PREV_DIR_NAME_)
    end

    row_size = 64
    rows = (0...name_max).each_slice(row_size).map do |names|
      groups = names.each_slice(4).map do |group|
        group.map { |i| usage[i] || '.' }.join
      end
      format('%04x: ', names.first) + groups.join(' ')
    end

    lines = ["------------- Block Map --------------"]
    lines.concat(rows)
    lines << "--------------------------------------"
    lines.join("\n") + "\n"
  end

  # Determine how many names can be stored in a single
  # recycle directory block
  #
  def get_rdir_capacity
    (@block_size / INT_BYTES_) - RC_ENTRIES_START_
  end

  # Get the number of entries in use in the current recycle directory
  def get_rdir_slots_used
    BlockFile.read_int(@recycle_data, RC_ENTRIES_USED_)
  end

  # Set the number of entries in use in the current recycle directory
  def set_rdir_slots_used(n)
    BlockFile.write_int(@recycle_data, RC_ENTRIES_USED_, n)
  end

  # Get name of next recycle directory block
  # @return name of next, or 0 if there are no more
  def get_rdir_next_name
    BlockFile.read_int(@recycle_data, RC_PREV_DIR_NAME_)
  end

  # Read the block name stored in an entry of the current recycle directory
  def get_rdir_slot(offset)
    BlockFile.read_int(@recycle_data, RC_ENTRIES_START_ + offset)
  end

  # Store a block name in an entry of the current recycle directory
  def set_rdir_slot(offset, value)
    BlockFile.write_int(@recycle_data, RC_ENTRIES_START_ + offset, value)
  end

  # Set name of next recycle directory block
  def set_rdir_next_name(n)
    BlockFile.write_int(@recycle_data, RC_PREV_DIR_NAME_, n)
  end

  # Get name of first recycle directory block (they are connected as
  # a singly-linked list)
  #
  def rdir_head_name
    BlockFile.read_int(@header_data, HDR_RECYCLEINDEX_)
  end

  # Write a block; if we're appending a new block, increment
  # the name_max field
  # @raise ArgumentError if block_name is negative or greater than name_max
  def append_or_replace(block_name, src)
    if block_name < 0 || block_name > name_max
      raise ArgumentError, "write to nonexistent block #{block_name}; only #{name_max} in file"
    end

    write_hdr(HDR_MAXINDEX_, block_name + 1) if block_name == name_max

    write(block_name, src)
  end

  # Write a value to a header field, mark header as modified
  # @param field field number (HDR_xxx)
  # @param value value to write
  #
  def write_hdr(field, value)
    BlockFile.write_int(@header_data, field, value)
    @header_modified = true
  end

end
|