geotree 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ require_relative 'tools'
2
+ req 'geotree externalsort'
3
+ require 'tempfile'
4
+
5
module GeoTreeModule
  # Buffers new points in a temporary file so that they can be shuffled
  # into a pseudo-random order before being added to a tree.
  #
  class PtBuffer
    include ExternalSortModule

    # The buffered points are shuffled by sorting them with a comparator
    # that ideally induces a random ordering.
    #
    # One comparator that gives good results hashes each point (CRC32) and
    # compares the hashes.  A simpler and faster one returns a random result
    # for every comparison.  Wikipedia cautions that the latter gives poor
    # results and can even loop forever with some sort algorithms, since it
    # does not induce a total ordering.  It is used here regardless, because
    # the goal is only a more-or-less balanced tree, not a mathematically
    # pure or cryptographically secure shuffle.
    #
    # NOTE(review): the disabled branch below calls GeoTree.pt_hash_code,
    # while the helper next to it is defined on PtBuffer; confirm which one
    # is intended before enabling that branch.
    if false
      require 'zlib'

      def self.pt_hash_code(b)
        buff,off = b
        c = buff[off,DATAPOINT_BYTES]
        Zlib::crc32(c)
      end
      PT_SHUFFLER_ = Proc.new do |x,y|
        GeoTree.pt_hash_code(x) <=> GeoTree.pt_hash_code(y)
      end
    else
      PT_SHUFFLER_ = Proc.new do |x,y|
        rand(2) == 0 ? -1 : 1
      end
    end

    # Construct an inactive buffer
    # @param tree tree to receive the points; its add_buffered_point() method
    #   is called for each point when the buffer is deactivated
    #
    def initialize(tree)
      @tree = tree
      @buffering = false
      @buff_file = nil
      @buffered_count = 0
    end

    # Return true if buffer is active
    def active
      @buffering
    end

    # Change buffer's active state.
    #
    # When the state changes and points have been buffered, the points are
    # shuffled and sent to the receiving tree.  The temporary file is always
    # closed and deleted and the counters reset, even if sorting or the tree
    # raises, so a failed flush cannot leave a stale file handle behind.
    #
    # @param val new active state
    def active=(val)
      return if @buffering == val

      @buffering = val
      begin
        flush_buffered_points if @buff_file
      ensure
        discard_buffer
      end
    end

    # Add point to buffer; if the buffer is inactive the point is passed
    # straight through to the tree.
    # @param data_point point to add
    # @raise [IOError] if the point could not be written completely
    def add(data_point)
      if !active
        @tree.add_buffered_point(data_point)
      else
        # The temporary file is created lazily, on the first buffered point.
        if @buffered_count == 0
          @buff_file = Tempfile.new('_geotree_')
          @buff_file.binmode
        end

        by = zero_bytes(DATAPOINT_BYTES)
        GeoTree.write_data_point(data_point, by, 0)
        nw = @buff_file.write(by)
        raise IOError, "short write to point buffer" if nw != by.size
        @buffered_count += 1
      end
    end

    private

    # Sort the buffered file into a random order, then feed every point to
    # the receiving tree.
    def flush_buffered_points
      @buff_file.close

      Sorter.new(@buff_file.path, DATAPOINT_BYTES, PT_SHUFFLER_).sort

      @buff_file.open
      @buff_file.binmode

      r = ChunkReader.new(@buff_file, 0, DATAPOINT_BYTES * @buffered_count, DATAPOINT_BYTES)
      while !r.done
        by,off = r.peek
        dp = GeoTree.read_data_point_from(by,off / INT_BYTES)
        r.read
        @tree.add_buffered_point(dp)
      end
    end

    # Close and delete the temporary file (if any) and reset the counters.
    def discard_buffer
      @buff_file.close! if @buff_file
      @buff_file = nil
      @buffered_count = 0
    end
  end
end
@@ -0,0 +1,626 @@
1
+ require 'set'
2
+ require 'fileutils'
3
+
4
+ # Various utility and debug convenience functions.
5
+ #
6
+
7
# A one-byte string holding a single NUL, in binary (ASCII-8BIT) encoding.
ZERO_CHAR = "\0".b

# Build a binary string consisting of NUL bytes.
# @param count number of bytes
# @return string of that many zero bytes
def zero_bytes(count)
  ZERO_CHAR * count
end
13
+
14
# Call 'require_relative' for each file named in a list.
#
# @param fileListStr space-delimited file/path items, without .rb extensions
# @param subdir optional directory (relative to this file) that is prepended
#   to every item
#
def req(fileListStr, subdir = nil)
  fileListStr.split(' ').each do |item|
    target = subdir ? File.join(subdir, item) : item
    require_relative("#{target}.rb")
  end
end
28
+
29
# Shorthand for printf(...)
# @param args format string and values, forwarded unchanged to printf
def pr(*args)
  Kernel.printf(*args)
end
34
+
35
+
36
# Describe an object for debugging.
# @param arg any object
# @return "<nil>" for nil, otherwise the object's inspect string
#
def d(arg)
  return "<nil>" if arg.nil?

  arg.inspect
end
42
+
43
+
44
# Describe an object for debugging, dispatching on its type:
# arrays go to da, hashes to dh, booleans to df, everything else to d.
# @param arg object
# @param indent optional indentation for pretty printing; passed on to
#   da / dh so multi-line results can be indented by this amount
#
def d2(arg, indent = 0)
  case arg
  when Array then da(arg, indent)
  when Hash then dh(arg, indent)
  when true, false then df(arg)
  else d(arg)
  end
end
56
+
57
# Describe an object for debugging, prefixed with its class name.
# @param arg any object
# @return "<nil>" for nil, otherwise "ClassName:inspect-string"
#
def dt(arg)
  arg.nil? ? "<nil>" : "#{arg.class}:#{arg.inspect}"
end
68
+
69
# Append spaces to a string, in place.
# @param s string to extend (mutated)
# @param indent number of spaces to append
# @return the same string object
def add_sp(s, indent = 0)
  s.concat(' ' * indent)
end
73
+
74
# Pretty-print an array, one element per line.
# @param array array to describe; a nil/false value is described via d
# @param indent indentation of each line, in spaces
# @return the description string
def da(array, indent = 0)
  return d(array) unless array

  pad = indent + 2
  text = +'Array ['
  array.each do |item|
    text << "\n"
    add_sp(text, pad)
    text << d2(item, pad + 2)
  end
  text << " ]"
end
90
+
91
# Pretty-print a hash, one key/value pair per line.
# @param hash hash to describe; a nil/false value is described via d
# @param indent indentation of each line, in spaces
# @return the description string
def dh(hash, indent = 0)
  return d(hash) unless hash

  pad = indent + 2
  text = +'Hash {'
  hash.each_pair do |key, val|
    key_text = d(key)
    val_text = d2(val, pad + 4)
    text << "\n "
    add_sp(text, pad)
    text << key_text.chomp << " => " << val_text.chomp
  end
  text << " }"
end
108
+
109
# Generate debug description of a boolean value.
# @param flag value to interpret as a boolean; yields 'T' when truthy
#   (neither nil nor false), otherwise 'F'
# @param label optional label, emitted as "label:" before the letter
# @return the description, always followed by a single space
def df(flag, label = nil)
  text = +''
  text << label << ':' if label
  text << (flag ? "T" : "F") << ' '
end
121
+
122
+
123
+
124
# Assert that a value is true.  Intended as a very temporary, debug-only
# aid: it is slow and emits a one-time warning that it is being called.
# @param cond condition
# @param msg optional printf-style format and values for the failure message
# @raise [Exception] if cond is false or nil
def assert!(cond, *msg)
  one_time_alert("warning",0,"Checking assertion")
  return if cond

  raise Exception, (msg.empty? ? "assertion error" : sprintf(*msg))
end
136
# Abort with an "Unimplemented code" error.
# @param msg optional detail appended after ": "
# @raise [Exception] always
def unimp!(msg = nil)
  text = +"Unimplemented code"
  text << ": " << msg if msg
  raise Exception, text
end
143
+
144
+
145
+
146
# Get a nice, concise description of the file and line
# of some caller within the stack.
#
# The frame text has the form "path:line:in ...".  It is parsed with an
# anchored pattern rather than by splitting at the first ':', so paths that
# themselves contain a colon (e.g. a Windows drive letter) are handled.
#
# @param nSkip the number of items deep in the call stack to look
#   (0 is the immediate caller of this method)
# @return "file (line)", or "(UNKNOWN LOCATION)" if there is no such frame
#
def get_caller_location(nSkip = 2)
  # Only the single frame of interest is fetched; caller returns nil when
  # the start index lies beyond the stack.
  frame = nSkip >= 0 ? (caller(nSkip + 1, 1) || []).first : nil
  match = frame && /\A(.+?):(\d+)(?::|\z)/.match(frame)
  return "(UNKNOWN LOCATION)" unless match

  File.basename(match[1]) + " (" + match[2] + ")"
end
179
+
180
# Set of alert strings that have already been reported
# (to avoid printing anything on subsequent invocations)
#
$AlertStrings = Set.new

# Print a message, which includes the caller's location,
# if that exact message hasn't yet been printed.
#
# @param typeString e.g., "warning", "unimplemented"
# @param nSkip the number of levels deep that the caller is in the stack
# @param args if present, a sprintf(...) format and values whose result is
#   appended to the message; may be omitted entirely
#
def one_time_alert(typeString, nSkip, *args)
  loc = get_caller_location(nSkip + 2)
  msg = "*** "+typeString+" " + loc
  # An empty args array must skip formatting: sprintf(nil) raises TypeError.
  msg += ": " + sprintf(args[0], *args[1..-1]) unless args.empty?

  puts msg if $AlertStrings.add?(msg)
end
206
+
207
# Print a 'warning' alert, one time only.
# Being a top-level definition, this takes precedence over Kernel#warn.
# @param args if present, a sprintf() format and values for the message
def warn(*args)
  kind = "warning"
  one_time_alert(kind, 0, *args)
end
212
+
213
# Print an 'unimplemented' alert, one time only.
# @param args if present, a sprintf() format and values for the message
def unimp(*args)
  kind = "unimplemented"
  one_time_alert(kind, 0, *args)
end
218
+
219
# Write a string to a file, replacing any existing contents.
# The file is written in binary mode, so no newline translation occurs.
# @param path file to write
# @param contents string to store
# @return number of bytes written
#
def write_text_file(path, contents)
  File.binwrite(path, contents)
end
224
+
225
# Read a file's entire contents in binary mode.
# @param path file to read
# @return the contents as a string
#
def read_text_file(path)
  File.binread(path)
end
232
+
233
# Run the given code block once.  This gives the same effect as the
# Java/C++ idiom
#   do {
#     ...
#     ... possibly with 'break' to jump to the end ...
#   } while (false);
# because a 'break' inside the block leaves this method.
#
def block
  outcome = yield
  outcome
end
243
+
244
# Raised for objects found in an illegal state.
# Derives directly from Exception, so a bare 'rescue' does not catch it.
#
class IllegalStateException < Exception; end
248
+
249
+
250
# Format a value as lowercase hexadecimal, left-padded with zeros.
# @param value value to format
# @param num_digits minimum width; longer results are not truncated
# @return the hex string
def to_hex(value, num_digits=4)
  format("%x", value).rjust(num_digits, '0')
end
254
+
255
# Print a hex dump to standard output.
# All arguments are forwarded unchanged to hex_dump_to_string.
def hex_dump(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
  puts hex_dump_to_string(byte_array_or_string, title, offset, length, bytes_per_row, with_text)
end
259
+
260
# Build a hex dump of a sequence of bytes.
#
# Each row holds the address, the bytes in hex (a zero byte is shown as
# '..', with an extra space before every fourth column) and, optionally,
# the same bytes as text between '|' marks ('_' for anything outside
# printable ASCII).  At least one row is always produced.
#
# @param byte_array_or_string array of bytes, or a string
# @param title optional heading, emitted as "title:" on its own line
# @param offset index of the first byte to dump
# @param length number of bytes to dump; negative means "to the end"
# @param bytes_per_row number of columns per row
# @param with_text true to append the text column
# @return the dump as a string
def hex_dump_to_string(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
  data = byte_array_or_string
  data = data.bytes.to_a if data.is_a?(String)

  out = +''
  out << title << ":\n" if title

  # Clamp the requested length to what is actually available.
  available = data.size - offset
  length = available if length < 0
  length = available if available < length

  # Use at least four address digits, more if the last address needs them.
  last_addr = offset + length - 1
  addr_digits = 4
  addr_digits += 1 until (1 << (4 * addr_digits)) > last_addr

  loop do
    out << to_hex(offset, addr_digits) << ': '

    row_len = [length, bytes_per_row].min
    bytes_per_row.times do |col|
      out << ' ' if col % 4 == 0

      if col < row_len
        byte = data[offset + col]
        out << (byte == 0 ? '..' : to_hex(byte, 2)) << ' '
      else
        out << ' '
      end
    end

    if with_text
      out << ' |'
      bytes_per_row.times do |col|
        next unless col < row_len

        byte = data[offset + col]
        out << ((32...127).cover?(byte) ? byte : '_')
      end
      out << '|'
    end
    out << "\n"

    length -= row_len
    offset += row_len
    break if length <= 0
  end
  out
end
324
+
325
# Time of the previous call to elapsed (seconds, as a float), or nil
$prevTime = nil

# Calculate the time, in seconds, since the last call to this function.
# @return the elapsed seconds; zero on the very first call
def elapsed
  now = Time.now.to_f
  delta = $prevTime ? now - $prevTime : 0
  $prevTime = now
  delta
end
338
+
339
# Construct a string from an array of bytes.
# @param byte_array array of bytes, or a string (which is returned unchanged)
# @return the string
#
def bytes_to_str(byte_array)
  byte_array.is_a?(String) ? byte_array : byte_array.pack('C*')
end
348
+
349
# Construct an array of bytes from a string.
# @param str string, or an array of bytes (which is returned unchanged)
# @return the bytes
#
def str_to_bytes(str)
  str.is_a?(Array) ? str : str.bytes
end
357
+
358
# Get directory entries, excluding '.' and '..'
# @param path directory to list
# @return array of entry names; always an array, possibly empty
#
def dir_entries(path)
  # reject (not reject!) is used because reject! returns nil when it
  # removes nothing, which would leak a nil to the caller.
  Dir.entries(path).reject { |entry| entry == '.' || entry == '..' }
end
364
+
365
# Convenience method for setting 'db' true within methods; prints a
# one-time warning when debug printing is enabled.
# @param val value to set db to; nil, false or 0 disable debug printing,
#   so it can be switched off quickly by adding a zero, e.g., 'warndb 0'
# @return true if debug printing is enabled, otherwise false
#
def warndb(val = true)
  return false unless val && val != 0

  one_time_alert("warning",1, "Debug printing enabled")
  true
end
377
+
378
+
379
# Encode the low 32 bits of an integer as four bytes (big-endian).
# @param x integer
# @return array of four bytes, most significant first
def int_to_bytes(x)
  [24, 16, 8, 0].map { |shift| (x >> shift) & 0xff }
end
382
+
383
# Encode the low 16 bits of an integer as two bytes (big-endian).
# @param x integer
# @return array of two bytes, most significant first
def short_to_bytes(x)
  [8, 0].map { |shift| (x >> shift) & 0xff }
end
386
+
387
# Decode a short from an array of bytes (big-endian).
# @param ba array of bytes
# @param offset offset of first (most significant) byte
# @return the decoded value
#
def short_from_bytes(ba, offset=0)
  high = ba[offset]
  low = ba[offset + 1]
  (high << 8) | low
end
394
+
395
# Decode an int from an array of bytes (big-endian).
# @param ba array of bytes
# @param offset offset of first (most significant) byte
# @return the decoded value
#
def int_from_bytes(ba, offset=0)
  (0..3).reduce(0) { |acc, i| (acc << 8) | ba[offset + i] }
end
403
+
404
+
405
# Delete a file or directory, if it exists.
# Caution! If directory, deletes all files and subdirectories.
# @param pth path to remove; nothing happens if it is neither a
#   directory nor a regular file
def remove_file_or_dir(pth)
  return FileUtils.remove_dir(pth) if File.directory?(pth)

  FileUtils.remove_file(pth) if File.file?(pth)
end
414
+
415
# Relabel a string as 8-bit ASCII (i.e., just treat each byte as-is).
# The string is modified in place; its bytes are left untouched.
# @param str string to relabel
# @return the same string object
#
def to_ascii8(str)
  str.force_encoding(Encoding::ASCII_8BIT)
end
420
+
421
# Verify that a string is encoded as ASCII-8BIT (UTF-8 is tolerated too).
# For any other encoding a diagnostic is printed and an assertion fails.
# @param s string to check
def simple_str(s)
  enc = s.encoding.name
  return if ['ASCII-8BIT', 'UTF-8'].include?(enc)

  pr("string [%s]\n encoding is %s,\n expected ASCII-8BIT\n",s,enc)
  assert!(false)
end
428
+
429
# Truncate or pad a string so it has a particular size.
#
# @param s input string (not modified)
# @param size desired length
# @param pad padding character to use if string needs to grow
# @return new string of the requested size
#
def str_sized(s, size, pad="\0")
  head = s[0...size]
  head.ljust(size, pad)
end
439
+
440
# Determine if running on the Windows operating system.
# Note: there is some debate about the best way to do this.
#
# Both native Windows build flavours are recognised: 'mswin' (MSVC) and
# 'mingw'.  Cygwin is deliberately not matched.  The answer is computed
# once and cached in $__windows__.
#
# @return true if running a native Windows build of Ruby, otherwise false
#
def windows?
  unless defined? $__windows__
    $__windows__ = !!(RUBY_PLATFORM =~ /mswin|mingw/)
  end
  $__windows__
end
449
+
450
+
451
# Convenience method to detect if a script is being run
# e.g. as a 'main' method (for debug purposes only).
# If so, the current directory is changed to the
# directory containing the script.
#
# @param file pass __FILE__ in here
# @return true if file is the script being run
#
def main?(file)
  script = $0

  # The test/unit framework seems to add a suffix ": xxx#xxx.." to the
  # .rb filename; keep only the part up to and including ".rb".
  cut = script.index(".rb: ")
  script = script[0, cut + 3] if cut

  is_main = (file == script)
  Dir.chdir(File.dirname(file)) if is_main
  is_main
end
475
+
476
if defined? Test::Unit

  # A small extension of Test::Unit::TestCase that adds suite-level
  # setup/teardown hooks.
  #
  # The class is only defined when Test::Unit is already loaded, so a
  # script wanting it must require 'test/unit' before requiring 'tools.rb'.
  #
  # Test scripts subclass MyTestSuite and name their tests 'test_xxxx',
  # where xxxx sorts lexicographically between 01 and zz.  The intended
  # call sequence for a class with tests test_b and test_c is:
  #
  #   suite_setup
  #
  #   method_setup
  #   test_b
  #   method_teardown
  #
  #   method_setup
  #   test_c
  #   method_teardown
  #
  #   suite_teardown
  #
  # Notes
  # -----
  # 1) Do NOT override the usual setup / teardown methods; override the
  #    method_xxx / suite_xxx hooks instead.
  #
  # 2) The base class implementations of the hooks do nothing.
  #
  # 3) The number of test cases reported may be higher than expected,
  #    because two extra test methods implement the suite-level hooks.
  #
  # 4) Avoid test names that sort outside of test_01 ... test_zz.
  #
  # NOTE(review): assuming @__name__ holds the running test's name, setup
  # and teardown below call method_setup / method_teardown only for the two
  # suite pseudo-tests, and return early for ordinary tests.  That looks
  # inverted relative to the sequence described above -- confirm intent.
  # The behaviour is left exactly as it was.
  #
  class MyTestSuite < Test::Unit::TestCase

    # Named to sort FIRST; performs suite-level setup and nothing else.
    def test_00_setup
      @@suiteSetup = true
      suite_setup
    end

    # Named to sort LAST; performs suite-level teardown and nothing else.
    def test_zzzzzz_teardown
      suite_teardown
      @@suiteSetup = false
    end

    # True unless the current test is one of the two suite pseudo-tests
    def _suite_active?
      @__name__ != "test_00_setup" && @__name__ != "test_zzzzzz_teardown"
    end

    def setup
      return method_setup unless _suite_active?

      # If only a specific test was requested, the suite setup may not
      # have run... if not, do it now.
      suite_setup unless defined?(@@suiteSetup)
      nil
    end

    def teardown
      return method_teardown unless _suite_active?

      suite_teardown unless defined?(@@suiteSetup)
      nil
    end

    # Hook: override for suite-level setup
    def suite_setup; end

    # Hook: override for suite-level teardown
    def suite_teardown; end

    # Hook: override for per-method setup
    def method_setup; end

    # Hook: override for per-method teardown
    def method_teardown; end
  end
end
578
+
579
# Mark all constants ending with '_' as private constants
#
# The constants are handled through the reflection API (const_get,
# const_set, private_constant) rather than by evaluating generated source
# text, so values of any type are copied faithfully and the method works
# no matter what 'self' is at the call site.  Only constants defined
# directly on entity are considered.
#
# @param entity the class or module to examine
# @param add_non_suffix_versions if true, for each constant ABC_ found, also
#   defines a constant ABC with the same value that is also private
#
def privatize(entity, add_non_suffix_versions = false)
  hidden = []

  entity.constants(false).each do |c|
    nm = c.to_s
    next unless nm.end_with?('_')

    if add_non_suffix_versions
      nm_small = nm[0..-2]
      entity.const_set(nm_small, entity.const_get(c))
      hidden << nm_small.to_sym
    end
    hidden << c
  end

  entity.private_constant(*hidden) unless hidden.empty?
end
626
+