whispr 0.0.1

data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # Whispr
2
+
3
+ Whispr is a Ruby port of the [Graphite](http://graphite.wikidot.com/) [Whisper](https://github.com/graphite-project/) library.
4
+
5
+ It supports the basic create, update, and read (fetch, dump) operations.
6
+
7
+
8
+ ## TODO
9
+
10
+ - unit tests
11
+ - whispr-resize
12
+ - whispr-merge
13
+ - whispr-set-aggregation-method
14
+ - rrd2whispr
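
Not part of the packaged files: a minimal usage sketch of the create, update, and read operations mentioned in the README, based on the `Whispr` API defined in `data/lib/whispr.rb` later in this diff. The file path and retention values are made up for illustration.

```ruby
require "whispr"

# Two archives: 60s resolution kept for a day, 300s resolution kept for a week.
wsp = Whispr.create("/tmp/example.wsp",
                    [[60, 1440], [300, 2016]],
                    :xff => 0.5, :aggregationMethod => :average)

# Each point passed to #update is a [timestamp, value] pair.
now = Time.now.to_i
wsp.update([now - 60, 1.0], [now, 2.0])

# #fetch returns [[start, end, step], values]; values may contain nils.
(start_t, _end_t, step), values = wsp.fetch(Time.now - 3600)
values.each.with_index { |v, i| puts "#{start_t + i * step}\t#{v.inspect}" }
```
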
data/bin/whispr-create ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "optparse"
5
+
6
+ options = {:xff => 0.5, :overwrite => false, :aggregationMethod => :average}
7
+ opt_parser = OptionParser.new do |opts|
8
+ opts.on("--xFilesFactor Float", Float, "default 0.5") { |x| options[:xff] = x }
9
+ opts.on("--aggregationMethod String", String, "function to use when aggregating values #{Whispr::AGGR_TYPES[1..-1].map(&:to_s).inspect}") do |aggr|
10
+ aggr = aggr.intern
11
+ unless Whispr::AGGR_TYPES[1..-1].include?(aggr)
12
+ $stderr.puts "aggregationMethod must be one of: #{Whispr::AGGR_TYPES[1..-1]}"
13
+ exit 1
14
+ end
15
+ options[:aggregationMethod] = aggr
16
+ end
17
+ opts.on("--overwrite") {|o| options[:overwrite] = o }
18
+ opts.banner += " path timePerPoint:timeToStore [timePerPoint:timeToStore]*"
19
+ end
20
+ opt_parser.parse!
21
+
22
+ if ARGV.length < 2
23
+ $stderr.puts opt_parser
24
+ exit 1
25
+ end
26
+
27
+ path = ARGV.shift
28
+ unless File.exists?(File.dirname(path))
29
+ $stderr.puts "#{File.dirname(path)} does not exist"
30
+ exit 1
31
+ end
32
+ File.unlink(path) if options[:overwrite] && File.exists?(path)
33
+
34
+ Whispr.create(path, ARGV.map{|a| Whispr.parse_retention_def(a) }, options)
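
The retention arguments take Whisper's `timePerPoint:timeToStore` form. `Whispr.parse_retention_def` (defined in `data/lib/whispr.rb` below) accepts plain integers (seconds per point and point counts) or unit-suffixed values using s, m, h, d, w, y. A few illustrative results:

```ruby
require "whispr"

Whispr.parse_retention_def("60:1440")  #=> [60, 1440]   (60s per point, 1440 points)
Whispr.parse_retention_def("1m:24h")   #=> [60, 1440]
Whispr.parse_retention_def("5m:7d")    #=> [300, 2016]
```
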
data/bin/whispr-dump ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "stringio"
5
+
6
+ if ARGV[0].nil? || ARGV[0] == "-"
7
+ data = StringIO.new(STDIN.read)
8
+ else
9
+ abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
10
+ data = File.open(ARGV[0], "r")
11
+ end
12
+
13
+ whisper = Whispr.new(data)
14
+
15
+ puts "Meta data:"
16
+ puts " aggregation method: #{whisper.info[:aggregationMethod]}"
17
+ puts " max retention: #{whisper.info[:maxRetention]}"
18
+ puts " xFilesFactor: #{whisper.info[:xFilesFactor]}"
19
+
20
+ whisper.archives.each.with_index do |archive, i|
21
+ puts "\nArchive #{i} info:"
22
+ puts " offset #{archive.offset}"
23
+ puts " seconds per point #{archive.spp}"
24
+ puts " points #{archive.points}"
25
+ puts " retention #{archive.retention}"
26
+ puts " size #{archive.size}"
27
+ end
28
+
29
+ whisper.archives.each.with_index do |archive, i|
30
+ puts "\nArchive #{i} data: "
31
+ archive.to_enum.each do |point, timestamp, value|
32
+ puts sprintf("#{point}, #{timestamp}, %10.35g", value)
33
+ end
34
+ end
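
The metadata whispr-dump prints comes from the fixed-layout header described by the pack formats in `data/lib/whispr.rb` (`METADATA_FMT`, `ARCHIVE_INFO_FMT`, `POINT_FMT`). A small sketch that unpacks those fields by hand, assuming a hypothetical file path:

```ruby
# Big-endian layout: 16 bytes of metadata, 12 bytes per archive header,
# then each archive's points stored as 12-byte (timestamp, double) pairs.
File.open("/tmp/example.wsp", "rb") do |fh|
  aggr, max_retention, xff, count = fh.read(16).unpack("NNgN")
  archives = count.times.map { fh.read(12).unpack("NNN") } # offset, secondsPerPoint, points
  puts "aggregation=#{aggr} maxRetention=#{max_retention} xFilesFactor=#{xff}"
  archives.each { |off, spp, pts| puts "offset=#{off} spp=#{spp} points=#{pts}" }
end
```
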
data/bin/whispr-fetch ADDED
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "stringio"
5
+ require "optparse"
6
+ require "json"
7
+
8
+ options = {:from => Time.new - 86400, :until => Time.new}
9
+ OptionParser.new do |opts|
10
+ opts.on("--from Integer", "Unix epoch time of the beginning of your requested interval (default: 24 hours ago)") do |f|
11
+ if ["n", "N"].include?(f[0])
12
+ f = Time.new + f[1..-1].to_i
13
+ end
14
+ options[:from] = Time.at(f.to_i)
15
+ end
16
+ opts.on("--until Integer", "Unix epoch time of the end of your requested interval (default: now)") do |u|
17
+ if ["n", "N"].include?(u[0])
18
+ u = Time.new + u[1..-1].to_i
19
+ end
20
+ options[:until] = Time.at(u.to_i)
21
+ end
22
+ opts.on("--pretty", "Show human-readable timestamps instead of unix times") { options[:pretty] = true }
23
+ opts.on("--json", "Output results in JSON form") { options[:json] = true }
24
+ end.parse!
25
+
26
+ if ARGV[0].nil? || ARGV[0] == "-"
27
+ data = StringIO.new(STDIN.read)
28
+ else
29
+ abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
30
+ data = File.open(ARGV[0], "r")
31
+ end
32
+
33
+
34
+ (start_t, end_t, step), values = Whispr.new(data).fetch(options[:from], options[:until])
35
+ if options[:json]
36
+ puts JSON.dump({:start => start_t, :end => end_t, :step => step, :values => values })
37
+ else
38
+ t = start_t
39
+ values.each do |value|
40
+ time = options[:pretty] ? Time.at(t) : t
41
+ value = value ? sprintf("%f", value) : 'None'
42
+ puts "#{time}\t#{value}"
43
+ t += step
44
+ end
45
+ end
data/bin/whispr-info ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "stringio"
5
+
6
+ if ARGV[0].nil? || ARGV[0] == "-"
7
+ data = StringIO.new(STDIN.read)
8
+ else
9
+ abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
10
+ data = File.open(ARGV[0], "r")
11
+ end
12
+
13
+ info = Whispr.new(data).info
14
+ info[:fileSize] = data.size
15
+
16
+ unless (fields = Array(ARGV[1..-1])).empty?
17
+ fields.each do |field|
18
+ unless info.include?(field.to_sym)
19
+ puts "Unknown field '#{field}'. Valid fields are #{info.keys.join(", ")}"
20
+ exit 1
21
+ end
22
+ puts info[field.to_sym]
23
+ end
24
+ exit 0
25
+ end
26
+
27
+ archives = info.delete(:archives)
28
+
29
+ info.each { |k,v| puts "#{k}: #{v}" }
30
+
31
+ archives.each_index do |i|
32
+ puts "\nArchive #{i}"
33
+ archives[i].each { |k,v| puts "#{k}: #{v}" }
34
+ end
data/bin/whispr-update ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "stringio"
5
+
6
+ points = ARGV.select{|e| e.include?(":") }
7
+ if points.empty?
8
+ puts "#{File.basename($0)} path timestamp:value [timestamp:value]*"
9
+ exit -1
10
+ end
11
+
12
+ file = ARGV - points
13
+ if file.empty? || file[0] == "-"
14
+ data = StringIO.new(STDIN.read, "r+")
15
+ else
16
+ abort("data file #{file[0]} does not exist") unless File.exist?(file[0])
17
+ data = File.open(file[0], "r+")
18
+ end
19
+
20
+ now = Time.now.to_i
21
+ points = points.map{|p| p.split(":") }.map{|t,v| [t == 'N' ? now : t.to_i, v.to_f] }
22
+
23
+ Whispr.new(data).update(*points)
data/lib/whispr.rb ADDED
@@ -0,0 +1,456 @@
1
+ require 'whispr/version'
2
+ require 'whispr/archive'
3
+ require 'stringio'
4
+
5
+ class Whispr
6
+ module Error; end
7
+ class WhisprError < StandardError
8
+ include Error
9
+ def self.exception(e)
10
+ return e if e.nil? || e == self
11
+ ne = new(e.to_s)
12
+ ne.set_backtrace e.backtrace if e.respond_to?(:backtrace)
13
+ ne
14
+ end
15
+ end
16
+ class CorruptWhisprFile < WhisprError; end
17
+ class InvalidTimeInterval < WhisprError; end
18
+ class TimestampNotCovered < WhisprError; end
19
+ class InvalidAggregationMethod < WhisprError; end
20
+ class ArchiveBoundaryExceeded < WhisprError; end
21
+ class ValueError < WhisprError; end
22
+ class InvalidConfiguration < WhisprError; end
23
+
24
+ LONG_FMT = "N"
25
+ METADATA_FMT = "#{LONG_FMT*2}g#{LONG_FMT}"
26
+ METADATA_SIZE = 16
27
+ ARCHIVE_INFO_FMT = LONG_FMT * 3
28
+ ARCHIVE_INFO_SIZE = 12
29
+ POINT_FMT = "#{LONG_FMT}G"
30
+ POINT_SIZE = 12
31
+ CHUNK_SIZE = 16384
32
+
33
+ AGGR_TYPES = [
34
+ :_,
35
+ :average,
36
+ :sum,
37
+ :last,
38
+ :max,
39
+ :min
40
+ ].freeze
41
+
42
+ class << self
43
+
44
+ def unitMultipliers
45
+ @unitMultipliers ||= {
46
+ 's' => 1,
47
+ 'm' => 60,
48
+ 'h' => 3600,
49
+ 'd' => 86400,
50
+ 'w' => 86400 * 7,
51
+ 'y' => 86400 * 365
52
+ }
53
+ end
54
+
55
+ def parse_retention_def(rdef)
56
+ raise ArgumentError.new("precision and points must be separated by a ':'") unless rdef && rdef.include?(":")
57
+ (precision, points) = rdef.strip.split(':')
58
+ if precision.to_i.to_s == precision
59
+ precision = precision.to_i * unitMultipliers['s']
60
+ else
61
+ _, precision, unit = precision.split(/([\d]+)/)
62
+ unit = 's' unless unit
63
+ raise ValueError.new("Invalid precision specification unit #{unit}") unless unitMultipliers[unit[0]]
64
+ precision = precision.to_i * unitMultipliers[unit[0]]
65
+ end
66
+
67
+ if points.to_i.to_s == points
68
+ points = points.to_i
69
+ else
70
+ _, points, unit = points.split(/([\d]+)/)
71
+ raise ValueError.new("Invalid retention specification unit #{unit}") unless unitMultipliers[unit[0]]
72
+ points = points.to_i * unitMultipliers[unit[0]] / precision
73
+ end
74
+
75
+ [precision, points]
76
+ end
77
+
78
+ # Create a Whisper file.
79
+ # @param [String] path
80
+ # @param [Array] archiveList each archive is an array with two elements: [secondsPerPoint,numberOfPoints]
81
+ # @param [Hash] opts
82
+ # @option opts [Float] :xff the fraction of data points in a propagation interval that must have known values for a propagation to occur
83
+ # @option opts [Symbol] :aggregationMethod the function to use when propagating data; must be one of AGGR_TYPES[1..-1]
84
+ # @option opts [Boolean] :overwrite (false)
85
+ # @raise [InvalidConfiguration] if the archiveList is invalid, or if 'path' exists and :overwrite is not true
86
+ # @see Whispr.validateArchiveList
87
+ def create(path, archiveList, opts = {})
88
+ opts = {:xff => 0.5, :aggregationMethod => :average, :sparse => false, :overwrite => false}.merge(opts)
89
+ unless AGGR_TYPES[1..-1].include?(opts[:aggregationMethod])
90
+ raise InvalidConfiguration.new("aggregationMethod must be one of #{AGGR_TYPES[1..-1]}")
91
+ end
92
+
93
+ validateArchiveList!(archiveList)
94
+ raise InvalidConfiguration.new("File #{path} already exists!") if File.exists?(path) && !opts[:overwrite]
95
+
96
+ # if file exists it will be truncated
97
+ File.open(path, "wb") do |fh|
98
+ fh.flock(File::LOCK_EX)
99
+ aggregationType = AGGR_TYPES.index(opts[:aggregationMethod])
100
+ oldest = archiveList.map{|spp, points| spp * points }.sort.last
101
+ packedMetadata = [aggregationType, oldest, opts[:xff], archiveList.length].pack(METADATA_FMT)
102
+ fh.write(packedMetadata)
103
+ headerSize = METADATA_SIZE + (ARCHIVE_INFO_SIZE * archiveList.length)
104
+ archiveOffsetPointer = headerSize
105
+ archiveList.each do |spp, points|
106
+ archiveInfo = [archiveOffsetPointer, spp, points].pack(ARCHIVE_INFO_FMT)
107
+ fh.write(archiveInfo)
108
+ archiveOffsetPointer += (points * POINT_SIZE)
109
+ end
110
+
111
+ if opts[:sparse]
112
+ fh.seek(archiveOffsetPointer - headerSize - 1)
113
+ fh.write("\0")
114
+ else
115
+ remaining = archiveOffsetPointer - headerSize
116
+ zeroes = "\x00" * CHUNK_SIZE
117
+ while remaining > CHUNK_SIZE
118
+ fh.write(zeroes)
119
+ remaining -= CHUNK_SIZE
120
+ end
121
+ fh.write(zeroes[0...remaining])
122
+ end
123
+
124
+ fh.flush
125
+ fh.fsync rescue nil
126
+ end
127
+
128
+ new(path)
129
+ end
130
+
131
+ # Is the provided archive list valid?
132
+ # @return [Boolean] true, false
133
+ def validArchiveList?(archiveList)
134
+ validateArchiveList!(archiveList).nil? rescue false
135
+ end
136
+
137
+ # Validate an archive list without raising an exception
138
+ # @return [NilClass, InvalidConfiguration]
139
+ def validateArchiveList(archiveList)
140
+ validateArchiveList!(archiveList) rescue $!
141
+ end
142
+
143
+ # Validate an archive list
144
+ # An ArchiveList must:
145
+ # 1. Have at least one archive config. Example: [60, 86400]
146
+ # 2. No archive may be a duplicate of another.
147
+ # 3. Higher precision archives' precision must evenly divide all lower precision archives' precision.
148
+ # 4. Lower precision archives must cover larger time intervals than higher precision archives.
149
+ # 5. Each archive must have at least enough points to consolidate to the next archive
150
+ # @raise [InvalidConfiguration]
151
+ # @return [nil]
152
+ def validateArchiveList!(archiveList)
153
+ raise InvalidConfiguration.new("you must specify at least one archive configuration") if Array(archiveList).empty?
154
+ archiveList = archiveList.sort{|a,b| a[0] <=> b[0] }
155
+ archiveList[0..-2].each_with_index do |archive, i|
156
+ nextArchive = archiveList[i+1]
157
+ unless archive[0] < nextArchive[0]
158
+ raise InvalidConfiguration.new("A Whisper database may not be configured " +
159
+ "having two archives with the same precision " +
160
+ "(archive#{i}: #{archive}, archive#{i+1}: #{nextArchive})")
161
+ end
162
+ unless nextArchive[0] % archive[0] == 0
163
+ raise InvalidConfiguration.new("Higher precision archives' precision must " +
164
+ "evenly divide all lower precision archives' precision " +
165
+ "(archive#{i}: #{archive}, archive#{i+1}: #{nextArchive})")
166
+ end
167
+
168
+ retention = archive[0] * archive[1]
169
+ nextRetention = nextArchive[0] * nextArchive[1]
170
+ unless nextRetention > retention
171
+ raise InvalidConfiguration.new("Lower precision archives must cover larger " +
172
+ "time intervals than higher precision archives " +
173
+ "(archive#{i}: #{retention} seconds, archive#{i + 1}: #{nextRetention} seconds)")
174
+ end
175
+
176
+ archivePoints = archive[1]
177
+ pointsPerConsolidation = nextArchive[0] / archive[0]
178
+ unless archivePoints >= pointsPerConsolidation
179
+ raise InvalidConfiguration.new("Each archive must have at least enough points " +
180
+ "to consolidate to the next archive (archive#{i+1} consolidates #{pointsPerConsolidation} of " +
181
+ "archive#{i}'s points but it has only #{archivePoints} total points)")
182
+ end
183
+ end
184
+ nil
185
+ end
186
+ end
187
+
188
+ # @return [File, StringIO] file handle of the whisper file
189
+ attr_reader :fh
190
+
191
+ attr_accessor :auto_flush
192
+ alias :auto_flush? :auto_flush
193
+
194
+ def initialize(file, auto_flush = true)
195
+ @fh = file.is_a?(File) || file.is_a?(StringIO) ? file : File.open(file, 'r+')
196
+ @fh.binmode
197
+ @auto_flush = auto_flush
198
+ end
199
+
200
+ # @return [Hash]
201
+ def header
202
+ @header ||= read_header
203
+ end
204
+ alias :info :header
205
+
206
+ # @return [Array] Archives
207
+ # @see Whispr::Archive
208
+ def archives
209
+ @archives ||= info[:archives].map { |a| Archive.new(self, a) }
210
+ end
211
+
212
+
213
+ # Retrieve values from a whisper file within the given time window.
214
+ #
215
+ # The most appropriate archive within the whisper file will be chosen. The
216
+ # return value will be a two element Array. The first element will be a
217
+ # three element array containing the start time, end time and step. The
218
+ # second element will be a N element array containing each value at each
219
+ # step period.
220
+ #
221
+ # @see Archive#fetch
222
+ def fetch(fromTime, untilTime = Time.new)
223
+ fromTime = fromTime.to_i
224
+ untilTime = untilTime.to_i
225
+ now = Time.now.to_i
226
+ oldest = now - header[:maxRetention]
227
+ fromTime = oldest if fromTime < oldest
228
+ raise InvalidTimeInterval.new("Invalid time interval") unless fromTime < untilTime
229
+ untilTime = now if untilTime > now || untilTime < fromTime
230
+
231
+ diff = now - fromTime
232
+ archive = archives.find{|a| a.retention >= diff }
233
+ return archive.fetch(fromTime, untilTime)
234
+ end
235
+
236
+ # Update one or many points
237
+ # Each element of the points list should be a two dimensional Array where
238
+ # the first element is a timestamp and the second element is a value.
239
+ def update(*points)
240
+ return if points.empty?
241
+ # TODO lock the file
242
+ if points.length == 1
243
+ update_one(points[0][1], points[0][0])
244
+ else
245
+ update_many(points)
246
+ end
247
+ end
248
+
249
+ private
250
+
251
+
252
+ def read_header
253
+ o_pos = @fh.pos
254
+
255
+ begin
256
+ @fh.pos = 0
257
+ metadata = @fh.read(METADATA_SIZE)
258
+ aggr_type, max_retention, xff, arch_count = metadata.unpack(METADATA_FMT)
259
+ archives = arch_count.times.map do |i|
260
+ arch_info = @fh.read(ARCHIVE_INFO_SIZE)
261
+ offset, s_per_pnt, points = arch_info.unpack(ARCHIVE_INFO_FMT)
262
+ { :retention => s_per_pnt * points,
263
+ :secondsPerPoint => s_per_pnt,
264
+ :points => points,
265
+ :size => points * POINT_SIZE,
266
+ :offset => offset
267
+ }
268
+ end
269
+ rescue => e
270
+ raise CorruptWhisprFile.exception(e)
271
+ ensure
272
+ @fh.pos = o_pos
273
+ end
274
+
275
+ { :maxRetention => max_retention,
276
+ :xFilesFactor => xff,
277
+ :aggregationMethod => AGGR_TYPES[aggr_type],
278
+ :archives => archives
279
+ }
280
+ end
281
+
282
+ def update_one(value, timestamp = nil)
283
+ now = Time.new.to_i
284
+ timestamp = now if timestamp.nil?
285
+ diff = now - timestamp
286
+ if !(diff < header[:maxRetention] && diff >= 0)
287
+ raise TimestampNotCovered, "Timestamp (#{timestamp}) not covered by any archives in this database"
288
+ end
289
+
290
+ aidx = (0 ... archives.length).find { |i| archives[i].retention > diff }
291
+ archive = archives[aidx]
292
+ lowerArchives = archives[aidx + 1 .. - 1]
293
+
294
+ myInterval = timestamp - (timestamp % archive.spp)
295
+ myPackedPoint = [myInterval, value].pack(POINT_FMT)
296
+ @fh.seek(archive.offset)
297
+ baseInterval, baseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)
298
+
299
+ if baseInterval == 0
300
+ # this file's first update
301
+ @fh.seek(archive.offset)
302
+ @fh.write(myPackedPoint)
303
+ baseInterval, baseValue = myInterval, value
304
+ else
305
+ timeDistance = myInterval - baseInterval
306
+ pointDistance = timeDistance / archive.spp
307
+ byteDistance = pointDistance * POINT_SIZE
308
+ myOffset = archive.offset + (byteDistance % archive.size)
309
+ @fh.seek(myOffset)
310
+ @fh.write(myPackedPoint)
311
+ end
312
+
313
+ higher = archive
314
+ lowerArchives.each do |lower|
315
+ break unless propagate(myInterval, higher, lower)
316
+ higher = lower
317
+ end
318
+
319
+ @fh.flush if auto_flush?
320
+ end
321
+
322
+ def update_many(points)
323
+ # order points by timestamp, newest first
324
+ points = points.map{|ts, v| [ts.to_i, v.to_f ] }.sort {|b,a| a[0] <=> b[0] }
325
+ now = Time.new.to_i
326
+ archives = self.archives.to_enum
327
+ currentArchive = archives.next
328
+ currentPoints = []
329
+ points.each do |point|
330
+ age = now - point[0]
331
+ while currentArchive.retention < age
332
+ unless currentPoints.empty?
333
+ currentPoints.reverse! # put points in chronological order
334
+ currentArchive.update_many(currentPoints)
335
+ currentPoints = []
336
+ end
337
+ begin
338
+ currentArchive = archives.next
339
+ rescue StopIteration
340
+ currentArchive = nil
341
+ break
342
+ end
343
+ end
344
+ # drop remaining points that don't fit in the database
345
+ break unless currentArchive
346
+
347
+ currentPoints << point
348
+ end
349
+
350
+ if currentArchive && !currentPoints.empty?
351
+ # don't forget to commit after we've checked all the archives
352
+ currentPoints.reverse!
353
+ currentArchive.update_many(currentPoints)
354
+ end
355
+
356
+ @fh.flush if auto_flush?
357
+ end
358
+
359
+ def propagate(timestamp, higher, lower)
360
+ aggregationMethod = header[:aggregationMethod]
361
+ xff = header[:xFilesFactor]
362
+
363
+ lowerIntervalStart = timestamp - (timestamp % lower.spp)
364
+ lowerIntervalEnd = lowerIntervalStart + lower.spp
365
+ @fh.seek(higher.offset)
366
+ higherBaseInterval, higherBaseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)
367
+
368
+ if higherBaseInterval == 0
369
+ higherFirstOffset = higher.offset
370
+ else
371
+ timeDistance = lowerIntervalStart - higherBaseInterval
372
+ pointDistance = timeDistance / higher.spp
373
+ byteDistance = pointDistance * POINT_SIZE
374
+ higherFirstOffset = higher.offset + (byteDistance % higher.size)
375
+ end
376
+
377
+ higherPoints = lower.spp / higher.spp
378
+ higherSize = higherPoints * POINT_SIZE
379
+ relativeFirstOffset = higherFirstOffset - higher.offset
380
+ relativeLastOffset = (relativeFirstOffset + higherSize) % higher.size
381
+ higherLastOffset = relativeLastOffset + higher.offset
382
+ @fh.seek(higherFirstOffset)
383
+
384
+ if higherFirstOffset < higherLastOffset
385
+ # don't wrap the archive
386
+ seriesString = @fh.read(higherLastOffset - higherFirstOffset)
387
+ else
388
+ # wrap the archive
389
+ higherEnd = higher.offset + higher.size
390
+ seriesString = @fh.read(higherEnd - higherFirstOffset)
391
+ @fh.seek(higher.offset)
392
+ seriesString += @fh.read(higherLastOffset - higher.offset)
393
+ end
394
+
395
+ points = seriesString.length / POINT_SIZE
396
+ unpackedSeries = seriesString.unpack(POINT_FMT * points)
397
+
398
+ # construct a list of values
399
+ neighborValues = points.times.map{}
400
+ currentInterval = lowerIntervalStart
401
+ step = higher.spp
402
+ (0..unpackedSeries.length).step(2) do |i|
403
+ pointTime = unpackedSeries[i]
404
+ neighborValues[i/2] = unpackedSeries[i+1] if pointTime == currentInterval
405
+ currentInterval += step
406
+ end
407
+
408
+ knownValues = neighborValues.select { |v| !v.nil? }
409
+ return false if knownValues.empty?
410
+ if knownValues.length.to_f / neighborValues.length < header[:xFilesFactor]
411
+ return false
412
+ end
413
+
414
+ # we have enough data to propagate a value
415
+ aggregateValue = aggregate(aggregationMethod, knownValues)
416
+ myPackedPoint = [lowerIntervalStart, aggregateValue].pack(POINT_FMT)
417
+ @fh.seek(lower.offset)
418
+ lowerBaseInterval, lowerBaseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)
419
+
420
+ if lowerBaseInterval == 0
421
+ # first propagated update to this lower archive
422
+ @fh.seek(lower.offset)
423
+ @fh.write(myPackedPoint)
424
+ else
425
+ timeDistance = lowerIntervalStart - lowerBaseInterval
426
+ pointDistance = timeDistance / lower.spp
427
+ byteDistance = pointDistance * POINT_SIZE
428
+ lowerOffset = lower.offset + (byteDistance % lower.size)
429
+ @fh.seek(lowerOffset)
430
+ @fh.write(myPackedPoint)
431
+ end
432
+ true
433
+ end
434
+
435
+ def aggregate(aggregationMethod, knownValues)
436
+ case aggregationMethod
437
+ when :average
438
+ (knownValues.inject(0){|sum, i| sum + i } / knownValues.length).to_f
439
+ when :sum
440
+ knownValues.inject(0){|sum, i| sum + i }
441
+ when :last
442
+ knownValues[-1]
443
+ when :max
444
+ v = knownValues[0]
445
+ knownValues[1..-1].each { |k| v = k if k > v }
446
+ v
447
+ when :min
448
+ v = knownValues[0]
449
+ knownValues[1..-1].each { |k| v = k if k < v }
450
+ v
451
+ else
452
+ raise InvalidAggregationMethod, "Unrecognized aggregation method #{aggregationMethod}"
453
+ end
454
+ end
455
+
456
+ end
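
Outside the packaged source, a standalone illustration of the slot arithmetic that `update_one` and `propagate` share above: each archive is a circular buffer keyed off the timestamp stored in its first (base) point. The concrete numbers here are invented.

```ruby
POINT_SIZE = 12                     # bytes per packed (timestamp, value) pair
offset        = 40                  # where this archive starts in the file
spp           = 60                  # seconds per point
size          = 1440 * POINT_SIZE   # 1440 points in the archive
base_interval = 1_355_000_100       # timestamp held in the archive's first slot

timestamp   = 1_355_003_723
interval    = timestamp - (timestamp % spp)            # align to the archive's step
byte_offset = ((interval - base_interval) / spp) * POINT_SIZE
slot        = offset + (byte_offset % size)            # wraps around the ring
puts "point for #{interval} lives at file offset #{slot}"
```
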
data/lib/whispr/archive.rb ADDED
@@ -0,0 +1,220 @@
1
+ class Whispr
2
+ class Archive
3
+ include Enumerable
4
+
5
+ # @return [Hash] the archive header
6
+ attr_reader :header
7
+ # @return [Fixnum] the start location in the whisper file of this Archive
8
+ attr_reader :offset
9
+ # @return [Fixnum] the number of points in this archive
10
+ attr_reader :points
11
+ # @return [Fixnum] the total size of this archive (points * POINT_SIZE)
12
+ attr_reader :size
13
+ # @return [Fixnum] number of seconds worth of data retained by this archive
14
+ attr_reader :retention
15
+ # @return [Fixnum] seconds per point
16
+ attr_reader :spp
17
+ # @return [Whispr} the Whisper that contains this Archive
18
+ attr_reader :whisper
19
+
20
+ def initialize(whisper, header)
21
+ @whisper = whisper
22
+ @header = header
23
+ @offset = @header[:offset]
24
+ @points = @header[:points]
25
+ @size = @header[:size]
26
+ @retention = @header[:retention]
27
+ @spp = @header[:secondsPerPoint]
28
+ @eoa = @size * @points + @offset
29
+ end
30
+
31
+ # Retrieve each point from the archive.
32
+ #
33
+ # If a block is provided each point is read directly from
34
+ # the whisper file one at a time and yielded. If a block
35
+ # is not provided, all points are read from the file and
36
+ # returned as an enum.
37
+ #
38
+ # Each point is represented as a three element Array. The first
39
+ # element is the index of the point. The second element is the
40
+ # timestamp of the point and the third element is the value of
41
+ # the point.
42
+ def each(&blk)
43
+ return slurp.to_enum unless block_given?
44
+ o_pos = @whisper.fh.pos
45
+ begin
46
+ @whisper.fh.pos = @offset
47
+ points.times {|i| yield(i, *next_point) }
48
+ ensure
49
+ @whisper.fh.pos = o_pos
50
+ end
51
+ end
52
+
53
+ # Has the end of the archive been reached?
54
+ def eoa?
55
+ @whisper.fh.pos >= @eoa
56
+ end
57
+
58
+ def to_enum
59
+ slurp.to_enum
60
+ end
61
+
62
+ # Retrieve the next point from the whisper file.
63
+ # @api private
64
+ def next_point
65
+ return nil if @whisper.fh.pos >= @eoa || @whisper.fh.pos < @offset
66
+ @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
67
+ end
68
+
69
+ # Retrieve all points for this archive from the whisper file.
70
+ #
71
+ # Each point is represented as a three element Array. The first
72
+ # element is the index of the point. The second element is the
73
+ # timestamp of the point and the third element is the value of
74
+ # the point.
75
+ #
76
+ # @return [Array]
77
+ def slurp
78
+ o_pos = @whisper.fh.pos
79
+ @whisper.fh.pos = @offset
80
+ data = @whisper.fh.read(@size).unpack(POINT_FMT * @points)
81
+ @points.times.map { |i| [i, data.shift, data.shift] }
82
+ ensure
83
+ @whisper.fh.pos = o_pos
84
+ end
85
+
86
+ # Retrieve values for a time period from an archive within a whisper file
87
+ #
88
+ # The return value will be a two element Array. The first element will be
89
+ # a three element array containing the start time, end time and step. The
90
+ # second element will be a N element array containing each value at each
91
+ # step period.
92
+ #
93
+ # @see Whispr#fetch
94
+ def fetch(fromTime, untilTime)
95
+ fromInterval = (fromTime - (fromTime % spp)) + spp
96
+ untilInterval = (untilTime - (untilTime % spp)) + spp
97
+ o_pos = @whisper.fh.pos
98
+ begin
99
+ @whisper.fh.seek(offset)
100
+ baseInterval, baseValue = @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
101
+ if baseInterval == 0
102
+ step = spp
103
+ points = (untilInterval - fromInterval) / step
104
+ timeInfo = [fromInterval, untilInterval, step]
105
+ return [timeInfo, points.times.map{}]
106
+ end
107
+
108
+ # Determine fromOffset
109
+ timeDistance = fromInterval - baseInterval
110
+ pointDistance = timeDistance / spp
111
+ byteDistance = pointDistance * POINT_SIZE
112
+ fromOffset = offset + (byteDistance % size)
113
+
114
+ # Determine untilOffset
115
+ timeDistance = untilInterval - baseInterval
116
+ pointDistance = timeDistance / spp
117
+ byteDistance = pointDistance * POINT_SIZE
118
+ untilOffset = offset + (byteDistance % size)
119
+
120
+ # Reall all the points in the interval
121
+ @whisper.fh.seek(fromOffset)
122
+ if fromOffset < untilOffset
123
+ # we don't wrap around the archive
124
+ series = @whisper.fh.read(untilOffset - fromOffset)
125
+ else
126
+ # we wrap around the archive, so we need two reads
127
+ archiveEnd = offset + size
128
+ series = @whisper.fh.read(archiveEnd - fromOffset)
129
+ @whisper.fh.seek(offset)
130
+ series += @whisper.fh.read(untilOffset - offset)
131
+ end
132
+
133
+ points = series.length / POINT_SIZE
134
+ series = series.unpack(POINT_FMT * points)
135
+ currentInterval = fromInterval
136
+ step = spp
137
+ valueList = points.times.map{}
138
+ (0..series.length).step(2) do |i|
139
+ pointTime = series[i]
140
+ if pointTime == currentInterval
141
+ pointValue = series[i+1]
142
+ valueList[i/2] = pointValue
143
+ end
144
+ currentInterval += step
145
+ end
146
+
147
+ timeInfo = [fromInterval, untilInterval, step]
148
+ ensure
149
+ @whisper.fh.pos = o_pos
150
+ end
151
+ [timeInfo, valueList]
152
+ end
153
+
154
+ def update_many(points)
155
+ step = spp
156
+ alignedPoints = points.map { |ts, v| [(ts - (ts % step)), v] }
157
+ # Create a packed string for each contiguous sequence of points
158
+ packedStrings = []
159
+ previousInterval = nil
160
+ currentString = ''
161
+ alignedPoints.each do |interval, value|
162
+ next if interval == previousInterval
163
+ if previousInterval.nil? || (interval == previousInterval + step)
164
+ currentString += [interval, value].pack(POINT_FMT)
165
+ else
166
+ numberOfPoints = currentString.length / POINT_SIZE
167
+ startInterval = previousInterval - (step * (numberOfPoints - 1))
168
+ packedStrings << [startInterval, currentString]
169
+ currentString = [interval, value].pack(POINT_FMT)
170
+ end
171
+ previousInterval = interval
172
+ end
173
+ if !currentString.empty?
174
+ numberOfPoints = currentString.length / POINT_SIZE
175
+ startInterval = previousInterval - (step * (numberOfPoints - 1))
176
+ packedStrings << [startInterval, currentString]
177
+ end
178
+
179
+ # Read base point and determine where our writes will start
180
+ @whisper.fh.seek(offset)
181
+ baseInterval, baseValue = @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
182
+ baseInterval = packedStrings[0][0] if baseInterval == 0
183
+ packedStrings.each do |interval, packedString|
184
+ timeDistance = interval - baseInterval
185
+ pointDistance = timeDistance / step
186
+ byteDistance = pointDistance * POINT_SIZE
187
+ myOffset = offset + (byteDistance % size)
188
+ @whisper.fh.seek(myOffset)
189
+ archiveEnd = offset + size
190
+ bytesBeyond = (myOffset + packedString.length) - archiveEnd
191
+
192
+ if bytesBeyond > 0
193
+ @whisper.fh.write(packedString[0...-bytesBeyond])
194
+ if(@whisper.fh.pos != archiveEnd)
195
+ raise ArchiveBoundaryExceeded.new("archiveEnd=#{archiveEnd} pos=#{@whisper.fh.pos} bytesBeyond=#{bytesBeyond} len(packedString)=#{packedString.length}")
196
+ end
197
+ @whisper.fh.seek(offset)
198
+ @whisper.fh.write(packedString[-bytesBeyond..-1])
199
+ else
200
+ @whisper.fh.write(packedString)
201
+ end
202
+ end # packedStrings.each
203
+
204
+ # Now we propagate the updates to the lower-precision archives
205
+ higher = self
206
+ @whisper.archives.select{|a| a.spp > spp }.each do |lower|
207
+ lowerIntervals = alignedPoints.map{|p| p[0] - (p[0] % lower.spp) }
208
+ propagateFurther = false
209
+ lowerIntervals.uniq.each do |interval|
210
+ propagateFurther = @whisper.send(:propagate, interval, higher, lower)
211
+ end
212
+ break unless propagateFurther
213
+ higher = lower
214
+ end
215
+ end
216
+
217
+
218
+
219
+ end
220
+ end
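
A short sketch of walking an archive through the `Archive` API above, mirroring what `bin/whispr-dump` does (the path is hypothetical):

```ruby
require "whispr"

whisper = Whispr.new("/tmp/example.wsp")
archive = whisper.archives.first

puts "#{archive.spp}s per point, #{archive.points} points, #{archive.retention}s retention"

# Archive#each yields [index, timestamp, value]; slots never written read back as zeros.
archive.each do |i, timestamp, value|
  puts "#{i}: #{timestamp}, #{value}"
end
```
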
data/lib/whispr/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ class Whispr
2
+ VERSION = '0.0.1'
3
+ end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: whispr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Caleb Crane
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-14 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ''
15
+ email: whispr@simulacre.org
16
+ executables:
17
+ - whispr-create
18
+ - whispr-dump
19
+ - whispr-fetch
20
+ - whispr-info
21
+ - whispr-update
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - lib/whispr/archive.rb
26
+ - lib/whispr/version.rb
27
+ - lib/whispr.rb
28
+ - bin/whispr-create
29
+ - bin/whispr-dump
30
+ - bin/whispr-fetch
31
+ - bin/whispr-info
32
+ - bin/whispr-update
33
+ - README.md
34
+ homepage: http://github.com/simulacre/whispr
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Read and write Graphite Whisper round-robin files
58
+ test_files: []
59
+ has_rdoc: