whispr 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # Whispr
2
+
3
+ Whispr is a port of the [Graphite](http://graphite.wikidot.com/) [Whisper](https://github.com/graphite-project/) library in Ruby.
4
+
5
+ It supports the basic create, update and read (fetch, dump) operations.
6
+
7
+
8
+ ## TODO
9
+
10
+ - unit tests
11
+ - whispr-resize
12
+ - whispr-merge
13
+ - whispr-set-aggregation-method
14
+ - rrd2whispr
data/bin/whispr-create ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "whispr"
4
+ require "optparse"
5
+
6
+ options = {:xff => 0.5, :overwrite => false, :aggregationMethod => :average}
7
+ opt_parser = OptionParser.new do |opts|
8
+ opts.on("--xFilesFactor Float", Float, "default 0.5") { |x| options[:xff] = x }
9
+ opts.on("--aggregationMethod String", String, "function to use when aggregating values #{Whispr::AGGR_TYPES[1..-1].map(&:to_s).inspect}") do |aggr|
10
+ aggr = aggr.intern
11
+ unless Whispr::AGGR_TYPES[1..-1].include?(aggr)
12
+ $stderr.puts "aggregationMethod must be one of: #{Whispr::AGGR_TYPES[1..-1]}"
13
+ exit 1
14
+ end
15
+ options[:aggregationMethod] = aggr
16
+ end
17
+ opts.on("--overwrite") {|o| options[:overwrite] = o }
18
+ opts.banner += " path timePerPoint:timeToStore [timePerPoint:timeToStore]*"
19
+ end
20
+ opt_parser.parse!
21
+
22
+ if ARGV.length < 2
23
+ $stderr.puts opt_parser
24
+ exit 1
25
+ end
26
+
27
+ path = ARGV.shift
28
+ unless File.exists?(File.dirname(path))
29
+ $stderr.puts "#{File.dirname(path)} does not exists"
30
+ exit 1
31
+ end
32
+ File.unlink if options[:overwrite] && File.exists?(path)
33
+
34
+ Whispr.create(path, ARGV.map{|a| Whispr.parse_retention_def(a) }, options)
data/bin/whispr-dump ADDED
@@ -0,0 +1,34 @@
1
#!/usr/bin/env ruby

# Dump the header, per-archive info and every stored point of a whisper file.

require "whispr"
require "stringio"

# Read whisper data from the named file, or from STDIN when no file
# (or "-") is given.
if ARGV[0].nil? || ARGV[0] == "-"
  data = StringIO.new(STDIN.read)
else
  abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
  data = File.open(ARGV[0], "r")
end

whisper = Whispr.new(data)

# top-level metadata
puts "Meta data:"
puts " aggregation method: #{whisper.info[:aggregationMethod]}"
puts " max retention: #{whisper.info[:maxRetention]}"
puts " xFilesFactor: #{whisper.info[:xFilesFactor]}"

# per-archive header information
whisper.archives.each_with_index do |archive, i|
  puts "\nArchive #{i} info:"
  puts " offset #{archive.offset}"
  puts " seconds per point #{archive.spp}"
  puts " points #{archive.points}"
  puts " retention #{archive.retention}"
  puts " size #{archive.size}"
end

# every (index, timestamp, value) point of every archive
whisper.archives.each_with_index do |archive, i|
  puts "\nArchive #{i} data: "
  archive.to_enum.each do |point, timestamp, value|
    puts sprintf("#{point}, #{timestamp}, %10.35g", value)
  end
end
data/bin/whispr-fetch ADDED
@@ -0,0 +1,45 @@
1
#!/usr/bin/env ruby

# Fetch values from a whisper file for a requested time window and print
# them as text or JSON.

require "whispr"
require "stringio"
require "optparse"
require "json"

options = {:from => Time.new - 86400, :until => Time.new}
OptionParser.new do |opts|
  # values starting with 'n'/'N' are interpreted as offsets from now
  # BUG FIX: the --from help text was missing its closing parenthesis
  opts.on("--from Integer", "Unix epoch time of the beginning of your requested interval (default: 24 hours ago)") do |f|
    f = Time.new + f[1..-1].to_i if ["n", "N"].include?(f[0])
    options[:from] = Time.at(f.to_i)
  end
  opts.on("--until Integer", "Unix epoch time of the end of your requested interval (default: now)") do |u|
    u = Time.new + u[1..-1].to_i if ["n", "N"].include?(u[0])
    options[:until] = Time.at(u.to_i)
  end
  opts.on("--pretty", "Show human-readable timestamps instead of unix times") { options[:pretty] = true }
  opts.on("--json", "Output results in JSON form") { options[:json] = true }
end.parse!

# read whisper data from the named file, or from STDIN when no file (or "-") is given
if ARGV[0].nil? || ARGV[0] == "-"
  data = StringIO.new(STDIN.read)
else
  abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
  data = File.open(ARGV[0], "r")
end

(start_t, end_t, step), values = Whispr.new(data).fetch(options[:from], options[:until])
if options[:json]
  puts JSON.dump({:start => start_t, :end => end_t, :step => step, :values => values })
else
  # one "timestamp<TAB>value" line per step; gaps print as 'None'
  t = start_t
  values.each do |value|
    time = options[:pretty] ? Time.at(t) : t
    value = value ? sprintf("%f", value) : 'None'
    puts "#{time}\t#{value}"
    t += step
  end
end
data/bin/whispr-info ADDED
@@ -0,0 +1,34 @@
1
#!/usr/bin/env ruby

# Print whisper file header fields, either all of them or only the fields
# named on the command line.

require "whispr"
require "stringio"

# read whisper data from the named file, or from STDIN when no file (or "-") is given
if ARGV[0].nil? || ARGV[0] == "-"
  data = StringIO.new(STDIN.read)
else
  abort("data file #{ARGV[0]} does not exist") unless File.exist?(ARGV[0])
  data = File.open(ARGV[0], "r")
end

info = Whispr.new(data).info
info[:fileSize] = data.size

# if specific fields were requested print only those values
unless (fields = Array(ARGV[1..-1])).empty?
  fields.each do |field|
    unless info.include?(field.to_sym)
      puts "Unknown field '#{field}'. Valid fields are #{info.keys.join(", ")}"
      exit 1
    end
    # BUG FIX: info is keyed by Symbol; the original indexed with the String
    # (info[field]), which always returned nil and printed a blank line.
    puts info[field.to_sym]
  end
  exit 0
end

# otherwise dump every field, with the archives listed separately
archives = info.delete(:archives)

info.each { |k, v| puts "#{k}: #{v}" }

archives.each_index do |i|
  puts "\nArchive #{i}"
  archives[i].each { |k, v| puts "#{k}: #{v}" }
end
data/bin/whispr-update ADDED
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

# Write one or more timestamp:value points into a whisper file.

require "whispr"
require "stringio"

# arguments containing ':' are timestamp:value points; anything else is the file path
points = ARGV.select { |e| e.include?(":") }
if points.empty?
  puts "#{File.basename($0)} path timestamp:value [timestamp:value]*"
  exit(-1)
end

file = ARGV - points
# BUG FIX: file is an Array, so the original comparison (file == "-") was
# never true and an explicit "-" argument was treated as a file path.
if file.empty? || file[0] == "-"
  data = StringIO.new(STDIN.read, "r+")
else
  abort("data file #{file[0]} does not exist") unless File.exist?(file[0])
  data = File.open(file[0], "r+")
end

now = Time.now.to_i
# a timestamp of 'N' means "now"
points = points.map { |p| p.split(":") }.map { |t, v| [t == 'N' ? now : t.to_i, v.to_f] }

Whispr.new(data).update(*points)
data/lib/whispr.rb ADDED
@@ -0,0 +1,456 @@
1
+ require 'whispr/version'
2
+ require 'whispr/archive'
3
+ require 'stringio'
4
+
5
+ class Whispr
6
+ module Error; end
7
+ class WhisprError < StandardError
8
+ include Error
9
+ def self.exception(e)
10
+ return e if e.nil? || e == self
11
+ ne = new(e.to_s)
12
+ ne.set_backtrace e.backtrace if e.respond_to?(:backtrace)
13
+ ne
14
+ end
15
+ end
16
+ class CorruptWhisprFile < WhisprError; end
17
+ class InvalidTimeInterval < WhisprError; end
18
+ class TimestampNotCovered < WhisprError; end
19
+ class InvalidAggregationMethod < WhisprError; end
20
+ class ArchiveBoundaryExceeded < WhisprError; end
21
+ class ValueError < WhisprError; end
22
+ class InvalidConfiguration < WhisprError; end
23
+
24
+ LONG_FMT = "N"
25
+ METADATA_FMT = "#{LONG_FMT*2}g#{LONG_FMT}"
26
+ METADATA_SIZE = 16
27
+ ARCHIVE_INFO_FMT = LONG_FMT * 3
28
+ ARCHIVE_INFO_SIZE = 12
29
+ POINT_FMT = "#{LONG_FMT}G"
30
+ POINT_SIZE = 12
31
+ CHUNK_SIZE = 16384
32
+
33
+ AGGR_TYPES = [
34
+ :_,
35
+ :average,
36
+ :sum,
37
+ :last,
38
+ :max,
39
+ :min
40
+ ].freeze
41
+
42
+ class << self
43
+
44
# Map of retention-unit suffixes to their length in seconds (memoized).
# @return [Hash{String => Integer}]
def unitMultipliers
  @unitMultipliers ||= {
    's' => 1,
    'm' => 60,
    'h' => 60 * 60,
    'd' => 24 * 60 * 60,
    'w' => 7 * 24 * 60 * 60,
    'y' => 365 * 24 * 60 * 60
  }
end
54
+
55
# Parse a retention definition of the form "precision:points"
# (e.g. "60:1440" or "1h:7d") into [secondsPerPoint, numberOfPoints].
#
# Each side may be a bare integer or an integer with a unit suffix
# (s, m, h, d, w, y). A unit suffix on the points side is interpreted
# as a total duration and divided by the precision to obtain the count.
#
# @param [String] rdef retention definition, e.g. "1m:30d"
# @return [Array(Integer, Integer)] [secondsPerPoint, numberOfPoints]
# @raise [ArgumentError] if rdef has no ':' separator
# @raise [ValueError] if a unit suffix is not recognized
def parse_retention_def(rdef)
  raise ArgumentError.new("precision and points must be separated by a ':'") unless rdef && rdef.include?(":")
  (precision, points) = rdef.strip.split(':')
  if precision.to_i.to_s == precision
    # bare integer: already expressed in seconds
    precision = precision.to_i * unitMultipliers['s']
  else
    # split around the digits, e.g. "1h" => ["", "1", "h"]
    _, precision, unit = precision.split(/([\d]+)/)
    unit = 's' unless unit
    raise ValueError.new("Invalid precision specification unit #{unit}") unless unitMultipliers[unit[0]]
    precision = precision.to_i * unitMultipliers[unit[0]]
  end

  if points.to_i.to_s == points
    # bare integer: an explicit point count
    points = points.to_i
  else
    # unit suffix: a duration converted into a point count
    _, points, unit = points.split(/([\d]+)/)
    raise ValueError.new("Invalid retention specification unit #{unit}") unless unitMultipliers[unit[0]]
    points = points.to_i * unitMultipliers[unit[0]] / precision
  end

  [precision, points]
end
77
+
78
# Create whisper file.
# @param [String] path
# @param [Array] archiveList each archive is an array with two elements: [secondsPerPoint,numberOfPoints]
# @param [Hash] opts
# @option opts [Float] :xff the fraction of data points in a propagation interval that must have known values for a propagation to occur
# @option opts [Symbol] :aggregationMethod the function to use when propogating data; must be one of AGGR_TYPES[1..-1]
# @option opts [Boolean] :sparse (false) write a single trailing NUL instead of zero-filling the archives
# @option opts [Boolean] :overwrite (false)
# @raise [InvalidConfiguration] if the archiveList is invalid, or if 'path' exists and :overwrite is not true
# @see Whispr.validateArchiveList!
def create(path, archiveList, opts = {})
  opts = {:xff => 0.5, :aggregationMethod => :average, :sparse => false, :overwrite => false}.merge(opts)
  unless AGGR_TYPES[1..-1].include?(opts[:aggregationMethod])
    raise InvalidConfiguration.new("aggregationMethod must be one of #{AGGR_TYPES[1..-1]}")
  end

  validateArchiveList!(archiveList)
  raise InvalidConfiguration.new("File #{path} already exists!") if File.exist?(path) && !opts[:overwrite]

  # if file exists it will be truncated
  File.open(path, "wb") do |fh|
    fh.flock(File::LOCK_EX)
    aggregationType = AGGR_TYPES.index(opts[:aggregationMethod])
    # max retention is that of the coarsest (longest-lived) archive
    oldest = archiveList.map { |spp, points| spp * points }.max
    packedMetadata = [aggregationType, oldest, opts[:xff], archiveList.length].pack(METADATA_FMT)
    fh.write(packedMetadata)
    headerSize = METADATA_SIZE + (ARCHIVE_INFO_SIZE * archiveList.length)
    archiveOffsetPointer = headerSize
    archiveList.each do |spp, points|
      archiveInfo = [archiveOffsetPointer, spp, points].pack(ARCHIVE_INFO_FMT)
      fh.write(archiveInfo)
      archiveOffsetPointer += (points * POINT_SIZE)
    end
    # archiveOffsetPointer is now the total file size

    if opts[:sparse]
      # BUG FIX: IO#seek is absolute (SEEK_SET) by default; the original
      # seeked to (size - headerSize - 1), landing inside the header.
      # Seek to the file's final byte and write a single NUL so the OS
      # creates a sparse file of the correct length.
      fh.seek(archiveOffsetPointer - 1)
      fh.write("\0")
    else
      # zero-fill all archive space in CHUNK_SIZE slices
      remaining = archiveOffsetPointer - headerSize
      zeroes = "\x00" * CHUNK_SIZE
      while remaining > CHUNK_SIZE
        fh.write(zeroes)
        remaining -= CHUNK_SIZE
      end
      # BUG FIX: zeroes[0..remaining] is end-inclusive and wrote
      # remaining + 1 bytes, leaving the file one byte too large.
      fh.write(zeroes[0, remaining])
    end

    fh.flush
    fh.fsync rescue nil
  end

  new(path)
end
130
+
131
# Is the provided archive list valid?
# @return [Boolean] true, false
def validArchiveList?(archiveList)
  validateArchiveList!(archiveList)
  true
rescue StandardError
  false
end
136
+
137
# Validate an archive list without raising an exception
# @return [NilClass, InvalidConfiguration] nil when valid, otherwise the error
def validateArchiveList(archiveList)
  validateArchiveList!(archiveList)
rescue StandardError => e
  e
end
142
+
143
# Validate an archive list
# An ArchiveList must:
# 1. Have at least one archive config. Example: [60, 86400]
# 2. No archive may be a duplicate of another.
# 3. Higher precision archives' precision must evenly divide all lower precision archives' precision.
# 4. Lower precision archives must cover larger time intervals than higher precision archives.
# 5. Each archive must have at least enough points to consolidate to the next archive
#
# @param [Array] archiveList [secondsPerPoint, numberOfPoints] pairs
# @raise [InvalidConfiguration]
# @return [nil]
def validateArchiveList!(archiveList)
  raise InvalidConfiguration.new("you must specify at least on archive configuration") if Array(archiveList).empty?
  # sort finest precision first; duplicates become equal neighbors (rule 2)
  archiveList = archiveList.sort{|a,b| a[0] <=> b[0] }
  # compare each archive against its next-coarser neighbor
  archiveList[0..-2].each_with_index do |archive, i|
    nextArchive = archiveList[i+1]
    unless archive[0] < nextArchive[0]
      raise InvalidConfiguration.new("A Whipser database may not be configured " +
        "having two archives with the same precision " +
        "(archive#{i}: #{archive}, archive#{i+1}: #{nextArchive})")
    end
    unless nextArchive[0] % archive[0] == 0
      raise InvalidConfiguration.new("Higher precision archives' precision must " +
        "evenly divide all lower precision archives' precision " +
        "(archive#{i}: #{archive}, archive#{i+1}: #{nextArchive})")
    end

    # rule 4: retention (spp * points) must strictly grow
    retention = archive[0] * archive[1]
    nextRetention = nextArchive[0] * nextArchive[1]
    unless nextRetention > retention
      raise InvalidConfiguration.new("Lower precision archives must cover larger " +
        "time intervals than higher precision archives " +
        "(archive#{i}: #{archive[1]}, archive#{i + 1}:, #{nextArchive[1]})")
    end

    # rule 5: enough points to fill one consolidation window of the next archive
    archivePoints = archive[1]
    pointsPerConsolidation = nextArchive[0] / archive[0]
    unless archivePoints >= pointsPerConsolidation
      raise InvalidConfiguration.new("Each archive must have at least enough points " +
        "to consolidate to the next archive (archive#{i+1} consolidates #{pointsPerConsolidation} of " +
        "archive#{i}'s points but it has only #{archivePoints} total points)")
    end
  end
  nil
end
186
+ end
187
+
188
# @return [File, StringIO] file handle of the whisper file
attr_reader :fh

# When true (the default), the file handle is flushed after every update.
attr_accessor :auto_flush
alias :auto_flush? :auto_flush

# @param [File, StringIO, String] file an open handle, or a path to open in
#   read/write mode
# @param [Boolean] auto_flush flush the handle after each update
def initialize(file, auto_flush = true)
  @fh = file.is_a?(File) || file.is_a?(StringIO) ? file : File.open(file, 'r+')
  # the format is binary; avoid any newline/encoding translation
  @fh.binmode
  @auto_flush = auto_flush
end
199
+
200
# The parsed whisper file header (read once, then memoized).
# @return [Hash]
def header
  @header ||= read_header
end
alias :info :header
205
+
206
# One Archive wrapper per archive record in the header (memoized).
# @return [Array] Archives
# @see Whispr::Archive
def archives
  @archives ||= info[:archives].map { |a| Archive.new(self, a) }
end
211
+
212
+
213
# Retrieve values from a whisper file within the given time window.
#
# The most appropriate archive within the whisper file will be chosen. The
# return value will be a two element Array. The first element will be a
# three element array containing the start time, end time and step. The
# second element will be a N element array containing each value at each
# step period.
#
# @param [Time, Integer] fromTime beginning of the window
# @param [Time, Integer] untilTime end of the window (defaults to now)
# @raise [InvalidTimeInterval] if the clamped window is empty
# @see Archive#fetch
def fetch(fromTime, untilTime = Time.new)
  fromTime = fromTime.to_i
  untilTime = untilTime.to_i
  now = Time.now.to_i
  # BUG FIX: maxRetention is a duration in seconds, not a timestamp. The
  # oldest retrievable timestamp is (now - maxRetention); the original
  # compared fromTime directly against the duration, clamping every
  # request to a near-epoch value.
  oldestTime = now - header[:maxRetention]
  fromTime = oldestTime if fromTime < oldestTime
  raise InvalidTimeInterval.new("Invalid time interval") unless fromTime < untilTime
  untilTime = now if untilTime > now || untilTime < fromTime

  # pick the highest-precision archive that still covers fromTime
  diff = now - fromTime
  archive = archives.find { |a| a.retention >= diff }
  return archive.fetch(fromTime, untilTime)
end
235
+
236
# Update one or many points
# Each element of the points list should be a two dimensional Array where
# the first element is a timestamp and the second element is a value.
def update(*points)
  return if points.empty?
  # TODO lock the file
  if points.length == 1
    ts, value = points.first
    update_one(value, ts)
  else
    update_many(points)
  end
end
248
+
249
+ private
250
+
251
+
252
# Read and parse the whisper header — the file-level metadata followed by
# one info record per archive — restoring the caller's file position.
#
# @return [Hash] keys :maxRetention, :xFilesFactor, :aggregationMethod, :archives
# @raise [CorruptWhisprFile] if the header cannot be read/unpacked
def read_header
  o_pos = @fh.pos

  begin
    @fh.pos = 0
    metadata = @fh.read(METADATA_SIZE)
    aggr_type, max_retention, xff, arch_count = metadata.unpack(METADATA_FMT)
    # one ARCHIVE_INFO record immediately follows the metadata per archive
    archives = arch_count.times.map do |i|
      arch_info = @fh.read(ARCHIVE_INFO_SIZE)
      offset, s_per_pnt, points = arch_info.unpack(ARCHIVE_INFO_FMT)
      { :retention => s_per_pnt * points,
        :secondsPerPoint => s_per_pnt,
        :points => points,
        :size => points * POINT_SIZE,
        :offset => offset
      }
    end
  rescue => e
    raise CorruptWhisprFile.exception(e)
  ensure
    # always restore the caller's file position
    @fh.pos = o_pos
  end

  { :maxRetention => max_retention,
    :xFilesFactor => xff,
    :aggregationMethod => AGGR_TYPES[aggr_type],
    :archives => archives
  }
end
281
+
282
# Write a single value into the highest-precision archive covering
# +timestamp+, then propagate the change into each lower-precision archive
# until a propagation declines (fails the xFilesFactor test).
#
# @param [Numeric] value
# @param [Integer, NilClass] timestamp unix time (defaults to now)
# @raise [TimestampNotCovered] if the timestamp is in the future or older
#   than the file's maxRetention
def update_one(value, timestamp = nil)
  now = Time.new.to_i
  timestamp = now if timestamp.nil?
  diff = now - timestamp
  if !(diff < header[:maxRetention] && diff >= 0)
    raise TimestampNotCovered, "Timestamp (#{timestamp}) not covered by any archives in this database"
  end

  # the first (finest) archive whose retention window includes the timestamp
  aidx = (0 ... archives.length).find { |i| archives[i].retention > diff }
  archive = archives[aidx]
  lowerArchives = archives[aidx + 1 .. - 1]

  # align the timestamp to the archive's precision
  myInterval = timestamp - (timestamp % archive.spp)
  myPackedPoint = [myInterval, value].pack(POINT_FMT)
  @fh.seek(archive.offset)
  baseInterval, baseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)

  if baseInterval == 0
    # this file's first update
    @fh.seek(archive.offset)
    @fh.write(myPackedPoint)
    baseInterval, baseValue = myInterval, value
  else
    # the archive is a ring buffer anchored at the base point; locate the
    # slot for this interval relative to it
    timeDistance = myInterval - baseInterval
    pointDistance = timeDistance / archive.spp
    byteDistance = pointDistance * POINT_SIZE
    myOffset = archive.offset + (byteDistance % archive.size)
    @fh.seek(myOffset)
    @fh.write(myPackedPoint)
  end

  # cascade into the lower-precision archives; stop at the first that declines
  higher = archive
  lowerArchives.each do |lower|
    break unless propagate(myInterval, higher, lower)
    higher = lower
  end

  @fh.flush if auto_flush?
end
321
+
322
# Write many [timestamp, value] points, routing each point to the
# highest-precision archive whose retention still covers it. Points older
# than the oldest archive's retention are silently dropped.
#
# @param [Array<Array(Integer, Numeric)>] points
def update_many(points)
  # order points by timestamp, newest first
  points = points.map{|ts, v| [ts.to_i, v.to_f ] }.sort {|b,a| a[0] <=> b[0] }
  now = Time.new.to_i
  archives = self.archives.to_enum
  currentArchive = archives.next
  currentPoints = []
  points.each do |point|
    age = now - point[0]
    # advance to the first archive old enough to hold this point, flushing
    # the points batched for each archive we pass
    while currentArchive.retention < age
      unless currentPoints.empty?
        currentPoints.reverse! # put points in chronological order
        currentArchive.update_many(currentPoints)
        currentPoints = []
      end
      begin
        currentArchive = archives.next
      rescue StopIteration
        currentArchive = nil
        break
      end
    end
    # drop remaining points that don't fit in the database
    break unless currentArchive

    currentPoints << point
  end

  if currentArchive && !currentPoints.empty?
    # don't forget to commit after we've checked all the archives
    currentPoints.reverse!
    currentArchive.update_many(currentPoints)
  end

  @fh.flush if auto_flush?
end
358
+
359
# Aggregate the higher-precision points that fall inside one interval of
# the +lower+ (coarser) archive and write the result into it.
#
# @param [Integer] timestamp the time of the triggering update
# @param [Archive] higher the higher-precision source archive
# @param [Archive] lower the lower-precision destination archive
# @return [Boolean] true if a value was written (enough known points per
#   the xFilesFactor), false otherwise
def propagate(timestamp, higher, lower)
  aggregationMethod = header[:aggregationMethod]
  xff = header[:xFilesFactor]

  lowerIntervalStart = timestamp - (timestamp % lower.spp)
  lowerIntervalEnd = lowerIntervalStart + lower.spp
  @fh.seek(higher.offset)
  higherBaseInterval, higherBaseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)

  if higherBaseInterval == 0
    higherFirstOffset = higher.offset
  else
    # locate the slot of lowerIntervalStart within the higher ring buffer
    timeDistance = lowerIntervalStart - higherBaseInterval
    pointDistance = timeDistance / higher.spp
    byteDistance = pointDistance * POINT_SIZE
    higherFirstOffset = higher.offset + (byteDistance % higher.size)
  end

  # the span of higher-precision points covering one lower interval
  higherPoints = lower.spp / higher.spp
  higherSize = higherPoints * POINT_SIZE
  relativeFirstOffset = higherFirstOffset - higher.offset
  relativeLastOffset = (relativeFirstOffset + higherSize) % higher.size
  higherLastOffset = relativeLastOffset + higher.offset
  @fh.seek(higherFirstOffset)

  if higherFirstOffset < higherLastOffset
    # don't wrap the archive
    seriesString = @fh.read(higherLastOffset - higherFirstOffset)
  else
    # wrap the archive
    higherEnd = higher.offset + higher.size
    seriesString = @fh.read(higherEnd - higherFirstOffset)
    @fh.seek(higher.offset)
    seriesString += @fh.read(higherLastOffset - higher.offset)
  end

  points = seriesString.length / POINT_SIZE
  unpackedSeries = seriesString.unpack(POINT_FMT * points)

  # construct a list of values; slots whose timestamp does not fall on the
  # expected interval boundary stay nil (unknown)
  neighborValues = Array.new(points)
  currentInterval = lowerIntervalStart
  step = higher.spp
  # BUG FIX: iterate the exclusive range; (0..length) stepped one
  # (timestamp, value) pair past the end of the unpacked series.
  (0...unpackedSeries.length).step(2) do |i|
    pointTime = unpackedSeries[i]
    neighborValues[i / 2] = unpackedSeries[i + 1] if pointTime == currentInterval
    currentInterval += step
  end

  knownValues = neighborValues.select { |v| !v.nil? }
  return false if knownValues.empty?
  # BUG FIX: the original computed (a / b).to_f with integer operands, which
  # truncated the ratio to 0.0 or 1.0 before comparing with xFilesFactor.
  if (knownValues.length.to_f / neighborValues.length) < xff
    return false
  end

  # we have enough data to propagate a value
  aggregateValue = aggregate(aggregationMethod, knownValues)
  myPackedPoint = [lowerIntervalStart, aggregateValue].pack(POINT_FMT)
  @fh.seek(lower.offset)
  lowerBaseInterval, lowerBaseValue = @fh.read(POINT_SIZE).unpack(POINT_FMT)

  if lowerBaseInterval == 0
    # first propagated update to this lower archive
    @fh.seek(lower.offset)
    @fh.write(myPackedPoint)
  else
    # ring-buffer slot relative to the lower archive's base point
    timeDistance = lowerIntervalStart - lowerBaseInterval
    pointDistance = timeDistance / lower.spp
    byteDistance = pointDistance * POINT_SIZE
    lowerOffset = lower.offset + (byteDistance % lower.size)
    @fh.seek(lowerOffset)
    @fh.write(myPackedPoint)
  end
  true
end
434
+
435
# Aggregate +knownValues+ with the given method.
#
# @param [Symbol] aggregationMethod one of AGGR_TYPES[1..-1]
# @param [Array<Numeric>] knownValues values to combine (must not be empty)
# @return [Numeric]
# @raise [InvalidAggregationMethod] for an unrecognized method
def aggregate(aggregationMethod, knownValues)
  case aggregationMethod
  when :average
    # BUG FIX: sum with a float seed so integer inputs don't truncate; the
    # original divided integers first and only then called to_f.
    knownValues.inject(0.0) { |sum, i| sum + i } / knownValues.length
  when :sum
    knownValues.inject(0) { |sum, i| sum + i }
  when :last
    knownValues[-1]
  when :max
    knownValues.max
  when :min
    knownValues.min
  else
    raise InvalidAggregationMethod, "Unrecognized aggregation method #{aggregationMethod}"
  end
end
455
+
456
+ end
@@ -0,0 +1,220 @@
1
+ class Whispr
2
+ class Archive
3
+ include Enumerable
4
+
5
+ # @return [Hash] the archive header
6
+ attr_reader :header
7
+ # @return [Fixnum] the start location in the whisper file of this Archive
8
+ attr_reader :offset
9
+ # @return [Fixnum] the number of points in this archive
10
+ attr_reader :points
11
+ # @return [Fixnum] the total size of this archive (points * POINT_SIZE)
12
+ attr_reader :size
13
+ # @return [Fixnum] number of seconds worth of data retained by this archive
14
+ attr_reader :retention
15
+ # @return [Fixnum] seconds per point
16
+ attr_reader :spp
17
+ # @return [Whispr} the Whisper that contains this Archive
18
+ attr_reader :whisper
19
+
20
# @param [Whispr] whisper the Whispr instance that owns this archive
# @param [Hash] header this archive's record from the whisper header
def initialize(whisper, header)
  @whisper = whisper
  @header = header
  @offset = @header[:offset]
  @points = @header[:points]
  @size = @header[:size]
  @retention = @header[:retention]
  @spp = @header[:secondsPerPoint]
  # BUG FIX: the archive ends @size bytes after @offset. @size is already
  # points * POINT_SIZE (see Whispr#read_header), so the original
  # (@size * @points + @offset) put the end-of-archive marker far past the
  # real boundary, letting next_point/eoa? read into the next archive.
  @eoa = @offset + @size
end
30
+
31
# Retrieve each point from the archive.
#
# If a block is provided each point is read directly from
# the whisper file one at a time and yielded. If a block
# is not provided, all points are read from the file and
# returned as an enum.
#
# Each point is represented as a three element Array. The first
# element is the index of the point. The second element is the
# timestamp of the point and the third element is the value of
# the point.
def each(&blk)
  return slurp.to_enum unless block_given?
  o_pos = @whisper.fh.pos
  begin
    @whisper.fh.pos = @offset
    points.times {|i| yield(i, *next_point) }
  ensure
    # always restore the caller's file position
    @whisper.fh.pos = o_pos
  end
end
52
+
53
# Has the end of the archive been reached?
def eoa?
  @whisper.fh.pos >= @eoa
end

# @return [Enumerator] an enumerator over every [index, timestamp, value]
#   point in this archive (reads the whole archive eagerly via slurp)
def to_enum
  slurp.to_enum
end

# Retrieve the next point from the whisper file.
# Returns nil once the file position is outside this archive's bounds.
# @api private
def next_point
  return nil if @whisper.fh.pos >= @eoa || @whisper.fh.pos < @offset
  @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
end

# Retrieve all points for this archive from the whisper file.
#
# Each point is represented as a three element Array. The first
# element is the index of the point. The second element is the
# timestamp of the point and the third element is the value of
# the point.
#
# The caller's file position is restored on exit.
# @return [Array]
def slurp
  o_pos = @whisper.fh.pos
  @whisper.fh.pos = @offset
  data = @whisper.fh.read(@size).unpack(POINT_FMT * @points)
  @points.times.map { |i| [i, data.shift, data.shift] }
ensure
  @whisper.fh.pos = o_pos
end
85
+
86
# Retrieve values for a time period from an archive within a whisper file
#
# The return value will be a two element Array. The first element will be
# a three element array containing the start time, end time and step. The
# second element will be a N element array containing each value at each
# step period.
#
# @param [Integer] fromTime start of the window (unix time)
# @param [Integer] untilTime end of the window (unix time)
# @see Whispr#fetch
def fetch(fromTime, untilTime)
  # align both boundaries to the next interval boundary
  fromInterval = (fromTime - (fromTime % spp)) + spp
  untilInterval = (untilTime - (untilTime % spp)) + spp
  o_pos = @whisper.fh.pos
  begin
    @whisper.fh.seek(offset)
    baseInterval, baseValue = @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
    if baseInterval == 0
      # the archive has never been written; every slot is unknown (nil)
      step = spp
      points = (untilInterval - fromInterval) / step
      timeInfo = [fromInterval, untilInterval, step]
      return [timeInfo, Array.new(points)]
    end

    # Determine fromOffset
    timeDistance = fromInterval - baseInterval
    pointDistance = timeDistance / spp
    byteDistance = pointDistance * POINT_SIZE
    fromOffset = offset + (byteDistance % size)

    # Determine untilOffset
    timeDistance = untilInterval - baseInterval
    pointDistance = timeDistance / spp
    byteDistance = pointDistance * POINT_SIZE
    untilOffset = offset + (byteDistance % size)

    # Read all the points in the interval
    @whisper.fh.seek(fromOffset)
    if fromOffset < untilOffset
      # we don't wrap around the archive
      series = @whisper.fh.read(untilOffset - fromOffset)
    else
      # we wrap around the archive, so we need two reads
      archiveEnd = offset + size
      series = @whisper.fh.read(archiveEnd - fromOffset)
      @whisper.fh.seek(offset)
      series += @whisper.fh.read(untilOffset - offset)
    end

    points = series.length / POINT_SIZE
    series = series.unpack(POINT_FMT * points)
    currentInterval = fromInterval
    step = spp
    valueList = Array.new(points)
    # BUG FIX: iterate the exclusive range; (0..series.length) stepped one
    # (timestamp, value) pair past the end of the unpacked series.
    (0...series.length).step(2) do |i|
      pointTime = series[i]
      if pointTime == currentInterval
        pointValue = series[i + 1]
        valueList[i / 2] = pointValue
      end
      currentInterval += step
    end

    timeInfo = [fromInterval, untilInterval, step]
  ensure
    @whisper.fh.pos = o_pos
  end
  [timeInfo, valueList]
end
153
+
154
# Write many points into this archive and propagate the updates into every
# lower-precision archive.
#
# @param [Array<Array(Integer, Numeric)>] points [timestamp, value] pairs
#   in chronological order
def update_many(points)
  step = spp
  # align each timestamp to this archive's interval boundaries
  alignedPoints = points.map { |ts, v| [(ts - (ts % step)), v] }
  # Create a packed string for each contiguous sequence of points
  packedStrings = []
  previousInterval = nil
  currentString = ''
  alignedPoints.each do |interval, value|
    next if interval == previousInterval
    if previousInterval.nil? || (interval == previousInterval + step)
      currentString += [interval, value].pack(POINT_FMT)
    else
      # gap: flush the current run and start a new one
      numberOfPoints = currentString.length / POINT_SIZE
      startInterval = previousInterval - (step * (numberOfPoints - 1))
      packedStrings << [startInterval, currentString]
      currentString = [interval, value].pack(POINT_FMT)
    end
    previousInterval = interval
  end
  if !currentString.empty?
    numberOfPoints = currentString.length / POINT_SIZE
    startInterval = previousInterval - (step * (numberOfPoints - 1))
    packedStrings << [startInterval, currentString]
  end

  # Read base point and determine where our writes will start
  @whisper.fh.seek(offset)
  baseInterval, baseValue = @whisper.fh.read(POINT_SIZE).unpack(POINT_FMT)
  baseInterval = packedStrings[0][0] if baseInterval == 0
  packedStrings.each do |interval, packedString|
    timeDistance = interval - baseInterval
    pointDistance = timeDistance / step
    byteDistance = pointDistance * POINT_SIZE
    myOffset = offset + (byteDistance % size)
    @whisper.fh.seek(myOffset)
    archiveEnd = offset + size
    bytesBeyond = (myOffset + packedString.length) - archiveEnd

    if bytesBeyond > 0
      # the run wraps past the end of the ring buffer; split the write
      # BUG FIX: [0..-bytesBeyond] is end-inclusive and wrote one byte too
      # many, so the position check below always raised on a wrapped write.
      @whisper.fh.write(packedString[0...-bytesBeyond])
      if(@whisper.fh.pos != archiveEnd)
        raise ArchiveBoundaryExceeded.new("archiveEnd=#{archiveEnd} pos=#{@whisper.fh.pos} bytesBeyond=#{bytesBeyond} len(packedString)=#{packedString.length}")
      end
      @whisper.fh.seek(offset)
      @whisper.fh.write(packedString[-bytesBeyond..-1])
    else
      @whisper.fh.write(packedString)
    end
  end # interval, packedString|

  # Now we propagate the updates to the lower-precision archives
  higher = self
  @whisper.archives.select { |a| a.spp > spp }.each do |lower|
    lowerIntervals = alignedPoints.map { |p| p[0] - (p[0] % lower.spp) }
    propagateFurther = false
    lowerIntervals.uniq.each do |interval|
      # BUG FIX: the original assigned a misspelled local
      # ("propagateFuther"), so propagateFurther stayed false and the
      # cascade always stopped after the first lower archive.
      propagateFurther = @whisper.send(:propagate, interval, higher, lower)
    end
    break unless propagateFurther
    higher = lower
  end
end
216
+
217
+
218
+
219
+ end
220
+ end
@@ -0,0 +1,3 @@
1
+ class Whispr
2
+ VERSION = '0.0.1'
3
+ end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: whispr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Caleb Crane
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-14 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ''
15
+ email: whispr@simulacre.org
16
+ executables:
17
+ - whispr-create
18
+ - whispr-dump
19
+ - whispr-fetch
20
+ - whispr-info
21
+ - whispr-update
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - lib/whispr/archive.rb
26
+ - lib/whispr/version.rb
27
+ - lib/whispr.rb
28
+ - bin/whispr-create
29
+ - bin/whispr-dump
30
+ - bin/whispr-fetch
31
+ - bin/whispr-info
32
+ - bin/whispr-update
33
+ - README.md
34
+ homepage: http://github.com/simulacre/whispr
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 1.8.24
55
+ signing_key:
56
+ specification_version: 3
57
+ summary: Read and write Graphite Whisper round-robin files
58
+ test_files: []
59
+ has_rdoc: