milemarker 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1cc6c3aa16cc1d24991354663de1ceea5859d82a025e83d2683b9fa3b49d5f9e
4
+ data.tar.gz: 924edbc92d18fd924d4934f188a88e9c36ed8bbbae443a7815462b4d40388fe0
5
+ SHA512:
6
+ metadata.gz: 7841d99693517311d47206ca49890278386643130dd934b6ca6f93fa0b0032c5e6f8aaaf038f8c0336ff2e967744d0e86a86c7dcb9b34e8030cdcecaf97a5773
7
+ data.tar.gz: 033d5566f4d893668c03f358f40a8ee2d0bd26610010e05ca11871225b94c2288f3bd252c1e9fc05f75034f200ba43d28520be5eba5d715e6f63766f8b19e765
@@ -0,0 +1,16 @@
1
+ name: Ruby
2
+
3
+ on: [push,pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v2
10
+ - name: Set up Ruby
11
+ uses: ruby/setup-ruby@v1
12
+ with:
13
+ ruby-version: 2.6.6
14
+ bundler-cache: true
15
+ - name: Run the default task
16
+ run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,13 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ Enabled: false
10
+ EnforcedStyle: double_quotes
11
+
12
+ Layout/LineLength:
13
+ Max: 120
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ ## [1.0.0] - 2021-11-29
2
+
3
+ First public release
4
+
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in milemarker.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Bill Dueber
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,209 @@
1
+ # Milemarker -- track (and probably log) progress in batch jobs
2
+
3
+ Never again write code of the
4
+ form `log.info "Finished 1_000 in #{secs} seconds at a rate of #{total.to_f / secs}"`
5
+ .
6
+
7
+ ## Usage
8
+
9
+ ```ruby
10
+
11
+ require 'milemarker'
12
+ require 'logger'
13
+ input_file = "records.ndj"
14
+
15
+ # Create a new milemarker. Default batch_size is 1_000
16
+ milemarker = Milemarker.new(name: "Load #{input_file}", batch_size: 1_000_000)
17
+ logger = Logger.new(STDERR)
18
+
19
+ milemarker.logger = logger
20
+
21
+ File.open(input_file).each do |line|
22
+ do_whatever_needs_doing(line)
23
+ milemarker.increment_and_log_batch_line
24
+ end
25
+ milemarker.log_final_line # if logging is set up
26
+
27
+ # Identical to the above, but do the logging "by hand"
28
+ File.open(input_file).each do |line|
29
+ do_whatever_needs_doing(line)
30
+ milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
31
+ end
32
+ logger.info milemarker.final_line
33
+
34
+ # Sample output
35
+ # ...
36
+ # I, [2021-11-02T01:51:06.959137 #11710] INFO -- : load records.ndj 8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
37
+ # I, [2021-11-02T01:51:36.992831 #11710] INFO -- : load records.ndj 10_000_000. This batch 2_000_000 in 30.0s (66_591 r/s). Overall 71_394 r/s.
38
+ # ...
39
+ # I, [2021-11-02T02:01:56.702196 #11710] INFO -- : load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
40
+
41
+ ```
42
+
43
+ ## Basic usage
44
+
45
+ Most programs will probably use `milemarker` via
46
+ `#increment_and_log_batch_line`
47
+ (or its counterpart `#increment_and_on_batch {|milemarker| ... }` ). As
48
+ the name suggests, this will:
49
+
50
+ * increment the batch counter
51
+ * If the batch counter >= the batch size:
52
+ * run the provided block (or write the logline)
53
+ * reset count/time/etc for the next batch
54
+
55
+ Some examples:
56
+
57
+ ```ruby
58
+
59
+ # Logging, as above
60
+ milemarker = Milemarker.new(batch_size: 1000, name: 'Load myfile')
61
+ milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
62
+
63
+ # Alert when things seem to take too long
64
+
65
+ milemarker.increment_and_on_batch do |milemarker|
66
+ secs = milemarker.last_batch_seconds
67
+ if secs > way_too_long
68
+ logger.error "Whoa: #{secs} is too long for a batch of #{milemarker.batch_size}"
69
+ end
70
+ end
71
+
72
+ # #on_batch and #increment_and_on_batch can be used to do real (i.e.,
73
+ # non-logging) work after every `batch_size` calls, too
74
+ queue = []
75
+ my_stuff.each do |doc|
76
+ queue << do_something_to(doc)
77
+ milemarker.increment_and_on_batch do |milemarker|
78
+ write_to_datastore(queue)
79
+ queue = []
80
+ logger.info milemarker.batch_line
81
+ end
82
+ end
83
+ ```
84
+
85
+ `#incr` and `#on_batch(&blk)` are also available separately if you need to be
86
+ more explicit and less atomic.
87
+
88
+ All the components that make up a batch_line (e.g., the records/second as
89
+ a nice string) are available to roll your own batch line. See the API
90
+ documentation for details.
91
+
92
+ ### Incorporating a logger into milemarker
93
+
94
+ For standard logging cases, you can also pass in a logger, or let milemarker
95
+ create one for its own use based on an IO-like object you provide
96
+
97
+ ```ruby
98
+ logger = Logger.new(STDERR)
99
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000, logger: logger)
100
+
101
+ # same thing
102
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
103
+ milemarker.logger = logger
104
+
105
+ # same thing again
106
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
107
+ milemarker.create_logger!(STDERR)
108
+
109
+ File.open(input_file).each do |line|
110
+ do_whatever_needs_doing(line)
111
+ milemarker.increment_and_log_batch_line
112
+ end
113
+
114
+ milemarker.log_final_line
115
+
116
+ # All the logging methods take an optional :level argument
117
+ milemarker.log_final_line(level: :debug)
118
+
119
+ ```
120
+
121
+ ### Structured logging with Milemarker::Structured
122
+
123
+ `Milemarker::Structured` will return hashes for `#batch_line` and `#final_line`
124
+ (aliased to `#batch_data` and `#final_data`, respectively) and pass those
125
+ hashes along to whatever logger you provide. `#create_logger!` for this
126
+ subclass will create a logger that provides json lines instead of text, too.
127
+
128
+ Presumably, if you pass in your own logger you'll use something like
129
+ [semantic_logger](https://github.com/reidmorrison/semantic_logger)
130
+ or [ougai](https://github.com/tilfin/ougai).
131
+
132
+ ```ruby
133
+ milemarker = Milemarker::Structured.new(name: 'my_process', batch_size: 10_000)
134
+ milemarker.create_logger!(STDERR)
135
+
136
+ File.open(input_file).each do |line|
137
+ do_whatever_needs_doing(line)
138
+ milemarker.increment_and_log_batch_line
139
+ end
140
+
141
+ # Usually one line; broken up for readability
142
+ # {"name":"my_process","batch_count":10_000,"batch_seconds":97.502088,
143
+ # "batch_rate":1.035875252230496,"total_count":100,"total_seconds":97.502094,
144
+ # "total_rate":1.0358751884856956,"level":"INFO","time":"2021-11-06 17:32:21 -0400"}
145
+
146
+ ```
147
+
148
+ ## Threadsafety
149
+
150
+ A call to `milemarker.threadsafify!` will wrap `increment_and_on_batch` (and
151
+ `increment_and_log_batch_line`) to be a threadsafe atomic operation at the
152
+ cost of some performance.
153
+
154
+ ```
155
+ milemarker.threadsafify!
156
+
157
+ ```
158
+
159
+ ## Turning off logging
160
+
161
+ If the logger is set to `nil`, no logging will occur.
162
+
163
+ ```ruby
164
+ # Turn off logging
165
+
166
+ milemarker.logger = nil
167
+ ```
168
+
169
+ You could also just configure your logger to ignore stuff
170
+
171
+ ```ruby
172
+
173
+ milemarker.logger.level = :error
174
+
175
+ ```
176
+
177
+ ## Accuracy
178
+
179
+ Note that `milemarker` isn't designed for real benchmarking. The assumption is
180
+ that whatever work your code is actually doing will drown out any
181
+ inefficiencies in the `milemarker` code, and milemarker numbers can be used to suss out
182
+ where weird things are happening.
183
+
184
+ ## Installation
185
+
186
+ Add this line to your application's Gemfile:
187
+
188
+ ```ruby
189
+ gem 'milemarker'
190
+ ```
191
+
192
+ And then execute:
193
+
194
+ $ bundle install
195
+
196
+ Or install it yourself as:
197
+
198
+ $ gem install milemarker
199
+
200
+
201
+ ## Contributing
202
+
203
+ Bug reports and pull requests are welcome on GitHub
204
+ at https://github.com/billdueber/milemarker.
205
+
206
+ ## License
207
+
208
+ The gem is available as open source under the terms of
209
+ the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec]
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Interactive console with the gem preloaded, for experimenting during
# development.

require "bundler/setup"
# Load this gem itself. (This previously required "waypoint", which is not
# the name of this gem's library file and failed with a LoadError.)
require "milemarker"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
class Milemarker
  # Milemarker for structured logging
  #   * #create_logger! creates a logger that spits out JSON lines instead of human-centered strings
  #   * #batch_line and #final_line return hashes of count/time/rate data
  #   * ...and are aliased to #batch_data and #final_data
  #
  # Milemarker::Structured should be a drop-in replacement for Milemarker, with the above differences
  # and of course the caveat that if you provide your own logger it should expect to deal with
  # the hashes coming from #batch_data and #final_data
  class Structured < Milemarker
    # Create a logger that spits out JSON strings instead of human-oriented strings.
    # In addition to whatever message is passed, each record will always also include
    #   { level: severity, time: datetime }
    #
    # The logger will try to deal intelligently with different types of arguments
    #   * a Hash will just be passed through
    #   * a String's JSON will carry the string under the key 'msg'
    #   * an Exception's JSON will have the error's message, class, the first line of
    #     the backtrace, and the hostname
    #   * Anything else will be treated like a hash if it responds to #to_h;
    #     otherwise msg.inspect is used as the message string
    #
    # All arguments are forwarded unchanged to Milemarker#create_logger!.
    # @return [Milemarker::Structured] self
    def create_logger!(*args, **kwargs)
      super
      @logger.formatter = json_formatter
      self
    end

    # @return [Hash] hash with information about the last batch
    def batch_line
      {
        name: name,
        batch_count: last_batch_size,
        batch_seconds: last_batch_seconds,
        batch_rate: batch_rate,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias batch_data batch_line

    # @return [Hash] hash with information about the entire run so far
    def final_line
      {
        name: name,
        final_batch_size: final_batch_size,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias final_data final_line

    # Turn an exception into a loggable hash.
    # @param [Exception] msg The exception being logged
    # @return [Hash] message, class, first backtrace line (nil if there is no backtrace), and hostname
    def exception_message_hash(msg)
      { msg: msg.message, error: msg.class, at: msg.backtrace&.first, hostname: Socket.gethostname }
    end

    # Turn an arbitrary (non-Hash, non-String, non-Exception) object into a loggable hash.
    # @param [Object] msg The object being logged
    # @return [Hash] msg.to_h when that works, otherwise { msg: msg.inspect }
    def other_message_hash(msg)
      return { msg: msg.inspect } unless msg.respond_to?(:to_h)

      msg.to_h
    rescue TypeError, ArgumentError
      # Some objects respond to #to_h but cannot actually be converted
      # (e.g., an Array that isn't a list of pairs); fall back to #inspect
      # rather than blowing up inside the log formatter.
      { msg: msg.inspect }
    end

    private

    # Build the formatter proc installed by #create_logger!.
    # @return [Proc] formatter taking (severity, datetime, progname, msg) and returning one JSON line
    def json_formatter
      proc do |severity, datetime, _progname, msg|
        payload = case msg
                  when Hash
                    msg
                  when String
                    { msg: msg }
                  when Exception
                    exception_message_hash(msg)
                  else
                    other_message_hash(msg)
                  end
        # Logger writes the formatter's return value verbatim, so the record
        # separator has to be added here to get one JSON document per line.
        "#{payload.merge({ level: severity, time: datetime }).to_json}\n"
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
class Milemarker
  # Release version of the gem; read by the gemspec.
  VERSION = "1.0.0"
end
data/lib/milemarker.rb ADDED
@@ -0,0 +1,248 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ppnum"
4
+ require 'logger'
5
+ require 'socket'
6
+ require 'json'
7
+ require 'milemarker/structured'
8
+
9
# Milemarker class, to keep track of progress over time for long-running
# iterating processes: count increments, notice when a batch boundary has
# been crossed, and produce (and optionally log) count/time/rate lines.
#
# @author Bill Dueber <bill@dueber.com>
class Milemarker
  # @return [String] optional "name" of this milemarker, for logging purposes
  attr_accessor :name

  # @return [Integer] batch size for computing `on_batch` calls
  attr_accessor :batch_size

  # @return [Logger, #info] logging object for automatic logging methods
  attr_accessor :logger

  # @return [Integer] which batch number (total increment / batch_size)
  attr_reader :batch_number

  # @return [Numeric] number of seconds it took to process the last batch
  #   (0 until the first batch completes)
  attr_reader :last_batch_seconds

  # @return [Integer] number of records (really, number of increments) in the last batch
  attr_reader :last_batch_size

  # @return [Time] Time the full process started
  attr_reader :start_time

  # @return [Time] Time the last batch started processing
  attr_reader :batch_start_time

  # @return [Time] Time the last batch ended processing
  attr_reader :batch_end_time

  # @return [Integer] Total records (really, increments) for the full run
  attr_reader :count

  # @return [Integer] Total count at the time of the last on_batch call. Used to figure out
  #   how many records were in the final batch
  attr_reader :prev_count

  # Create a new milemarker tracker, with an optional name and logger
  # @param [Integer] batch_size How often the on_batch block will be called
  # @param [String] name Optional "name" for this milemarker, included in the generated log lines
  # @param [Logger, #info, #warn] logger Optional logger that responds to the normal #info, #warn, etc.
  def initialize(batch_size: 1000, name: nil, logger: nil)
    @batch_size = batch_size
    @name       = name
    @logger     = logger

    @batch_number       = 0
    @last_batch_size    = 0
    @last_batch_seconds = 0

    @start_time       = Time.now
    @batch_start_time = @start_time
    @batch_end_time   = @start_time

    @count      = 0
    @prev_count = 0

    # Created up front so #threadsafe_increment_and_on_batch works even if
    # #threadsafify! was never called.
    @mutex = Mutex.new
  end

  # Turn `increment_and_on_batch` (and thus `increment_and_log_batch_line`) into
  # a threadsafe version
  # @return [Milemarker] self
  def threadsafify!
    # Reuse an existing mutex so a repeated call can't swap the lock out from
    # under threads that are already synchronizing on it.
    @mutex ||= Mutex.new
    define_singleton_method(:increment_and_on_batch) do |&blk|
      threadsafe_increment_and_on_batch(&blk)
    end
    self
  end

  # Increment the counter -- how many records processed, e.g.
  # @param [Integer] increase How much to add to the count
  # @return [Milemarker] self
  def incr(increase = 1)
    @count += increase
    self
  end

  alias increment incr

  # Create a logger for use in logging milemarker information. All arguments
  # are passed straight through to Logger.new
  # @example mm.create_logger!(STDOUT)
  # @return [Milemarker] self
  def create_logger!(*args, **kwargs)
    @logger = Logger.new(*args, **kwargs)
    self
  end

  # Run the given block if we've exceeded the batch size for the current batch.
  # If a batch boundary was crossed but no block was given, the batch
  # bookkeeping is still updated and nothing is yielded.
  # @yield [Milemarker] self
  # @return [Object, nil] the block's value, or nil if nothing was run
  def on_batch
    return unless batch_size_exceeded?

    set_milemarker!
    yield self if block_given?
  end

  # Single call to increment and run (if needed) the on_batch block
  def _increment_and_on_batch(&blk)
    incr.on_batch(&blk)
  end

  alias increment_and_on_batch _increment_and_on_batch

  # Threadsafe version of #increment_and_on_batch, doing the whole thing as a single atomic action
  def threadsafe_increment_and_on_batch(&blk)
    @mutex.synchronize do
      _increment_and_on_batch(&blk)
    end
  end

  # Convenience method, exactly the same as the common idiom
  #  `mm.incr; mm.on_batch {|mm| log.info mm.batch_line}`
  # @param [Symbol] level The level to log at
  def increment_and_log_batch_line(level: :info)
    increment_and_on_batch { log_batch_line(level: level) }
  end

  # Log the batch line, as described in #batch_line
  # @param [Symbol] level The level to log at
  def log_batch_line(level: :info)
    log(batch_line, level: level)
  end

  # Log the final line, as described in #final_line
  # @param [Symbol] level The level to log at
  def log_final_line(level: :info)
    log(final_line, level: level)
  end

  # A line describing the batch suitable for logging, of the form
  #   load records.ndj   8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
  # @return [String] The batch log line
  def batch_line
    # rubocop:disable Layout/LineLength
    "#{name} #{ppnum(count, 10)}. This batch #{ppnum(last_batch_size, 5)} in #{ppnum(last_batch_seconds, 4, 1)}s (#{batch_rate_str} r/s). Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # Record how many increments there have been since the last on_batch call.
  # Most useful to count how many items are in the final (usually incomplete) batch
  # Note that since Milemarker can't tell when you're done processing, you can call this
  # anytime and get the number of items processed since the last on_batch call.
  # @return [Integer] Number of items processed in the final batch
  def final_batch_size
    count - prev_count
  end

  alias batch_count_so_far final_batch_size

  # A line describing the entire run, suitable for logging, of the form
  #   load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
  # @return [String] The full log line
  def final_line
    # rubocop:disable Layout/LineLength
    "#{name} FINISHED. #{ppnum(count, 10)} total records in #{seconds_to_time_string(total_seconds_so_far)}. Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # @return [Float] rate of the last batch (in recs/second); 0.0 if no batch
  #   has completed yet or the batch took no measurable time
  def batch_rate
    # Without this guard, 0.0 / 0 is NaN (and n / 0.0 is Infinity), both of
    # which make the number formatting in #batch_rate_str raise.
    return 0.0 if count.zero? || !last_batch_seconds.positive?

    last_batch_size.to_f / last_batch_seconds
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  #   decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def batch_rate_str(decimals = 0)
    ppnum(batch_rate, 0, decimals)
  end

  # @return [Float] total rate so far (in rec/second); 0.0 if nothing has
  #   been counted or no measurable time has elapsed
  def total_rate
    elapsed = total_seconds_so_far
    # A zero (or, after a clock adjustment, negative) elapsed time would
    # otherwise produce Infinity or a meaningless negative rate.
    return 0.0 if @count.zero? || !elapsed.positive?

    count / elapsed
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  #   decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def total_rate_str(decimals = 0)
    ppnum(total_rate, 0, decimals)
  end

  # Total seconds since the beginning of this milemarker
  # @return [Float] seconds since the milemarker was created
  def total_seconds_so_far
    Time.now - start_time
  end

  # Total seconds since this batch started
  # @return [Float] seconds since the beginning of this batch
  def batch_seconds_so_far
    Time.now - batch_start_time
  end

  # Set/reset all the internal state. Called by #on_batch when necessary;
  # should probably not be called manually
  def set_milemarker!
    @batch_end_time     = Time.now
    @last_batch_size    = @count - @prev_count
    @last_batch_seconds = @batch_end_time - @batch_start_time

    reset_for_next_batch!
  end

  # Reset the internal counters/timers at the end of a batch. Taken care of
  # by #on_batch; should probably not be called manually.
  def reset_for_next_batch!
    @batch_start_time = batch_end_time
    @prev_count       = count
    @batch_number     = batch_divisor
  end

  # Log a line using the internal logger. Do nothing if no logger is configured.
  # @param [String] msg The message to log
  # @param [Symbol] level The level to log at
  def log(msg, level: :info)
    # public_send so that `level` can only ever reach the logger's public API
    logger&.public_send(level, msg)
  end

  private

  # Has the count crossed into a batch we haven't reported on yet?
  def batch_size_exceeded?
    batch_divisor > @batch_number
  end

  # Number of complete batches represented by the current count
  def batch_divisor
    count.div batch_size
  end

  # @param [Numeric] sec A number of seconds
  # @return [String] the duration in the form "00h 12m 39s"
  def seconds_to_time_string(sec)
    hours, leftover = sec.divmod(3600)
    minutes, secs = leftover.divmod(60)
    format("%02dh %02dm %02ds", hours, minutes, secs)
  end
end
data/lib/ppnum.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# WHAT? Defining a global function? Yup.
#
# "Pretty print" a number into an underscore-delimited numeric string,
# left-padded with spaces (i.e., right-justified) out to the specified width
# (default 0 indicating "no padding") and with the specified number of digits
# to the right of the decimal point (default again 0, meaning no decimal
# point at all)
#
# Example: ppnum(10111) => "10_111"
#          ppnum(1234.56) => "1_235"
#          ppnum(10111.3656, 10, 1) => "  10_111.4"
#          ppnum(-1234.56, 0, 1) => "-1_234.6"
#
# No attempt is made to deal gracefully with numbers that overrun the
# specified width
# @param [Numeric] num the number to format
# @param [Integer] width The width to target
# @param [Integer] decimals Number of decimal places to show
# @return [String] The formatted number
def ppnum(num, width = 0, decimals = 0)
  rounded = num.round(decimals)

  # Format the magnitude and put the sign back on afterwards: flooring a
  # negative number moves it away from zero, and a leading "-" would get
  # swept up in the three-digit grouping.
  sign      = rounded.negative? ? "-" : ""
  magnitude = rounded.abs

  dec_str = if decimals.zero?
              ""
            else
              ".#{format("%.#{decimals}f", magnitude).split(".").last}"
            end

  # Group from the right by reversing, slicing off up to three digits at a
  # time, and reversing back.
  grouped = magnitude.floor.to_s.reverse.scan(/\d{1,3}/).join("_").reverse
  numstr  = "#{sign}#{grouped}#{dec_str}"

  if width.zero?
    numstr
  else
    format "%#{width}s", numstr
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/milemarker/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "milemarker"
7
+ spec.version = Milemarker::VERSION
8
+ spec.authors = ["Bill Dueber"]
9
+ spec.email = ["bill@dueber.com"]
10
+
11
+ spec.summary = "Track and produce loglines for batch processing progress."
12
+ spec.homepage = "https://github.com/billdueber/milemarker"
13
+ spec.license = "MIT"
14
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+ spec.metadata["changelog_uri"] = spec.homepage + '/CHANGELOG.md'
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
24
+ end
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ # Uncomment to register a new dependency of your gem
30
+ # spec.add_dependency "example-gem", "~> 1.0"
31
+
32
+ # For more information and examples about making a new gem, checkout our
33
+ # guide at: https://bundler.io/guides/creating_gem.html
34
+ #
35
+
36
+ spec.add_development_dependency 'bundler', '~>2.0'
37
+ spec.add_development_dependency 'pry'
38
+ spec.add_development_dependency 'rake', '~>13.0'
39
+ spec.add_development_dependency 'rspec', '~> 3.0'
40
+ spec.add_development_dependency 'rubocop', '~> 1.7'
41
+ end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: milemarker
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Bill Dueber
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-11-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '13.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.7'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.7'
83
+ description:
84
+ email:
85
+ - bill@dueber.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".github/workflows/main.yml"
91
+ - ".gitignore"
92
+ - ".rspec"
93
+ - ".rubocop.yml"
94
+ - CHANGELOG.md
95
+ - Gemfile
96
+ - LICENSE.txt
97
+ - README.md
98
+ - Rakefile
99
+ - bin/console
100
+ - bin/setup
101
+ - lib/milemarker.rb
102
+ - lib/milemarker/structured.rb
103
+ - lib/milemarker/version.rb
104
+ - lib/ppnum.rb
105
+ - milemarker.gemspec
106
+ homepage: https://github.com/billdueber/milemarker
107
+ licenses:
108
+ - MIT
109
+ metadata:
110
+ homepage_uri: https://github.com/billdueber/milemarker
111
+ source_code_uri: https://github.com/billdueber/milemarker
112
+ changelog_uri: https://github.com/billdueber/milemarker/CHANGELOG.md
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: 2.4.0
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubygems_version: 3.2.32
129
+ signing_key:
130
+ specification_version: 4
131
+ summary: Track and produce loglines for batch processing progress.
132
+ test_files: []