milemarker 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1cc6c3aa16cc1d24991354663de1ceea5859d82a025e83d2683b9fa3b49d5f9e
4
+ data.tar.gz: 924edbc92d18fd924d4934f188a88e9c36ed8bbbae443a7815462b4d40388fe0
5
+ SHA512:
6
+ metadata.gz: 7841d99693517311d47206ca49890278386643130dd934b6ca6f93fa0b0032c5e6f8aaaf038f8c0336ff2e967744d0e86a86c7dcb9b34e8030cdcecaf97a5773
7
+ data.tar.gz: 033d5566f4d893668c03f358f40a8ee2d0bd26610010e05ca11871225b94c2288f3bd252c1e9fc05f75034f200ba43d28520be5eba5d715e6f63766f8b19e765
@@ -0,0 +1,16 @@
1
+ name: Ruby
2
+
3
+ on: [push,pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v2
10
+ - name: Set up Ruby
11
+ uses: ruby/setup-ruby@v1
12
+ with:
13
+ ruby-version: 2.6.6
14
+ bundler-cache: true
15
+ - name: Run the default task
16
+ run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,13 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ Enabled: false
10
+ EnforcedStyle: double_quotes
11
+
12
+ Layout/LineLength:
13
+ Max: 120
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ ## [1.0.0] - 2021-11-29
2
+
3
+ First public release
4
+
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org" # gem source used when resolving dependencies
4
+
5
+ # Specify your gem's dependencies in milemarker.gemspec
6
+ gemspec # take the dependency list from the gemspec rather than repeating it here
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Bill Dueber
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,209 @@
1
+ # Milemarker -- track (and probably log) progress in batch jobs
2
+
3
+ Never again write code of the
4
+ form `log.info "Finished 1_000 in #{secs} seconds at a rate of #{total.to_f / secs}"`
5
+ .
6
+
7
+ ## Usage
8
+
9
+ ```ruby
10
+
11
+ require 'milemarker'
12
+ require 'logger'
13
+ input_file = "records.ndj"
14
+
15
+ # Create a new milemarker. Default batch_size is 1_000
16
+ milemarker = Milemarker.new(name: "Load #{input_file}", batch_size: 1_000_000)
17
+ logger = Logger.new(STDERR)
18
+
19
+ milemarker.logger = logger
20
+
21
+ File.open(input_file).each do |line|
22
+ do_whatever_needs_doing(line)
23
+ milemarker.increment_and_log_batch_line
24
+ end
25
+ milemarker.log_final_line # if logging is set up
26
+
27
+ # Identical to the above, but do the logging "by hand"
28
+ File.open(input_file).each do |line|
29
+ do_whatever_needs_doing(line)
30
+ milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
31
+ end
32
+ logger.info milemarker.final_line
33
+
34
+ # Sample output
35
+ # ...
36
+ # I, [2021-11-02T01:51:06.959137 #11710] INFO -- : load records.ndj 8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
37
+ # I, [2021-11-02T01:51:36.992831 #11710] INFO -- : load records.ndj 10_000_000. This batch 2_000_000 in 30.0s (66_591 r/s). Overall 71_394 r/s.
38
+ # ...
39
+ # I, [2021-11-02T02:01:56.702196 #11710] INFO -- : load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
40
+
41
+ ```
42
+
43
+ ## Basic usage
44
+
45
+ Most programs will probably use `milemarker` via
46
+ `#increment_and_log_batch_line`
47
+ (or its counterpart `#increment_and_on_batch {|milemarker| ... }` ). As
48
+ the name suggests, this will:
49
+
50
+ * increment the batch counter
51
+ * If the batch counter >= the batch size:
52
+ * run the provided block (or write the logline)
53
+ * reset count/time/etc for the next batch
54
+
55
+ Some examples:
56
+
57
+ ```ruby
58
+
59
+ # Logging, as above
60
+ milemarker = Milemarker.new(batch_size: 1000, name: 'Load myfile')
61
+ milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
62
+
63
+ # Alert when things seem to take too long
64
+
65
+ milemarker.increment_and_on_batch do |milemarker|
66
+ secs = milemarker.last_batch_seconds
67
+ if secs > way_too_long
68
+ logger.error "Whoa: #{secs} is too long for a batch of #{milemarker.batch_size}"
69
+ end
70
+ end
71
+
72
+ # #on_batch and #increment_and_on_batch can be used to do real (i.e.,
73
+ # non-logging) work after every `batch` calls, too
74
+ queue = []
75
+ my_stuff.each do |doc|
76
+ queue << do_something_to(doc)
77
+ milemarker.increment_and_on_batch do |milemarker|
78
+ write_to_datastore(queue)
79
+ queue = []
80
+ logger.info milemarker.batch_line
81
+ end
82
+ end
83
+ ```
84
+
85
+ `#incr` and `#on_batch(&blk)` are also available separately if you need to be
86
+ more explicit and less atomic.
87
+
88
+ All the components that make up a batch_line (e.g., the records/second as
89
+ a nice string) are available to roll your own batch line. See the API
90
+ documentation for details.
91
+
92
+ ### Incorporating a logger into milemarker
93
+
94
+ For standard logging cases, you can also pass in a logger, or let milemarker
95
+ create one for its own use based on an IO-like object you provide
96
+
97
+ ```ruby
98
+ logger = Logger.new(STDERR)
99
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000, logger: logger)
100
+
101
+ # same thing
102
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
103
+ milemarker.logger = logger
104
+
105
+ # same thing again
106
+ milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
107
+ milemarker.create_logger!(STDERR)
108
+
109
+ File.open(input_file).each do |line|
110
+ do_whatever_needs_doing(line)
111
+ milemarker.increment_and_log_batch_line
112
+ end
113
+
114
+ milemarker.log_final_line
115
+
116
+ # All the logging methods take an optional :level argument
117
+ milemarker.log_final_line(level: :debug)
118
+
119
+ ```
120
+
121
+ ### Structured logging with Milemarker::Structured
122
+
123
+ `Milemarker::Structured` will return hashes for `#batch_line` and `#final_line`
124
+ (aliased to `#batch_data` and `#final_data`, respectively) and pass those
125
+ hashes along to whatever logger you provide. `#create_logger!` for this
126
+ subclass will create a logger that provides json lines instead of text, too.
127
+
128
+ Presumably, if you pass in your own logger you'll use something like
129
+ [semantic_logger](https://github.com/reidmorrison/semantic_logger)
130
+ or [ougai](https://github.com/tilfin/ougai).
131
+
132
+ ```ruby
133
+ milemarker = Milemarker::Structured.new(name: 'my_process', batch_size: 10_000)
134
+ milemarker.create_logger!(STDERR)
135
+
136
+ File.open(input_file).each do |line|
137
+ do_whatever_needs_doing(line)
138
+ milemarker.increment_and_log_batch_line
139
+ end
140
+
141
+ # Usually one line; broken up for readability
142
+ # {"name":"my_process","batch_count":10_000,"batch_seconds":97.502088,
143
+ # "batch_rate":1.035875252230496,"total_count":100,"total_seconds":97.502094,
144
+ # "total_rate":1.0358751884856956,"level":"INFO","time":"2021-11-06 17:32:21 -0400"}
145
+
146
+ ```
147
+
148
+ ## Threadsafety
149
+
150
+ A call to `milemarker.threadsafify!` will wrap `increment_and_on_batch` (and
151
+ `increment_and_log_batch_line`) to be a threadsafe atomic operation at the
152
+ cost of some performance.
153
+
154
+ ```ruby
155
+ milemarker.threadsafify!
156
+
157
+ ```
158
+
159
+ ## Turning off logging
160
+
161
+ If the logger is set to `nil`, no logging will occur.
162
+
163
+ ```ruby
164
+ # Turn off logging
165
+
166
+ milemarker.logger = nil
167
+ ```
168
+
169
+ You could also just configure your logger to ignore stuff
170
+
171
+ ```ruby
172
+
173
+ milemarker.logger.level = :error
174
+
175
+ ```
176
+
177
+ ## Accuracy
178
+
179
+ Note that `milemarker` isn't designed for real benchmarking. The assumption is
180
+ that whatever work your code is actually doing will drown out any
181
+ inefficiencies in the `milemarker` code, and milemarker numbers can be used to suss out
182
+ where weird things are happening.
183
+
184
+ ## Installation
185
+
186
+ Add this line to your application's Gemfile:
187
+
188
+ ```ruby
189
+ gem 'milemarker'
190
+ ```
191
+
192
+ And then execute:
193
+
194
+ $ bundle install
195
+
196
+ Or install it yourself as:
197
+
198
+ $ gem install milemarker
199
+
200
+
201
+ ## Contributing
202
+
203
+ Bug reports and pull requests are welcome on GitHub
204
+ at https://github.com/billdueber/milemarker.
205
+
206
+ ## License
207
+
208
+ The gem is available as open source under the terms of
209
+ the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec) # registers the `spec` task used as the default below
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new # NOTE(review): presumably registers a `rubocop` task under its default name -- confirm
11
+
12
+ task default: %i[spec] # a bare `rake` runs only the specs; the RuboCop task is not part of the default
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Development console: starts an IRB session with the milemarker gem loaded.

require "bundler/setup"
# This gem is named "milemarker"; requiring any other name fails with a
# LoadError before the console can start.
require "milemarker"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail # stop on errors, on unset variables, and on a failure anywhere in a pipeline
3
+ IFS=$'\n\t' # split words on newline and tab only, not on spaces
4
+ set -vx # echo script lines as they are read and commands as they are executed
5
+
6
+ bundle install # install the gems this project declares
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
class Milemarker
  # Milemarker for structured logging.
  #
  # * #create_logger! creates a logger that emits one JSON document per line
  #   instead of human-centered strings
  # * #batch_line and #final_line return hashes of count/time/rate data
  # * ...and are aliased to #batch_data and #final_data
  #
  # Milemarker::Structured should be a drop-in replacement for Milemarker, with
  # the above differences and of course the caveat that if you provide your own
  # logger it should expect to deal with the hashes coming from #batch_data and
  # #final_data.
  class Structured < Milemarker
    # Create a logger that spits out JSON strings instead of human-oriented
    # strings. In addition to whatever message is passed, every record also
    # includes { level: severity, time: datetime }.
    #
    # The logger will try to deal intelligently with different types of
    # arguments:
    # * a Hash is used as-is
    # * a String shows up in the record under the key 'msg'
    # * an Exception contributes its message, its class, the first line of
    #   its backtrace, and the hostname
    # * anything else is treated like a hash if it responds to #to_h;
    #   otherwise msg.inspect is used as the message string
    #
    # All arguments are forwarded unchanged to Milemarker#create_logger!.
    # @example mm.create_logger!(STDERR)
    # @return [Milemarker::Structured] self
    def create_logger!(*args, **kwargs)
      super
      @logger.formatter = proc do |severity, datetime, _progname, msg|
        payload = case msg
                  when Hash
                    msg
                  when String
                    { msg: msg }
                  when Exception
                    exception_message_hash(msg)
                  else
                    other_message_hash(msg)
                  end
        # Logger writes the formatter's return value verbatim, so the record
        # separator must be added here; without it consecutive records run
        # together on a single line.
        "#{payload.merge({ level: severity, time: datetime }).to_json}\n"
      end
      self
    end

    # @return [Hash] count, elapsed seconds, and rate for the last completed
    #   batch, plus the running totals
    def batch_line
      {
        name: name,
        batch_count: last_batch_size,
        batch_seconds: last_batch_seconds,
        batch_rate: batch_rate,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias batch_data batch_line

    # @return [Hash] totals for the whole run so far, plus the number of
    #   increments since the last completed batch
    def final_line
      {
        name: name,
        final_batch_size: final_batch_size,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias final_data final_line

    # Build the record for a logged exception.
    # @param [Exception] msg the exception that was logged
    # @return [Hash] message, class, first backtrace line (nil when the
    #   exception has no backtrace), and this machine's hostname
    def exception_message_hash(msg)
      { msg: msg.message, error: msg.class, at: msg.backtrace&.first, hostname: Socket.gethostname }
    end

    # Build the record for a logged object that is not a Hash, String, or
    # Exception.
    # @param [Object] msg the object that was logged
    # @return [Hash] msg.to_h when available, otherwise { msg: msg.inspect }
    def other_message_hash(msg)
      if msg.respond_to? :to_h
        msg.to_h
      else
        { msg: msg.inspect }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Milemarker
4
+ VERSION = "1.0.0" # gem version string; milemarker.gemspec reads it as Milemarker::VERSION
5
+ end
data/lib/milemarker.rb ADDED
@@ -0,0 +1,248 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ppnum"
4
+ require 'logger'
5
+ require 'socket'
6
+ require 'json'
7
+ require 'milemarker/structured'
8
+
9
# Milemarker class, to keep track of progress over time for long-running
# iterating processes. Call #incr once per processed item; every `batch_size`
# increments #on_batch records how long the batch took and yields, so the
# caller can log a progress line or do other per-batch work.
#
# @author Bill Dueber <bill@dueber.com>
class Milemarker
  # @return [String, nil] optional "name" of this milemarker, for logging purposes
  attr_accessor :name

  # @return [Integer] batch size for computing `on_batch` calls
  attr_accessor :batch_size

  # @return [Logger, #info, nil] logging object for automatic logging methods;
  #   nil turns logging off
  attr_accessor :logger

  # @return [Integer] which batch number (total increment / batch_size)
  attr_reader :batch_number

  # @return [Numeric] number of seconds it took to process the last batch
  #   (0 until the first batch has completed)
  attr_reader :last_batch_seconds

  # @return [Integer] number of records (really, number of increments) in the last batch
  attr_reader :last_batch_size

  # @return [Time] Time the full process started
  attr_reader :start_time

  # @return [Time] Time the last batch started processing
  attr_reader :batch_start_time

  # @return [Time] Time the last batch ended processing
  attr_reader :batch_end_time

  # @return [Integer] Total records (really, increments) for the full run
  attr_reader :count

  # @return [Integer] Total count at the time of the last on_batch call. Used to figure out
  #   how many records were in the final batch
  attr_reader :prev_count

  # Create a new milemarker tracker, with an optional name and logger
  # @param [Integer] batch_size How often the on_batch block will be called
  # @param [String] name Optional "name" for this milemarker, included in the generated log lines
  # @param [Logger, #info, #warn] logger Optional logger that responds to the normal #info, #warn, etc.
  def initialize(batch_size: 1000, name: nil, logger: nil)
    @batch_size = batch_size
    @name = name
    @logger = logger

    @batch_number = 0
    @last_batch_size = 0
    @last_batch_seconds = 0

    @start_time = Time.now
    @batch_start_time = @start_time
    @batch_end_time = @start_time

    @count = 0
    @prev_count = 0
  end

  # Turn `increment_and_on_batch` (and thus `increment_and_log_batch_line`)
  # into a threadsafe version by overriding it, on this instance only, with a
  # mutex-guarded wrapper.
  # @return [Milemarker] self
  def threadsafify!
    @mutex = Mutex.new
    define_singleton_method(:increment_and_on_batch) do |&blk|
      threadsafe_increment_and_on_batch(&blk)
    end
    self
  end

  # Increment the counter -- how many records processed, e.g.
  # @param [Integer] increase How much to add to the count
  # @return [Milemarker] self
  def incr(increase = 1)
    @count += increase
    self
  end

  alias increment incr

  # Create a logger for use in logging milemarker information. All arguments
  # are passed straight through to Logger.new.
  # @example mm.create_logger!(STDOUT)
  # @return [Milemarker] self
  def create_logger!(*args, **kwargs)
    @logger = Logger.new(*args, **kwargs)
    self
  end

  # Run the given block if the count has crossed into a new batch since the
  # last time the block was run. The batch statistics are updated before the
  # block is called.
  # @yield [Milemarker] self
  def on_batch
    if batch_size_exceeded?
      set_milemarker!
      yield self
    end
  end

  # Single call to increment and run (if needed) the on_batch block
  def _increment_and_on_batch(&blk)
    incr.on_batch(&blk)
  end

  alias increment_and_on_batch _increment_and_on_batch

  # Threadsafe version of #increment_and_on_batch, doing the whole thing as a
  # single atomic action. Relies on the mutex created by #threadsafify!.
  def threadsafe_increment_and_on_batch(&blk)
    @mutex.synchronize do
      _increment_and_on_batch(&blk)
    end
  end

  # Convenience method, exactly the same as the common idiom
  # `mm.incr; mm.on_batch {|mm| log.info mm.batch_line}`
  # @param [Symbol] level The level to log at
  def increment_and_log_batch_line(level: :info)
    increment_and_on_batch { log_batch_line(level: level) }
  end

  # Log the batch line, as described in #batch_line
  # @param [Symbol] level The level to log at
  def log_batch_line(level: :info)
    log(batch_line, level: level)
  end

  # Log the final line, as described in #final_line
  # @param [Symbol] level The level to log at
  def log_final_line(level: :info)
    log(final_line, level: level)
  end

  # A line describing the batch suitable for logging, of the form
  #   load records.ndj   8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
  # @return [String] The batch log line
  def batch_line
    # rubocop:disable Layout/LineLength
    "#{name} #{ppnum(count, 10)}. This batch #{ppnum(last_batch_size, 5)} in #{ppnum(last_batch_seconds, 4, 1)}s (#{batch_rate_str} r/s). Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # Record how many increments there have been since the last on_batch call.
  # Most useful to count how many items are in the final (usually incomplete) batch
  # Note that since Milemarker can't tell when you're done processing, you can call this
  # anytime and get the number of items processed since the last on_batch call.
  # @return [Integer] Number of items processed in the final batch
  def final_batch_size
    count - prev_count
  end

  alias batch_count_so_far final_batch_size

  # A line describing the entire run, suitable for logging, of the form
  #   load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
  # @return [String] The full log line
  def final_line
    # rubocop:disable Layout/LineLength
    "#{name} FINISHED. #{ppnum(count, 10)} total records in #{seconds_to_time_string(total_seconds_so_far)}. Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # @return [Float] rate of the last batch (in recs/second); 0.0 when no
  #   batch has completed yet
  def batch_rate
    # Until the first batch completes, last_batch_seconds is still 0 and the
    # division below would yield NaN (or Infinity), which cannot be rounded
    # for display by #batch_rate_str.
    return 0.0 if count.zero? || last_batch_seconds.zero?

    last_batch_size.to_f / last_batch_seconds
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  #   decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def batch_rate_str(decimals = 0)
    ppnum(batch_rate, 0, decimals)
  end

  # @return [Float] total rate so far (in rec/second); 0.0 when nothing has
  #   been counted or no measurable time has passed
  def total_rate
    return 0.0 if @count.zero?

    elapsed = total_seconds_so_far
    # Guard against a zero (or backwards) clock reading so the rate is
    # always a finite number.
    return 0.0 unless elapsed.positive?

    count / elapsed
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  #   decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def total_rate_str(decimals = 0)
    ppnum(total_rate, 0, decimals)
  end

  # Total seconds since the beginning of this milemarker
  # @return [Float] seconds since the milemarker was created
  def total_seconds_so_far
    Time.now - start_time
  end

  # Total seconds since this batch started
  # @return [Float] seconds since the beginning of this batch
  def batch_seconds_so_far
    Time.now - batch_start_time
  end

  # Set/reset all the internal state. Called by #on_batch when necessary;
  # should probably not be called manually
  def set_milemarker!
    @batch_end_time = Time.now
    @last_batch_size = @count - @prev_count
    @last_batch_seconds = @batch_end_time - @batch_start_time

    reset_for_next_batch!
  end

  # Reset the internal counters/timers at the end of a batch. Taken care of
  # by #on_batch; should probably not be called manually.
  def reset_for_next_batch!
    @batch_start_time = batch_end_time
    @prev_count = count
    @batch_number = batch_divisor
  end

  # Log a line using the internal logger. Do nothing if no logger is configured.
  # @param [String] msg The message to log
  # @param [Symbol] level The level to log at
  def log(msg, level: :info)
    # public_send keeps a caller-supplied level from reaching private methods
    # of the logger.
    logger&.public_send(level, msg)
  end

  private

  # @return [Boolean] true when the count has moved into a batch that has not
  #   been reported yet
  def batch_size_exceeded?
    batch_divisor > @batch_number
  end

  # @return [Integer] number of whole batches contained in the current count
  def batch_divisor
    count.div batch_size
  end

  # @param [Numeric] sec a duration in seconds
  # @return [String] the duration formatted as "HHh MMm SSs"
  def seconds_to_time_string(sec)
    hours, leftover = sec.divmod(3600)
    minutes, secs = leftover.divmod(60)
    format("%02dh %02dm %02ds", hours, minutes, secs)
  end
end
data/lib/ppnum.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# WHAT? Defining a global function? Yup.
#
# "Pretty print" a number into an underscore-delimited numeric string,
# left-padded with spaces (i.e., right-justified) out to the specified width
# (default 0 indicating "no padding") and with the specified number of digits
# to the right of the decimal point (default again 0, meaning no decimal
# point at all)
#
# Example: ppnum(10111) => "10_111"
#          ppnum(1234.56) => "1_235"
#          ppnum(10111.3656, 10, 1) => "  10_111.4"
#          ppnum(-1234.5, 0, 1) => "-1_234.5"
#
# No attempt is made to deal gracefully with numbers that overrun the
# specified width
# @param [Numeric] num the number to format
# @param [Integer] width The width to target
# @param [Integer] decimals Number of decimal places to show
# @return [String] The formatted number
def ppnum(num, width = 0, decimals = 0)
  rounded = num.round(decimals)
  # Group the digits of the absolute value and put the sign back afterwards;
  # otherwise the "-" is treated as a digit when grouping, and flooring a
  # negative number shifts its integer part.
  sign = rounded.negative? ? "-" : ""
  magnitude = rounded.abs

  # Reverse so the groups of three are counted from the ones digit.
  whole = magnitude.floor.to_s.reverse.scan(/\d{1,3}/).join("_").reverse
  fraction = if decimals.zero?
               ""
             else
               ".#{format("%.#{decimals}f", magnitude).split(".").last}"
             end

  numstr = "#{sign}#{whole}#{fraction}"
  width.zero? ? numstr : format("%#{width}s", numstr)
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/milemarker/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "milemarker"
7
+ spec.version = Milemarker::VERSION
8
+ spec.authors = ["Bill Dueber"]
9
+ spec.email = ["bill@dueber.com"]
10
+
11
+ spec.summary = "Track and produce loglines for batch processing progress."
12
+ spec.homepage = "https://github.com/billdueber/milemarker"
13
+ spec.license = "MIT"
14
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+ spec.metadata["changelog_uri"] = spec.homepage + '/CHANGELOG.md' # NOTE(review): resolves to <homepage>/CHANGELOG.md -- confirm this URL is reachable on the hosting site
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # `git ls-files -z` lists the files tracked by git; anything under test/, spec/ or features/ is left out.
22
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
24
+ end
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } # basenames of packaged files under exe/
27
+ spec.require_paths = ["lib"]
28
+
29
+ # Uncomment to register a new dependency of your gem
30
+ # spec.add_dependency "example-gem", "~> 1.0"
31
+
32
+ # For more information and examples about making a new gem, checkout our
33
+ # guide at: https://bundler.io/guides/creating_gem.html
34
+ #
35
+
36
+ spec.add_development_dependency 'bundler', '~>2.0'
37
+ spec.add_development_dependency 'pry'
38
+ spec.add_development_dependency 'rake', '~>13.0'
39
+ spec.add_development_dependency 'rspec', '~> 3.0'
40
+ spec.add_development_dependency 'rubocop', '~> 1.7'
41
+ end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: milemarker
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Bill Dueber
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2021-11-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '13.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.7'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.7'
83
+ description:
84
+ email:
85
+ - bill@dueber.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".github/workflows/main.yml"
91
+ - ".gitignore"
92
+ - ".rspec"
93
+ - ".rubocop.yml"
94
+ - CHANGELOG.md
95
+ - Gemfile
96
+ - LICENSE.txt
97
+ - README.md
98
+ - Rakefile
99
+ - bin/console
100
+ - bin/setup
101
+ - lib/milemarker.rb
102
+ - lib/milemarker/structured.rb
103
+ - lib/milemarker/version.rb
104
+ - lib/ppnum.rb
105
+ - milemarker.gemspec
106
+ homepage: https://github.com/billdueber/milemarker
107
+ licenses:
108
+ - MIT
109
+ metadata:
110
+ homepage_uri: https://github.com/billdueber/milemarker
111
+ source_code_uri: https://github.com/billdueber/milemarker
112
+ changelog_uri: https://github.com/billdueber/milemarker/CHANGELOG.md
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: 2.4.0
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubygems_version: 3.2.32
129
+ signing_key:
130
+ specification_version: 4
131
+ summary: Track and produce loglines for batch processing progress.
132
+ test_files: []