milemarker 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +16 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +209 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/milemarker/structured.rb +80 -0
- data/lib/milemarker/version.rb +5 -0
- data/lib/milemarker.rb +248 -0
- data/lib/ppnum.rb +37 -0
- data/milemarker.gemspec +41 -0
- metadata +132 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1cc6c3aa16cc1d24991354663de1ceea5859d82a025e83d2683b9fa3b49d5f9e
|
4
|
+
data.tar.gz: 924edbc92d18fd924d4934f188a88e9c36ed8bbbae443a7815462b4d40388fe0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7841d99693517311d47206ca49890278386643130dd934b6ca6f93fa0b0032c5e6f8aaaf038f8c0336ff2e967744d0e86a86c7dcb9b34e8030cdcecaf97a5773
|
7
|
+
data.tar.gz: 033d5566f4d893668c03f358f40a8ee2d0bd26610010e05ca11871225b94c2288f3bd252c1e9fc05f75034f200ba43d28520be5eba5d715e6f63766f8b19e765
|
@@ -0,0 +1,16 @@
|
|
1
|
+
name: Ruby
|
2
|
+
|
3
|
+
on: [push,pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
steps:
|
9
|
+
- uses: actions/checkout@v2
|
10
|
+
- name: Set up Ruby
|
11
|
+
uses: ruby/setup-ruby@v1
|
12
|
+
with:
|
13
|
+
ruby-version: 2.6.6
|
14
|
+
bundler-cache: true
|
15
|
+
- name: Run the default task
|
16
|
+
run: bundle exec rake
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Bill Dueber
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
# Milemarker -- track (and probably log) progress in batch jobs
|
2
|
+
|
3
|
+
Never again write code of the
|
4
|
+
form `log.info "Finished 1_000 in #{secs} seconds at a rate of #{total.to_f / secs}"`
|
5
|
+
.
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
|
11
|
+
require 'milemarker'
|
12
|
+
require 'logger'
|
13
|
+
input_file = "records.ndj"
|
14
|
+
|
15
|
+
# Create a new milemarker. Default batch_size is 1_000
|
16
|
+
milemarker = Milemarker.new(name: "Load #{input_file}", batch_size: 1_000_000)
|
17
|
+
logger = Logger.new(STDERR)
|
18
|
+
|
19
|
+
milemarker.logger = logger
|
20
|
+
|
21
|
+
File.open(input_file).each do |line|
|
22
|
+
do_whatever_needs_doing(line)
|
23
|
+
milemarker.increment_and_log_batch_line
|
24
|
+
end
|
25
|
+
milemarker.log_final_line # if logging is set up
|
26
|
+
|
27
|
+
# Identical to the above, but do the logging "by hand"
|
28
|
+
File.open(input_file).each do |line|
|
29
|
+
do_whatever_needs_doing(line)
|
30
|
+
milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
|
31
|
+
end
|
32
|
+
logger.info milemarker.final_line
|
33
|
+
|
34
|
+
# Sample output
|
35
|
+
# ...
|
36
|
+
# I, [2021-11-02T01:51:06.959137 #11710] INFO -- : load records.ndj 8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
|
37
|
+
# I, [2021-11-02T01:51:36.992831 #11710] INFO -- : load records.ndj 10_000_000. This batch 2_000_000 in 30.0s (66_591 r/s). Overall 71_394 r/s.
|
38
|
+
# ...
|
39
|
+
# I, [2021-11-02T02:01:56.702196 #11710] INFO -- : load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
|
40
|
+
|
41
|
+
```
|
42
|
+
|
43
|
+
## Basic usage
|
44
|
+
|
45
|
+
The way most programs will probably use `milemarker` is via
|
46
|
+
`#increment_and_log_batch_line`
|
47
|
+
(or its counterpart `#increment_and_on_batch {|milemarker| ... }` ). As
|
48
|
+
the name suggests, this will:
|
49
|
+
|
50
|
+
* increment the batch counter
|
51
|
+
* If the batch counter >= the batch size:
|
52
|
+
* run the provided block (or write the logline)
|
53
|
+
* reset count/time/etc for the next batch
|
54
|
+
|
55
|
+
Some examples:
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
|
59
|
+
# Logging, as above
|
60
|
+
milemarker = Milemarker.new(batch_size: 1000, name: 'Load myfile')
|
61
|
+
milemarker.increment_and_on_batch { logger.info milemarker.batch_line }
|
62
|
+
|
63
|
+
# Alert when things seem to take too long
|
64
|
+
|
65
|
+
milemarker.increment_and_on_batch do |milemarker|
|
66
|
+
secs = milemarker.last_batch_seconds
|
67
|
+
if secs > way_too_long
|
68
|
+
logger.error "Whoa: #{secs} is too long for a batch of #{milemarker.batch_size}"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# #on_batch and #increment_and_on_batch can be used to do real (i.e.,
|
73
|
+
# non-logging) work after every `batch` calls, too
|
74
|
+
queue = []
|
75
|
+
my_stuff.each do |doc|
|
76
|
+
queue << do_something_to(doc)
|
77
|
+
milemarker.increment_and_on_batch do |milemarker|
|
78
|
+
write_to_datastore(queue)
|
79
|
+
queue = []
|
80
|
+
logger.info milemarker.batch_line
|
81
|
+
end
|
82
|
+
end
|
83
|
+
```
|
84
|
+
|
85
|
+
`#incr` and `#on_batch(&blk)` are also available separately if you need to be
|
86
|
+
more explicit and less atomic.
|
87
|
+
|
88
|
+
All the components that make up a batch_line (e.g., the records/second as
|
89
|
+
a nice string) are available to roll your own batch line. See the API
|
90
|
+
documentation for details.
|
91
|
+
|
92
|
+
### Incorporating a logger into milemarker
|
93
|
+
|
94
|
+
For standard logging cases, you can also pass in a logger, or let milemarker
|
95
|
+
create one for its own use based on an IO-like object you provide
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
logger = Logger.new(STDERR)
|
99
|
+
milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000, logger: logger)
|
100
|
+
|
101
|
+
# same thing
|
102
|
+
milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
|
103
|
+
milemarker.logger = logger
|
104
|
+
|
105
|
+
# same thing again
|
106
|
+
milemarker = Milemarker.new(name: 'my_process', batch_size: 10_000)
|
107
|
+
milemarker.create_logger!(STDERR)
|
108
|
+
|
109
|
+
File.open(input_file).each do |line|
|
110
|
+
do_whatever_needs_doing(line)
|
111
|
+
milemarker.increment_and_log_batch_line
|
112
|
+
end
|
113
|
+
|
114
|
+
milemarker.log_final_line
|
115
|
+
|
116
|
+
# All the logging methods take an optional :level argument
|
117
|
+
milemarker.log_final_line(level: :debug)
|
118
|
+
|
119
|
+
```
|
120
|
+
|
121
|
+
### Structured logging with Milemarker::Structured
|
122
|
+
|
123
|
+
`Milemarker::Structured` will return hashes for `#batch_line` and `#final_line`
|
124
|
+
(aliased to `#batch_data` and `#final_data`, respectively) and pass those
|
125
|
+
hashes along to whatever logger you provide. `#create_logger!` for this
|
126
|
+
subclass will create a logger that provides json lines instead of text, too.
|
127
|
+
|
128
|
+
Presumably, if you pass in your own logger you'll use something like
|
129
|
+
[semantic_logger](https://github.com/reidmorrison/semantic_logger)
|
130
|
+
or [ougai](https://github.com/tilfin/ougai).
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
milemarker = Milemarker::Structured.new(name: 'my_process', batch_size: 10_000)
|
134
|
+
milemarker.create_logger!(STDERR)
|
135
|
+
|
136
|
+
File.open(input_file).each do |line|
|
137
|
+
do_whatever_needs_doing(line)
|
138
|
+
milemarker.increment_and_log_batch_line
|
139
|
+
end
|
140
|
+
|
141
|
+
# Usually one line; broken up for readability
|
142
|
+
# {"name":"my_process","batch_count":10_000,"batch_seconds":97.502088,
|
143
|
+
# "batch_rate":1.035875252230496,"total_count":100,"total_seconds":97.502094,
|
144
|
+
# "total_rate":1.0358751884856956,"level":"INFO","time":"2021-11-06 17:32:21 -0400"}
|
145
|
+
|
146
|
+
```
|
147
|
+
|
148
|
+
## Threadsafety
|
149
|
+
|
150
|
+
A call to `milemarker.threadsafify!` will wrap `increment_and_on_batch` (and
|
151
|
+
`increment_and_log_batch_line`) to be a threadsafe atomic operation at the
|
152
|
+
cost of some performance.
|
153
|
+
|
154
|
+
```
|
155
|
+
milemarker.threadsafify!
|
156
|
+
|
157
|
+
```
|
158
|
+
|
159
|
+
## Turning off logging
|
160
|
+
|
161
|
+
If the logger is set to `nil`, no logging will occur.
|
162
|
+
|
163
|
+
```ruby
|
164
|
+
# Turn off logging
|
165
|
+
|
166
|
+
milemarker.logger = nil
|
167
|
+
```
|
168
|
+
|
169
|
+
You could also just configure your logger to ignore stuff
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
|
173
|
+
milemarker.logger.level = :error
|
174
|
+
|
175
|
+
```
|
176
|
+
|
177
|
+
## Accuracy
|
178
|
+
|
179
|
+
Note that `milemarker` isn't designed for real benchmarking. The assumption is
|
180
|
+
that whatever work your code is actually doing will drown out any
|
181
|
+
inefficiencies in the `milemarker` code, and milemarker numbers can be used to suss out
|
182
|
+
where weird things are happening.
|
183
|
+
|
184
|
+
## Installation
|
185
|
+
|
186
|
+
Add this line to your application's Gemfile:
|
187
|
+
|
188
|
+
```ruby
|
189
|
+
gem 'milemarker'
|
190
|
+
```
|
191
|
+
|
192
|
+
And then execute:
|
193
|
+
|
194
|
+
$ bundle install
|
195
|
+
|
196
|
+
Or install it yourself as:
|
197
|
+
|
198
|
+
$ gem install milemarker
|
199
|
+
|
200
|
+
|
201
|
+
## Contributing
|
202
|
+
|
203
|
+
Bug reports and pull requests are welcome on GitHub
|
204
|
+
at https://github.com/billdueber/milemarker.
|
205
|
+
|
206
|
+
## License
|
207
|
+
|
208
|
+
The gem is available as open source under the terms of
|
209
|
+
the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Development console: loads bundler and this gem, then drops into IRB so the
# library can be poked at interactively.

require "bundler/setup"
# The gem's library file is lib/milemarker.rb; the previous "waypoint" name
# does not exist in this package and made the console fail on startup.
require "milemarker"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class Milemarker
  # Milemarker for structured logging
  #   * #create_logger! creates a logger that emits JSON lines instead of human-centered strings
  #   * #batch_line and #final_line return hashes of count/time/rate data
  #   * ...and are aliased to #batch_data and #final_data
  #
  # Milemarker::Structured should be a drop-in replacement for Milemarker, with the above differences
  # and of course the caveat that if you provide your own logger it should expect to deal with
  # the hashes coming from #batch_data and #final_data
  class Structured < Milemarker
    # Create a logger that emits one JSON document per line instead of
    # human-oriented strings. In addition to whatever message is passed, each
    # entry always includes { level: severity, time: datetime }.
    #
    # The formatter deals with different types of messages as follows:
    #   * a Hash is used as-is
    #   * a String shows up in the output under the key 'msg'
    #   * an Exception yields its message, class, the first line of the backtrace, and the hostname
    #   * anything else is treated like a hash if it responds to #to_h;
    #     otherwise msg.inspect is used as the message string
    #
    # Arguments are forwarded unchanged to Milemarker#create_logger!.
    # @return [Milemarker::Structured] self
    def create_logger!(*args, **kwargs)
      super
      @logger.formatter = proc do |severity, datetime, _progname, msg|
        payload = case msg
                  when Hash
                    msg
                  when String
                    { msg: msg }
                  when Exception
                    exception_message_hash(msg)
                  else
                    other_message_hash(msg)
                  end
        # Logger writes the formatter's return value verbatim, so the newline
        # must be added here or consecutive entries run together on one line.
        "#{payload.merge({ level: severity, time: datetime }).to_json}\n"
      end
      self
    end

    # @return [Hash] hash with information about the last batch
    def batch_line
      {
        name: name,
        batch_count: last_batch_size,
        batch_seconds: last_batch_seconds,
        batch_rate: batch_rate,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias batch_data batch_line

    # @return [Hash] hash with information about the run as a whole, including
    #   the size of the final (usually incomplete) batch
    def final_line
      {
        name: name,
        final_batch_size: final_batch_size,
        total_count: count,
        total_seconds: total_seconds_so_far,
        total_rate: total_rate
      }
    end

    alias final_data final_line

    # Build the log payload for an exception.
    # @param [Exception] msg the exception being logged
    # @return [Hash] message, error class, first backtrace line (nil if there
    #   is no backtrace), and the local hostname
    def exception_message_hash(msg)
      { msg: msg.message, error: msg.class, at: msg.backtrace&.first, hostname: Socket.gethostname }
    end

    # Build the log payload for a message that is not a Hash, String, or Exception.
    # @param [Object] msg the object being logged
    # @return [Hash] msg.to_h when available, otherwise { msg: msg.inspect }
    def other_message_hash(msg)
      if msg.respond_to? :to_h
        msg.to_h
      else
        { msg: msg.inspect }
      end
    end
  end
end
|
data/lib/milemarker.rb
ADDED
@@ -0,0 +1,248 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "ppnum"
|
4
|
+
require 'logger'
|
5
|
+
require 'socket'
|
6
|
+
require 'json'
|
7
|
+
require 'milemarker/structured'
|
8
|
+
|
9
|
+
# milemarker class, to keep track of progress over time for long-running
|
10
|
+
# iterating processes
|
11
|
+
#
|
12
|
+
# @author Bill Dueber <bill@dueber.com>
|
13
|
+
class Milemarker
  # @return [String] optional "name" of this milemarker, for logging purposes
  attr_accessor :name

  # @return [Integer] batch size for computing `on_batch` calls
  attr_accessor :batch_size

  # @return [Logger, #info] logging object for automatic logging methods
  attr_accessor :logger

  # @return [Integer] which batch number (total increment / batch_size)
  attr_reader :batch_number

  # @return [Numeric] number of seconds it took to process the last batch
  #   (0 until the first batch has completed)
  attr_reader :last_batch_seconds

  # @return [Integer] number of records (really, number of increments) in the last batch
  attr_reader :last_batch_size

  # @return [Time] Time the full process started
  attr_reader :start_time

  # @return [Time] Time the last batch started processing
  attr_reader :batch_start_time

  # @return [Time] Time the last batch ended processing
  attr_reader :batch_end_time

  # @return [Integer] Total records (really, increments) for the full run
  attr_reader :count

  # @return [Integer] Total count at the time of the last on_batch call. Used to figure out
  # how many records were in the final batch
  attr_reader :prev_count

  # Create a new milemarker tracker, with an optional name and logger
  # @param [Integer] batch_size How often the on_batch block will be called
  # @param [String] name Optional "name" for this milemarker, included in the generated log lines
  # @param [Logger, #info, #warn] logger Optional logger that responds to the normal #info, #warn, etc.
  def initialize(batch_size: 1000, name: nil, logger: nil)
    @batch_size = batch_size
    @name       = name
    @logger     = logger

    @batch_number       = 0
    @last_batch_size    = 0
    @last_batch_seconds = 0

    @start_time       = Time.now
    @batch_start_time = @start_time
    @batch_end_time   = @start_time

    @count      = 0
    @prev_count = 0
  end

  # Turn `increment_and_on_batch` (and thus `increment_and_log_batch_line`) into
  # a threadsafe version by overriding it on this instance with a
  # mutex-guarded wrapper.
  # @return [Milemarker] self
  def threadsafify!
    @mutex = Mutex.new
    define_singleton_method(:increment_and_on_batch) do |&blk|
      threadsafe_increment_and_on_batch(&blk)
    end
    self
  end

  # Increment the counter -- how many records processed, e.g.
  # @param [Integer] increase How much to add to the count
  # @return [Milemarker] self
  def incr(increase = 1)
    @count += increase
    self
  end

  alias increment incr

  # Create a logger for use in logging milemarker information. All arguments
  # are passed straight through to Logger.new.
  # @example mm.create_logger!(STDOUT)
  # @return [Milemarker] self
  def create_logger!(*args, **kwargs)
    @logger = Logger.new(*args, **kwargs)
    self
  end

  # Run the given block if we've exceeded the batch size for the current batch.
  # The batch bookkeeping is updated before the block runs, so the block sees
  # the just-finished batch in #last_batch_size / #last_batch_seconds.
  # @yield [Milemarker] self
  def on_batch
    if batch_size_exceeded?
      set_milemarker!
      yield self
    end
  end

  # Single call to increment and run (if needed) the on_batch block
  def _increment_and_on_batch(&blk)
    incr.on_batch(&blk)
  end

  alias increment_and_on_batch _increment_and_on_batch

  # Threadsafe version of #increment_and_on_batch, doing the whole thing as a single atomic action.
  # Requires #threadsafify! to have been called first (it creates the mutex).
  def threadsafe_increment_and_on_batch(&blk)
    @mutex.synchronize do
      _increment_and_on_batch(&blk)
    end
  end

  # Convenience method, exactly the same as the common idiom
  #   `mm.incr; mm.on_batch {|mm| log.info mm.batch_line}`
  # @param [Symbol] level The level to log at
  def increment_and_log_batch_line(level: :info)
    increment_and_on_batch { log_batch_line(level: level) }
  end

  # Log the batch line, as described in #batch_line
  # @param [Symbol] level The level to log at
  def log_batch_line(level: :info)
    log(batch_line, level: level)
  end

  # Log the final line, as described in #final_line
  # @param [Symbol] level The level to log at
  def log_final_line(level: :info)
    log(final_line, level: level)
  end

  # A line describing the batch suitable for logging, of the form
  #   load records.ndj   8_000_000. This batch 2_000_000 in 26.2s (76_469 r/s). Overall 72_705 r/s.
  # @return [String] The batch log line
  def batch_line
    # rubocop:disable Layout/LineLength
    "#{name} #{ppnum(count, 10)}. This batch #{ppnum(last_batch_size, 5)} in #{ppnum(last_batch_seconds, 4, 1)}s (#{batch_rate_str} r/s). Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # Record how many increments there have been since the last on_batch call.
  # Most useful to count how many items are in the final (usually incomplete) batch
  # Note that since Milemarker can't tell when you're done processing, you can call this
  # anytime and get the number of items processed since the last on_batch call.
  # @return [Integer] Number of items processed in the final batch
  def final_batch_size
    count - prev_count
  end

  alias batch_count_so_far final_batch_size

  # A line describing the entire run, suitable for logging, of the form
  #   load records.ndj FINISHED. 27_138_118 total records in 00h 12m 39s. Overall 35_718 r/s.
  # @return [String] The full log line
  def final_line
    # rubocop:disable Layout/LineLength
    "#{name} FINISHED. #{ppnum(count, 10)} total records in #{seconds_to_time_string(total_seconds_so_far)}. Overall #{total_rate_str} r/s."
    # rubocop:enable Layout/LineLength
  end

  # @return [Float] rate of the last batch (in recs/second); 0.0 when nothing
  #   has been counted yet or no batch has completed
  def batch_rate
    # Before the first batch completes last_batch_seconds is 0, and dividing by
    # it yields NaN/Infinity, which cannot be rounded for display.
    return 0.0 if count.zero? || last_batch_seconds.zero?

    last_batch_size.to_f / last_batch_seconds
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  # decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def batch_rate_str(decimals = 0)
    ppnum(batch_rate, 0, decimals)
  end

  # @return [Float] total rate so far (in rec/second); 0.0 when nothing has
  #   been counted yet or no time has elapsed
  def total_rate
    return 0.0 if @count.zero?

    elapsed = total_seconds_so_far
    # Same division-by-zero protection as #batch_rate.
    return 0.0 if elapsed.zero?

    count / elapsed
  end

  # @param [Integer] decimals Number of decimal places to the right of the
  # decimal point
  # @return [String] Rate-per-second in form XXX.YY
  def total_rate_str(decimals = 0)
    ppnum(total_rate, 0, decimals)
  end

  # Total seconds since the beginning of this milemarker
  # @return [Float] seconds since the milemarker was created
  def total_seconds_so_far
    Time.now - start_time
  end

  # Total seconds since this batch started
  # @return [Float] seconds since the beginning of this batch
  def batch_seconds_so_far
    Time.now - batch_start_time
  end

  # Set/reset all the internal state. Called by #on_batch when necessary;
  # should probably not be called manually
  def set_milemarker!
    @batch_end_time     = Time.now
    @last_batch_size    = @count - @prev_count
    @last_batch_seconds = @batch_end_time - @batch_start_time

    reset_for_next_batch!
  end

  # Reset the internal counters/timers at the end of a batch. Taken care of
  # by #on_batch; should probably not be called manually.
  def reset_for_next_batch!
    @batch_start_time = batch_end_time
    @prev_count       = count
    @batch_number     = batch_divisor
  end

  # Log a line using the internal logger. Do nothing if no logger is configured.
  # @param [String] msg The message to log
  # @param [Symbol] level The level to log at
  def log(msg, level: :info)
    logger&.send(level, msg)
  end

  private

  # True once the count has crossed into a batch number higher than the one
  # recorded at the last on_batch call.
  def batch_size_exceeded?
    batch_divisor > @batch_number
  end

  # Number of whole batches contained in the current count.
  def batch_divisor
    count.div batch_size
  end

  # Format a number of seconds as "HHh MMm SSs" (fractions are truncated).
  def seconds_to_time_string(sec)
    hours, leftover = sec.divmod(3600)
    minutes, secs = leftover.divmod(60)
    format("%02dh %02dm %02ds", hours, minutes, secs)
  end
end
|
data/lib/ppnum.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# WHAT? Defining a global function? Yup.
#
# "Pretty print" a number into an underscore-delimited numeric string,
# left-padded with spaces (i.e., right-justified) out to the specified width
# (default 0 indicating "no padding") and with the specified number of digits
# to the right of the decimal point (default again 0, meaning no decimal
# point at all).
#
# Example: ppnum(10111) => "10_111"
#          ppnum(1234.56) => "1_235"
#          ppnum(10111.3656, 10, 1) => "  10_111.4"
#          ppnum(-1234.56, 0, 1) => "-1_234.6"
#
# No attempt is made to deal gracefully with numbers that overrun the
# specified width
# @param [Numeric] num the number to format
# @param [Integer] width The width to target
# @param [Integer] decimals Number of decimal places to show
# @return [String] The formatted number
def ppnum(num, width = 0, decimals = 0)
  rounded = num.round(decimals)

  # Format the magnitude and re-attach the sign afterwards. Grouping the
  # digits of a string that still contains "-" produced output like "-_123",
  # and flooring a negative float moved the integer part away from zero.
  sign      = rounded.negative? ? "-" : ""
  magnitude = rounded.abs

  # Group the integer digits in threes from the right.
  int_str = magnitude.floor.to_s.reverse.scan(/\d{1,3}/).join("_").reverse

  dec_str = if decimals.zero?
              ""
            else
              ".#{format("%.#{decimals}f", magnitude).split(".").last}"
            end

  numstr = "#{sign}#{int_str}#{dec_str}"
  if width.zero?
    numstr
  else
    format "%#{width}s", numstr
  end
end
|
data/milemarker.gemspec
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/milemarker/version"

# Gem packaging definition for milemarker.
Gem::Specification.new do |spec|
  homepage = "https://github.com/billdueber/milemarker"

  spec.name    = "milemarker"
  spec.version = Milemarker::VERSION
  spec.authors = ["Bill Dueber"]
  spec.email   = ["bill@dueber.com"]

  spec.summary  = "Track and produce loglines for batch processing progress."
  spec.homepage = homepage
  spec.license  = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.metadata["homepage_uri"]    = homepage
  spec.metadata["source_code_uri"] = homepage
  spec.metadata["changelog_uri"]   = "#{homepage}/CHANGELOG.md"

  # Package every file tracked by git, except tests/specs/features.
  tracked_files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0")
  end
  spec.files         = tracked_files.grep_v(%r{\A(?:test|spec|features)/})
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Uncomment to register a new dependency of your gem
  # spec.add_dependency "example-gem", "~> 1.0"

  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html

  # Development-only dependencies: gem name followed by an optional version constraint.
  [
    ["bundler", "~>2.0"],
    ["pry"],
    ["rake", "~>13.0"],
    ["rspec", "~> 3.0"],
    ["rubocop", "~> 1.7"]
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end
end
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: milemarker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bill Dueber
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-11-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pry
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '13.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '13.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubocop
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.7'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.7'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- bill@dueber.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".github/workflows/main.yml"
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
93
|
+
- ".rubocop.yml"
|
94
|
+
- CHANGELOG.md
|
95
|
+
- Gemfile
|
96
|
+
- LICENSE.txt
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/setup
|
101
|
+
- lib/milemarker.rb
|
102
|
+
- lib/milemarker/structured.rb
|
103
|
+
- lib/milemarker/version.rb
|
104
|
+
- lib/ppnum.rb
|
105
|
+
- milemarker.gemspec
|
106
|
+
homepage: https://github.com/billdueber/milemarker
|
107
|
+
licenses:
|
108
|
+
- MIT
|
109
|
+
metadata:
|
110
|
+
homepage_uri: https://github.com/billdueber/milemarker
|
111
|
+
source_code_uri: https://github.com/billdueber/milemarker
|
112
|
+
changelog_uri: https://github.com/billdueber/milemarker/CHANGELOG.md
|
113
|
+
post_install_message:
|
114
|
+
rdoc_options: []
|
115
|
+
require_paths:
|
116
|
+
- lib
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: 2.4.0
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubygems_version: 3.2.32
|
129
|
+
signing_key:
|
130
|
+
specification_version: 4
|
131
|
+
summary: Track and produce loglines for batch processing progress.
|
132
|
+
test_files: []
|