sqlitesweep 0.1.0
This diff shows the contents of publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
- checksums.yaml +7 -0
- data/README.md +111 -0
- data/exe/sqlitesweep +5 -0
- data/lib/sqlitesweep/aggregator.rb +86 -0
- data/lib/sqlitesweep/cli.rb +89 -0
- data/lib/sqlitesweep/config.rb +58 -0
- data/lib/sqlitesweep/database_uri.rb +89 -0
- data/lib/sqlitesweep/display.rb +77 -0
- data/lib/sqlitesweep/errors.rb +19 -0
- data/lib/sqlitesweep/host_batcher.rb +127 -0
- data/lib/sqlitesweep/query/base.rb +27 -0
- data/lib/sqlitesweep/query/local.rb +32 -0
- data/lib/sqlitesweep/query/remote.rb +90 -0
- data/lib/sqlitesweep/result.rb +17 -0
- data/lib/sqlitesweep/result_file.rb +53 -0
- data/lib/sqlitesweep/runner.rb +93 -0
- data/lib/sqlitesweep/source_stream.rb +44 -0
- data/lib/sqlitesweep/ssh/connection_manager.rb +142 -0
- data/lib/sqlitesweep/version.rb +3 -0
- data/lib/sqlitesweep/worker_pool.rb +45 -0
- data/lib/sqlitesweep.rb +53 -0
- metadata +89 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 1873157a16c9f24d344824160bbcc81bfd02bff047a3d7f8aa293abf69a5784e
+  data.tar.gz: 105e3556be154e3c16d329ce14d78a7cb5cd890a9bbc7771447ce014ee2a91e0
+SHA512:
+  metadata.gz: 2128e4c7ab40f54c45873d5c91472e5807a2011818fa4f9946819015e3931ecaacc53d8458420ab24843b05e8b4bb9b261f1ea1b6bd58265a767a1e3c9f66059
+  data.tar.gz: 62bb09f2f215fcfb6865557768c2779adddc53265470fbe8bccb7636676be1956fb664300aea63b0daccde7e558ecc37aed68c918dbe277cd38ea7c916f1f9a1
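If you want to check these digests by hand, here is a minimal verification sketch (not part of the gem). It assumes you have already unpacked the archive with `tar -xf sqlitesweep-0.1.0.gem` and gunzipped `checksums.yaml.gz` in the current directory:

```ruby
require "digest"
require "yaml"

# Compare the SHA256 entries from checksums.yaml against the unpacked
# metadata.gz and data.tar.gz members of the .gem archive.
checksums = YAML.safe_load(File.read("checksums.yaml"))

%w[metadata.gz data.tar.gz].each do |member|
  actual = Digest::SHA256.file(member).hexdigest
  status = actual == checksums["SHA256"][member] ? "ok" : "MISMATCH"
  puts "#{member}: #{status}"
end
```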
data/README.md ADDED
@@ -0,0 +1,111 @@
+# SQLiteSweep
+
+Query millions of SQLite databases across remote hosts via SSH, aggregating results in real-time. Designed for multi-tenant apps where each tenant has their own SQLite database.
+
+## Installation
+
+```bash
+gem install sqlitesweep
+```
+
+Or add to your Gemfile:
+
+```ruby
+gem "sqlitesweep"
+```
+
+## Usage
+
+```bash
+sqlitesweep \
+  -q 'SELECT count(*) FROM products WHERE price > 0' \
+  -a sum \
+  -s 'rails runner "Account.active.find_each { |a| puts a.db_uri }"' \
+  -c 16
+```
+
+The `-s` (source) command should output one database URI per line. URIs can be:
+
+- Local paths: `/data/tenants/acme.sqlite3`
+- File URIs: `file:///data/tenants/acme.sqlite3`
+- SSH URIs: `ssh://deploy@web1.example.com/data/tenants/acme.sqlite3`
+
+Results go to stdout, progress goes to stderr — so it's pipe-friendly:
+
+```bash
+sqlitesweep -q "SELECT count(*) FROM users" -a sum -s "cat db_uris.txt" > result.txt
+```
+
+### Actions
+
+| Action | Description |
+|--------|-------------|
+| `sum` | Sum the first column across all databases (default) |
+| `average` / `avg` | Average the first column across all databases |
+| `list` | Write all rows to a JSONL file, print the file path |
+
+### Options
+
+| Flag | Long | Default | Description |
+|------|------|---------|-------------|
+| `-q` | `--query` | *(required)* | SQL query to execute on each database |
+| `-a` | `--action` | `sum` | `sum`, `average`/`avg`, or `list` |
+| `-s` | `--source` | *(required)* | Shell command that outputs database URIs |
+| `-c` | `--concurrency` | `8` | Max parallel query workers |
+| | `--max-ssh` | `50` | Max SSH master connections |
+| | `--no-live` | `false` | Disable live progress display |
+| | `--batch-size` | `4` | Databases to query per SSH call |
+| | `--ssh-timeout` | `10` | SSH connect timeout (seconds) |
+| | `--query-timeout` | `30` | Per-query timeout (seconds) |
+
+## How it works
+
+- **Local databases** are queried directly via the `sqlite3` gem
+- **Remote databases** are queried by shelling out to `ssh` + `sqlite3` on the remote host
+- SSH connections use `ControlMaster` multiplexing — one master connection per host, shared across queries
+- Multiple databases on the same host are batched into a single SSH call (configurable via `--batch-size`)
+- A thread pool (via `concurrent-ruby`) runs queries in parallel with back-pressure
+
+### Requirements
+
+- Ruby >= 4.0
+- `sqlite3` available on remote hosts (for SSH queries)
+- SSH agent or key-based auth configured (BatchMode is enforced — no password prompts)
+
+## Development
+
+```bash
+bundle install
+bundle exec rake test
+```
+
+Run the integration test:
+
+```bash
+bundle exec ruby test/integration/test_local_sweep.rb
+```
+
+Watch the live progress display with a slow-drip demo:
+
+```bash
+ruby test/integration/harness/live_demo.rb
+ruby test/integration/harness/live_demo.rb --count 50 --delay 0.5
+ruby test/integration/harness/live_demo.rb --action list
+```
+
+Run the benchmark:
+
+```bash
+ruby test/integration/harness/sweep_bench.rb --db-count 1000
+```
+
+### Docker-based SSH testing
+
+```bash
+docker compose -f test/integration/harness/docker-compose.yml up -d
+ruby test/integration/harness/sweep_bench.rb --docker --db-count 500
+```
+
+## License
+
+MIT
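The `ControlMaster` bullet in "How it works" is the part that keeps per-query SSH overhead low. As a rough, hypothetical illustration of that idea only (the gem's actual SSH handling lives in `lib/sqlitesweep/ssh/connection_manager.rb`, which is not included in this diff; the host, path, and option values below are placeholders):

```ruby
require "open3"

# Hypothetical sketch of SSH ControlMaster reuse; not the gem's actual code.
host    = "deploy@web1.example.com"        # placeholder destination
db_path = "/data/tenants/acme.sqlite3"     # placeholder remote database
sql     = "SELECT count(*) FROM users;"

ssh_opts = [
  "-o", "BatchMode=yes",                    # fail fast instead of prompting
  "-o", "ConnectTimeout=10",
  "-o", "ControlMaster=auto",               # start or reuse a master connection
  "-o", "ControlPath=/tmp/sweep-%r@%h:%p",  # socket shared by later calls
  "-o", "ControlPersist=60"                 # keep the master alive for reuse
]

# The first call per host pays the TCP + auth handshake; later calls with the
# same ControlPath ride the existing master connection.
stdout, stderr, status = Open3.capture3("ssh", *ssh_opts, host, "sqlite3 #{db_path} #{sql.inspect}")
puts(status.success? ? stdout : stderr)
```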
data/exe/sqlitesweep ADDED

data/lib/sqlitesweep/aggregator.rb ADDED
@@ -0,0 +1,86 @@
+module SQLiteSweep
+  # Thread-safe accumulator for query results. Supports three modes:
+  #
+  #   :sum     - Running total of the first column value from each database.
+  #   :average - Running total divided by the number of databases queried.
+  #   :list    - Delegates to a ResultFile that streams rows to a JSONL file.
+  #
+  # For :sum and :average, the first value of the first row from each query
+  # result is extracted and added to a running total. This means your query
+  # should return a single numeric value (e.g. SELECT count(*) FROM ...).
+  #
+  # All methods are mutex-protected and safe to call from multiple worker threads.
+  #
+  # @example
+  #   agg = Aggregator.new(:sum)
+  #   agg.add(Result.new(rows: [{"count" => 10}], source: "db1"))
+  #   agg.add(Result.new(rows: [{"count" => 20}], source: "db2"))
+  #   agg.value # => "30"
+  #   agg.count # => 2
+  #
+  class Aggregator
+    # @return [Integer] Number of databases successfully queried.
+    attr_reader :count
+
+    # @return [Integer] Number of databases that produced errors.
+    attr_reader :error_count
+
+    # @param action [Symbol] One of :sum, :average, or :list.
+    # @param result_file [ResultFile, nil] Required for :list action. Receives
+    #   streamed results so they don't accumulate in memory.
+    def initialize(action, result_file: nil)
+      @action = action
+      @result_file = result_file
+      @mutex = Mutex.new
+      @total = 0.0
+      @count = 0
+      @error_count = 0
+    end
+
+    # Records a successful query result.
+    #
+    # @param result [Result] The query result to aggregate.
+    def add(result)
+      @mutex.synchronize do
+        case @action
+        when :sum, :average
+          result.rows.each do |row|
+            value = row.values.first
+            @total += value.to_f
+          end
+        when :list
+          @result_file&.write(result)
+        end
+        @count += 1
+      end
+    end
+
+    # Records a failed query (increments error count).
+    def record_error
+      @mutex.synchronize { @error_count += 1 }
+    end
+
+    # Returns the final aggregated value as a string.
+    #
+    # @return [String] For :sum, the total. For :average, the mean.
+    #   For :list, the path to the JSONL result file.
+    def value
+      case @action
+      when :sum
+        format_number(@total)
+      when :average
+        @count > 0 ? format_number(@total / @count) : "0"
+      when :list
+        @result_file&.path
+      end
+    end
+
+    private
+
+    # Formats a number as a clean string: integers without decimal point,
+    # floats with their natural precision.
+    def format_number(n)
+      n == n.to_i ? n.to_i.to_s : n.to_s
+    end
+  end
+end
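The class documents itself as safe to call from multiple worker threads, so a small concurrency sketch may help; `Result.new` is used here exactly as in the class's own `@example`, and everything else is hypothetical:

```ruby
require "sqlitesweep"

# Hypothetical concurrency sketch built on the Aggregator shown above.
agg = SQLiteSweep::Aggregator.new(:sum)

threads = 8.times.map do |i|
  Thread.new do
    # Each "worker" reports one fake per-database count; real workers would
    # pass along the Result returned by a query executor instead.
    result = SQLiteSweep::Result.new(rows: [{ "count" => 100 + i }], source: "db#{i}")
    agg.add(result)
  end
end
threads.each(&:join)

puts agg.value  # => "828" (100 + 101 + ... + 107), formatted without a decimal point
puts agg.count  # => 8
```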
data/lib/sqlitesweep/cli.rb ADDED
@@ -0,0 +1,89 @@
+require "optparse"
+
+module SQLiteSweep
+  # Command-line interface. Parses ARGV into a Config and launches the Runner.
+  #
+  # Designed for pipe-friendly usage: results go to stdout, progress/errors
+  # go to stderr. This means you can do:
+  #
+  #   sqlitesweep -q "SELECT count(*) FROM users" -a sum -s "cat uris.txt" > result.txt
+  #
+  class CLI
+    # Parses command-line arguments and runs the sweep.
+    #
+    # @param argv [Array<String>] Command-line arguments (typically ARGV).
+    # @return [void]
+    def self.run(argv)
+      options = {}
+      parser = OptionParser.new do |opts|
+        opts.banner = "Usage: sqlitesweep [options]"
+
+        opts.on("-q", "--query QUERY", "SQL query to execute on each database (required)") do |v|
+          options[:query] = v
+        end
+
+        opts.on("-a", "--action ACTION", "sum, average/avg, or list (default: sum)") do |v|
+          options[:action] = v
+        end
+
+        opts.on("-s", "--source COMMAND", "Shell command that outputs database URIs (required)") do |v|
+          options[:source] = v
+        end
+
+        opts.on("-c", "--concurrency N", Integer, "Max parallel query workers (default: 8)") do |v|
+          options[:concurrency] = v
+        end
+
+        opts.on("--max-ssh N", Integer, "Max SSH master connections (default: 50)") do |v|
+          options[:max_ssh] = v
+        end
+
+        opts.on("--no-live", "Disable live progress display") do
+          options[:live] = false
+        end
+
+        opts.on("--batch-size N", Integer, "Databases to query per SSH call (default: 4)") do |v|
+          options[:batch_size] = v
+        end
+
+        opts.on("--ssh-timeout N", Integer, "SSH connect timeout in seconds (default: 10)") do |v|
+          options[:ssh_timeout] = v
+        end
+
+        opts.on("--query-timeout N", Integer, "Per-query timeout in seconds (default: 30)") do |v|
+          options[:query_timeout] = v
+        end
+
+        opts.on("-h", "--help", "Show this help") do
+          $stderr.puts opts
+          exit 0
+        end
+
+        opts.on("--version", "Show version") do
+          puts "sqlitesweep #{VERSION}"
+          exit 0
+        end
+      end
+
+      parser.parse!(argv)
+
+      unless options[:query]
+        $stderr.puts "Error: --query is required"
+        $stderr.puts parser
+        exit 1
+      end
+
+      unless options[:source]
+        $stderr.puts "Error: --source is required"
+        $stderr.puts parser
+        exit 1
+      end
+
+      config = Config.new(**options)
+      Runner.new(config).run
+    rescue ConfigError => e
+      $stderr.puts "Error: #{e.message}"
+      exit 1
+    end
+  end
+end
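Because `CLI.run` takes an argument array instead of reading `ARGV` itself, it can also be driven from a script or a test. A hypothetical sketch (the source command and flag values are placeholders):

```ruby
require "sqlitesweep"

# Hypothetical programmatic invocation of the CLI parser shown above;
# equivalent to running the sqlitesweep executable with the same flags.
SQLiteSweep::CLI.run([
  "-q", "SELECT count(*) FROM users",
  "-a", "avg",
  "-s", "cat db_uris.txt",   # placeholder source command
  "-c", "4",
  "--no-live"
])
```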
data/lib/sqlitesweep/config.rb ADDED
@@ -0,0 +1,58 @@
+module SQLiteSweep
+  # Immutable configuration for a sweep run. Built from CLI flags and passed
+  # to every component.
+  #
+  # Uses Data.define for a frozen value object — once created, a Config
+  # cannot be modified.
+  #
+  # @example
+  #   config = Config.new(
+  #     query: "SELECT count(*) FROM users",
+  #     source: "cat uris.txt",
+  #     action: :sum,
+  #     concurrency: 16
+  #   )
+  #   config.action      # => :sum
+  #   config.concurrency # => 16
+  #
+  Config = Data.define(
+    :query,         # SQL query to run on every database
+    :action,        # :sum, :average, or :list
+    :source,        # Shell command that outputs database URIs (one per line)
+    :concurrency,   # Max worker threads in the pool
+    :max_ssh,       # Max simultaneous SSH ControlMaster connections
+    :live,          # Whether to show live ANSI progress on stderr
+    :batch_size,    # Number of databases to query per SSH call
+    :ssh_timeout,   # SSH connect timeout in seconds
+    :query_timeout  # Per-query (or per-batch) timeout in seconds
+  ) do
+    def initialize(
+      query:,
+      source:,
+      action: :sum,
+      concurrency: 8,
+      max_ssh: 50,
+      live: $stderr.tty?,
+      batch_size: 4,
+      ssh_timeout: 10,
+      query_timeout: 30
+    )
+      action = action.to_sym
+      action = :average if action == :avg
+      unless %i[sum average list].include?(action)
+        raise ConfigError, "Unknown action: #{action}. Must be sum, average/avg, or list"
+      end
+      super(
+        query: query,
+        action: action,
+        source: source,
+        concurrency: concurrency,
+        max_ssh: max_ssh,
+        live: live,
+        batch_size: batch_size,
+        ssh_timeout: ssh_timeout,
+        query_timeout: query_timeout
+      )
+    end
+  end
+end
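A note on the `Data.define` choice: besides giving an immutable value object, Ruby's `Data` objects support non-destructive updates via `#with`, which works well with the custom initializer's normalization. A hypothetical sketch:

```ruby
require "sqlitesweep"

# Hypothetical sketch of working with the Config value object defined above.
config = SQLiteSweep::Config.new(
  query:  "SELECT count(*) FROM users",
  source: "cat uris.txt",        # placeholder source command
  action: "avg"                  # the custom initializer normalizes this to :average
)

config.action        # => :average
config.concurrency   # => 8  (default)
config.live          # => true when stderr is a TTY, false otherwise

# Data#with builds a modified copy; the original stays untouched.
tuned = config.with(concurrency: 32)
tuned.concurrency    # => 32
config.concurrency   # => 8
```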
data/lib/sqlitesweep/database_uri.rb ADDED
@@ -0,0 +1,89 @@
+require "uri"
+
+module SQLiteSweep
+  # Parses and represents a database location. Supports three URI formats:
+  #
+  #   - Local path: /data/tenants/acme.sqlite3
+  #   - File URI:   file:///data/tenants/acme.sqlite3
+  #   - SSH URI:    ssh://deploy@web1.example.com/data/tenants/acme.sqlite3
+  #
+  # For SSH URIs, the user portion is optional. The host and path are extracted
+  # and used to build SSH commands.
+  #
+  # @example
+  #   uri = DatabaseURI.new("ssh://deploy@web1/data/db.sqlite3")
+  #   uri.remote?          # => true
+  #   uri.ssh_destination  # => "deploy@web1"
+  #   uri.path             # => "/data/db.sqlite3"
+  #
+  #   uri = DatabaseURI.new("/tmp/local.sqlite3")
+  #   uri.local?  # => true
+  #   uri.path    # => "/tmp/local.sqlite3"
+  #
+  class DatabaseURI
+    attr_reader :user, :host, :path
+
+    def initialize(uri_string)
+      uri_string = uri_string.strip
+      if uri_string.start_with?("ssh://")
+        parsed = URI.parse(uri_string)
+        @user = parsed.user
+        @host = parsed.host
+        @path = parsed.path
+        raise ConfigError, "SSH URI missing host: #{uri_string}" if @host.nil? || @host.empty?
+        raise ConfigError, "SSH URI missing path: #{uri_string}" if @path.nil? || @path.empty?
+      elsif uri_string.start_with?("file://")
+        parsed = URI.parse(uri_string)
+        @user = nil
+        @host = nil
+        @path = parsed.path
+      else
+        @user = nil
+        @host = nil
+        @path = uri_string
+      end
+    end
+
+    # Returns true if this database is on a remote host (accessed via SSH).
+    def remote?
+      !@host.nil?
+    end
+
+    # Returns true if this database is on the local filesystem.
+    def local?
+      @host.nil?
+    end
+
+    # Returns the SSH destination string (e.g. "deploy@web1" or "web1").
+    # Used as the target for ssh commands. Returns nil for local URIs.
+    def ssh_destination
+      return nil unless remote?
+      @user ? "#{@user}@#{@host}" : @host
+    end
+
+    # Returns a key that uniquely identifies the remote host (user@host).
+    # Used by HostBatcher to group databases on the same host into batches.
+    # Returns nil for local URIs.
+    def host_key
+      ssh_destination
+    end
+
+    def to_s
+      if remote?
+        "ssh://#{ssh_destination}#{@path}"
+      else
+        @path
+      end
+    end
+
+    def ==(other)
+      other.is_a?(DatabaseURI) && @user == other.user && @host == other.host && @path == other.path
+    end
+
+    def hash
+      [@user, @host, @path].hash
+    end
+
+    alias_method :eql?, :==
+  end
+end
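`host_key` is the hook that lets the rest of the pipeline group remote databases per host. A hypothetical sketch of sorting a mixed URI list (all paths and hostnames are placeholders):

```ruby
require "sqlitesweep"

# Hypothetical sketch: host_key lets remote URIs be grouped per host while
# local paths are split off for direct querying.
uris = [
  "/data/tenants/local-a.sqlite3",
  "ssh://deploy@web1/data/tenants/acme.sqlite3",
  "ssh://deploy@web1/data/tenants/globex.sqlite3",
  "ssh://deploy@web2/data/tenants/initech.sqlite3"
].map { |s| SQLiteSweep::DatabaseURI.new(s) }

local, remote = uris.partition(&:local?)
local.map(&:path)                  # => ["/data/tenants/local-a.sqlite3"]

by_host = remote.group_by(&:host_key)
by_host.transform_values(&:size)   # => {"deploy@web1" => 2, "deploy@web2" => 1}
```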
data/lib/sqlitesweep/display.rb ADDED
@@ -0,0 +1,77 @@
+module SQLiteSweep
+  # Live progress display on stderr. Shows a single-line status that updates
+  # in-place using ANSI escape codes (\r to return to line start, \e[2K to
+  # clear the line).
+  #
+  # Output format:
+  #   Queried: 14523 | Errors: 3 | Rate: 847/s | Elapsed: 17.1s | Result: 2847291
+  #
+  # When live mode is disabled (--no-live or non-TTY stderr), all rendering
+  # is silently skipped. This keeps piped output clean.
+  #
+  # The display auto-refreshes every 250ms via a background timer thread,
+  # and can also be manually refreshed after each query completes.
+  #
+  class Display
+    # @param aggregator [Aggregator] Source of count/error/value data.
+    # @param live [Boolean] Whether to render live output.
+    def initialize(aggregator, live: true)
+      @aggregator = aggregator
+      @live = live
+      @start_time = nil
+      @mutex = Mutex.new
+      @timer = nil
+    end
+
+    # Starts the display timer. Must be called before any refresh calls.
+    #
+    # @param start_time [Float] Monotonic clock timestamp (from Process.clock_gettime).
+    def start(start_time)
+      @start_time = start_time
+      return unless @live
+
+      @timer = Thread.new do
+        loop do
+          sleep 0.25
+          refresh
+        end
+      rescue
+        # timer thread exits silently on kill
+      end
+    end
+
+    # Manually triggers a display refresh. Called by workers after each
+    # query completes to keep the display responsive.
+    def refresh
+      return unless @live
+      @mutex.synchronize { render }
+    end
+
+    # Stops the timer, renders one final update, and prints a newline
+    # so subsequent output starts on a fresh line.
+    def finish
+      @timer&.kill
+      @timer&.join(1)
+      if @live
+        @mutex.synchronize do
+          render
+          $stderr.write("\n")
+        end
+      end
+    end
+
+    private
+
+    def render
+      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @start_time
+      count = @aggregator.count
+      errors = @aggregator.error_count
+      rate = elapsed > 0 ? (count / elapsed).round(0).to_i : 0
+
+      value_display = @aggregator.value
+
+      line = " Queried: #{count} | Errors: #{errors} | Rate: #{rate}/s | Elapsed: #{elapsed.round(1)}s | Result: #{value_display}"
+      $stderr.write("\r\e[2K#{line}")
+    end
+  end
+end
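The in-place update in `render` is plain terminal control: carriage return to column one, `\e[2K` to erase the line, then rewrite. A standalone, hypothetical demo of the same technique using only the standard library:

```ruby
# Hypothetical standalone demo of the \r + \e[2K trick Display#render uses.
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

1.upto(50) do |count|
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
  rate = elapsed > 0 ? (count / elapsed).round : 0
  line = " Queried: #{count} | Rate: #{rate}/s | Elapsed: #{elapsed.round(1)}s"

  # \r returns the cursor to column one, \e[2K clears the whole line,
  # then the fresh status overwrites the old one.
  $stderr.write("\r\e[2K#{line}")
  sleep 0.05
end

$stderr.write("\n")  # leave the final status visible on its own line
```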
data/lib/sqlitesweep/errors.rb ADDED
@@ -0,0 +1,19 @@
+module SQLiteSweep
+  # Base error class for all SQLiteSweep errors.
+  class Error < StandardError; end
+
+  # Raised when a SQL query fails against a database (local or remote).
+  class QueryError < Error; end
+
+  # Raised when the source command (the -s flag) fails or exits non-zero.
+  class SourceError < Error; end
+
+  # Raised when an SSH connection or master setup fails.
+  class SSHError < Error; end
+
+  # Raised for invalid configuration (bad action name, missing fields, etc.).
+  class ConfigError < Error; end
+
+  # Raised when an SSH command or query exceeds its timeout.
+  class TimeoutError < Error; end
+end
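Since every error above inherits from `SQLiteSweep::Error`, callers can rescue narrowly or broadly. A small hypothetical sketch:

```ruby
# Hypothetical sketch of using the error hierarchy above: rescue narrowly for
# per-database failures, broadly for anything the gem raises.
begin
  # ... run a query or a whole sweep ...
rescue SQLiteSweep::TimeoutError => e
  warn "slow database, skipping: #{e.message}"
rescue SQLiteSweep::QueryError => e
  warn "query failed: #{e.message}"
rescue SQLiteSweep::Error => e
  warn "sweep aborted: #{e.message}"  # also catches SourceError, SSHError, ConfigError
end
```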
data/lib/sqlitesweep/host_batcher.rb ADDED
@@ -0,0 +1,127 @@
+module SQLiteSweep
+  # Groups remote database URIs by host and flushes them as batches.
+  #
+  # Instead of making one SSH round-trip per database, HostBatcher accumulates
+  # URIs for the same host and submits them as a single batch to the worker pool.
+  # Each batch becomes one SSH command that queries multiple databases sequentially
+  # on the remote host.
+  #
+  # Batches are flushed when either:
+  #   - The batch reaches --batch-size (default 4)
+  #   - A timeout fires (200ms) to avoid stalling on slow source streams
+  #
+  # Local URIs should NOT be sent here — they bypass batching entirely and go
+  # straight to the worker pool (see Runner).
+  #
+  # @example
+  #   batcher = HostBatcher.new(config, pool, aggregator, display, ssh_manager)
+  #   batcher.add(uri)   # accumulates by host
+  #   batcher.flush_all  # drains any remaining partial batches
+  #
+  class HostBatcher
+    # How long to wait before flushing an incomplete batch (seconds).
+    # Prevents URIs from sitting in the buffer when the source stream is slow.
+    FLUSH_TIMEOUT = 0.2
+
+    def initialize(config, pool, aggregator, display, ssh_manager)
+      @config = config
+      @pool = pool
+      @aggregator = aggregator
+      @display = display
+      @ssh_manager = ssh_manager
+      @batch_size = config.batch_size
+      @mutex = Mutex.new
+      @batches = {}  # host_key => [uri, ...]
+      @timers = {}   # host_key => Thread
+      @cancelled = false
+    end
+
+    # Adds a remote URI to the appropriate host batch.
+    # May trigger an immediate flush if the batch is full.
+    #
+    # @param uri [DatabaseURI] A remote database URI.
+    def add(uri)
+      return if @cancelled
+
+      host_key = uri.host_key
+      flush_batch = nil
+
+      @mutex.synchronize do
+        @batches[host_key] ||= []
+        @batches[host_key] << uri
+
+        if @batches[host_key].length >= @batch_size
+          flush_batch = @batches.delete(host_key)
+          cancel_timer(host_key)
+        else
+          start_timer(host_key) unless @timers[host_key]
+        end
+      end
+
+      submit_batch(flush_batch) if flush_batch
+    end
+
+    # Flushes all pending batches regardless of size. Called at the end of a
+    # sweep to drain any partial batches that haven't reached batch_size.
+    def flush_all
+      batches_to_flush = nil
+      @mutex.synchronize do
+        @timers.each_value(&:kill)
+        @timers.clear
+        batches_to_flush = @batches.dup
+        @batches.clear
+      end
+
+      batches_to_flush.each_value { |batch| submit_batch(batch) }
+    end
+
+    # Cancels all pending batches and timers. Used during shutdown.
+    def cancel
+      @mutex.synchronize do
+        @cancelled = true
+        @timers.each_value(&:kill)
+        @timers.clear
+        @batches.clear
+      end
+    end
+
+    private
+
+    # Starts a timeout timer for a host. If the batch hasn't been flushed
+    # by the time the timer fires, it flushes whatever is accumulated.
+    def start_timer(host_key)
+      @timers[host_key] = Thread.new do
+        sleep FLUSH_TIMEOUT
+        batch = @mutex.synchronize do
+          @timers.delete(host_key)
+          @batches.delete(host_key)
+        end
+        submit_batch(batch) if batch
+      end
+    end
+
+    def cancel_timer(host_key)
+      timer = @timers.delete(host_key)
+      timer&.kill
+    end
+
+    # Submits a batch of URIs (all for the same host) to the worker pool.
+    # The worker creates a Query::Remote and executes all queries in a
+    # single SSH command.
+    def submit_batch(uris)
+      return if uris.nil? || uris.empty?
+
+      @pool.submit do
+        remote_query = Query::Remote.new(@config, @ssh_manager)
+        begin
+          results = remote_query.execute_batch(uris)
+          results.each { |result| @aggregator.add(result) }
+        rescue QueryError => e
+          uris.length.times { @aggregator.record_error }
+          $stderr.puts "\n#{e.message}" unless @config.live
+        end
+        @display.refresh
+      end
+    end
+  end
+end
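The payoff of batching is easiest to see in isolation: with the default batch size of 4, twelve databases on one host cost three SSH round-trips instead of twelve. A hypothetical back-of-the-envelope sketch (the per-call overhead figure and host counts are assumptions, not measurements):

```ruby
# Hypothetical sketch of what --batch-size buys, independent of the class above.
batch_size   = 4
rtt          = 0.05  # assumed per-SSH-call overhead, in seconds
dbs_per_host = { "deploy@web1" => 12, "deploy@web2" => 7 }

dbs_per_host.each do |host, n|
  unbatched = n * rtt
  batched   = (n.to_f / batch_size).ceil * rtt
  puts format("%-12s %2d dbs  unbatched: %.2fs  batched: %.2fs", host, n, unbatched, batched)
end
# deploy@web1  12 dbs  unbatched: 0.60s  batched: 0.15s
# deploy@web2   7 dbs  unbatched: 0.35s  batched: 0.10s
```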
data/lib/sqlitesweep/query/base.rb ADDED
@@ -0,0 +1,27 @@
+module SQLiteSweep
+  module Query
+    # Abstract base class for query executors.
+    #
+    # Subclasses must implement #execute(uri) which runs the configured SQL
+    # query against the given database and returns a Result.
+    #
+    # Two implementations:
+    #   - Query::Local  — uses the sqlite3 gem for direct file access
+    #   - Query::Remote — shells out via SSH to run sqlite3 on a remote host
+    #
+    class Base
+      def initialize(config)
+        @config = config
+      end
+
+      # Executes the configured query against the given database.
+      #
+      # @param uri [DatabaseURI] The database to query.
+      # @return [Result] The query result.
+      # @raise [QueryError] If the query fails.
+      def execute(uri)
+        raise NotImplementedError
+      end
+    end
+  end
+end
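The contract is small enough that a stub executor is easy to write for tests. A hypothetical in-memory implementation of the interface above (nothing here is part of the gem; `Result.new` follows the signature shown in the Aggregator example):

```ruby
require "sqlitesweep"

module SQLiteSweep
  module Query
    # Hypothetical test double: satisfies the Base contract without touching
    # SQLite or SSH. Not part of the gem.
    class Canned < Base
      def initialize(config, rows_by_path)
        super(config)
        @rows_by_path = rows_by_path  # e.g. {"/data/a.sqlite3" => [{"count" => 3}]}
      end

      # @param uri [DatabaseURI]
      # @return [Result]
      def execute(uri)
        rows = @rows_by_path.fetch(uri.path) do
          raise QueryError, "no canned rows for #{uri}"
        end
        Result.new(rows: rows, source: uri.to_s)
      end
    end
  end
end
```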