sqlitesweep 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +111 -0
- data/exe/sqlitesweep +5 -0
- data/lib/sqlitesweep/aggregator.rb +86 -0
- data/lib/sqlitesweep/cli.rb +89 -0
- data/lib/sqlitesweep/config.rb +58 -0
- data/lib/sqlitesweep/database_uri.rb +89 -0
- data/lib/sqlitesweep/display.rb +77 -0
- data/lib/sqlitesweep/errors.rb +19 -0
- data/lib/sqlitesweep/host_batcher.rb +127 -0
- data/lib/sqlitesweep/query/base.rb +27 -0
- data/lib/sqlitesweep/query/local.rb +32 -0
- data/lib/sqlitesweep/query/remote.rb +90 -0
- data/lib/sqlitesweep/result.rb +17 -0
- data/lib/sqlitesweep/result_file.rb +53 -0
- data/lib/sqlitesweep/runner.rb +93 -0
- data/lib/sqlitesweep/source_stream.rb +44 -0
- data/lib/sqlitesweep/ssh/connection_manager.rb +145 -0
- data/lib/sqlitesweep/version.rb +3 -0
- data/lib/sqlitesweep/worker_pool.rb +47 -0
- data/lib/sqlitesweep.rb +53 -0
- metadata +89 -0

data/lib/sqlitesweep/query/local.rb
ADDED

@@ -0,0 +1,32 @@
require "sqlite3"

module SQLiteSweep
  module Query
    # Executes SQL queries against local SQLite databases using the sqlite3 gem.
    #
    # Opens each database in readonly mode, executes the query, and returns
    # results as an array of hashes. The database connection is always closed
    # after the query, even if an error occurs.
    #
    # @example
    #   local = Query::Local.new(config)
    #   result = local.execute(DatabaseURI.new("/tmp/test.sqlite3"))
    #   result.rows # => [{"count(*)" => 42}]
    #
    class Local < Base
      # @param uri [DatabaseURI] A local database URI.
      # @return [Result] The query result.
      # @raise [QueryError] If the database can't be opened or the query fails.
      def execute(uri)
        db = SQLite3::Database.new(uri.path, readonly: true)
        db.results_as_hash = true
        rows = db.execute(@config.query)
        Result.new(rows: rows, source: uri)
      rescue SQLite3::Exception => e
        raise QueryError, "Local query failed on #{uri}: #{e.message}"
      ensure
        db&.close
      end
    end
  end
end
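
The readonly-open pattern above works the same outside the gem; a minimal standalone sketch (path, table, and query are illustrative):

    require "sqlite3"

    db = SQLite3::Database.new("/tmp/test.sqlite3", readonly: true)
    db.results_as_hash = true                      # rows become {"column" => value} hashes
    db.execute("SELECT count(*) AS n FROM users")  # => [{"n" => 42}]
    db.close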

data/lib/sqlitesweep/query/remote.rb
ADDED

@@ -0,0 +1,90 @@
require "json"
require "shellwords"

module SQLiteSweep
  module Query
    # Executes batched SQL queries on remote hosts via SSH.
    #
    # Instead of one SSH round-trip per database, this runs a single SSH command
    # that queries multiple databases sequentially on the remote host. The remote
    # command iterates over database paths, running sqlite3 -json on each one,
    # and outputs tab-delimited lines:
    #
    #   /path/to/db1.sqlite3\t[{"count(*)":42}]
    #   /path/to/db2.sqlite3\t[{"count(*)":17}]
    #
    # The output is parsed back into individual Result objects, one per database.
    #
    # @example
    #   remote = Query::Remote.new(config, ssh_manager)
    #   results = remote.execute_batch([uri1, uri2, uri3])
    #   results.each { |r| puts "#{r.source}: #{r.rows}" }
    #
    class Remote < Base
      def initialize(config, ssh_manager)
        super(config)
        @ssh_manager = ssh_manager
      end

      # Queries multiple databases on the same remote host in a single SSH call.
      #
      # @param uris [Array<DatabaseURI>] URIs for databases on the same host.
      #   All URIs must share the same host_key.
      # @return [Array<Result>] One Result per successfully queried database.
      # @raise [QueryError] If the SSH command fails or JSON parsing fails.
      def execute_batch(uris)
        return [] if uris.empty?

        host_key = uris.first.host_key
        paths = uris.map(&:path)

        # Escape single quotes in the SQL for safe embedding in the shell command
        sql = @config.query.gsub("'") { "'\\''" } # block form: in a string replacement, \' would expand to the post-match

        # Build a remote script that queries each database and outputs
        # tab-delimited results: <db_path>\t<json_result>
        remote_script = paths.map { |path|
          escaped_path = Shellwords.shellescape(path)
          "printf '%s\\t' #{escaped_path}; sqlite3 -json #{escaped_path} '#{sql}'; echo"
        }.join("; ")

        output = @ssh_manager.run(host_key, remote_script)
        parse_batch_output(output, uris)
      end

      private

      # Parses the tab-delimited batch output back into individual Results.
      #
      # Each line is expected to be: <db_path>\t<json_array>
      # Lines that don't match this format are silently skipped.
      def parse_batch_output(output, uris)
        results = []
        uri_by_path = uris.each_with_object({}) { |u, h| h[u.path] = u }

        output.each_line do |line|
          line = line.strip
          next if line.empty?

          tab_idx = line.index("\t")
          next unless tab_idx

          db_path = line[0...tab_idx]
          json_str = line[(tab_idx + 1)..]

          uri = uri_by_path[db_path]
          next unless uri

          begin
            rows = json_str.empty? ? [] : JSON.parse(json_str)
            results << Result.new(rows: rows, source: uri)
          rescue JSON::ParserError => e
            raise QueryError, "Failed to parse JSON from #{db_path}: #{e.message}"
          end
        end

        results
      end
    end
  end
end
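
To make the wire format concrete, this is the per-line split that parse_batch_output performs, done by hand on one illustrative line (values are made up):

    require "json"

    line = "/data/tenant_1.sqlite3\t[{\"count(*)\":42}]"
    tab_idx  = line.index("\t")        # split on the first tab only
    db_path  = line[0...tab_idx]       # => "/data/tenant_1.sqlite3"
    json_str = line[(tab_idx + 1)..]
    JSON.parse(json_str)               # => [{"count(*)" => 42}]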

data/lib/sqlitesweep/result.rb
ADDED

@@ -0,0 +1,17 @@
module SQLiteSweep
  # Represents the result of querying a single database.
  #
  # @!attribute [r] rows
  #   @return [Array<Hash>] Array of row hashes returned by the SQL query.
  #     For example, [{"count(*)" => 42}].
  #
  # @!attribute [r] source
  #   @return [String] The URI string of the database that produced this result.
  #     Used to annotate list output with _source provenance.
  #
  Result = Data.define(:rows, :source) do
    def initialize(rows:, source:)
      super(rows: rows, source: source.to_s)
    end
  end
end
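
A quick sketch of what the custom initializer buys (values are illustrative): whatever object is passed as source, such as a DatabaseURI, is stored as its string form, and the value object stays immutable:

    result = SQLiteSweep::Result.new(rows: [{"count(*)" => 42}], source: "/tmp/test.sqlite3")
    result.source   # => "/tmp/test.sqlite3" -- always a String, via to_s
    result.frozen?  # => true -- Data instances are frozen (Ruby 3.2+)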

data/lib/sqlitesweep/result_file.rb
ADDED

@@ -0,0 +1,53 @@
require "json"
require "tempfile"

module SQLiteSweep
  # Thread-safe JSONL file writer for the :list action.
  #
  # Instead of holding all results in memory (which would be problematic when
  # sweeping millions of databases), results are streamed to a JSONL file as
  # they arrive. Each line is a JSON object with the query row data plus a
  # "_source" field indicating which database it came from.
  #
  # The file persists after the process exits so external tools can consume it.
  # The file path is printed to stdout as the final output of a :list sweep.
  #
  # @example Output file contents (one JSON object per line):
  #   {"count(*)":42,"_source":"/data/tenant_1.sqlite3"}
  #   {"count(*)":17,"_source":"ssh://deploy@web1/data/tenant_2.sqlite3"}
  #
  class ResultFile
    # @return [String] Absolute path to the JSONL output file.
    attr_reader :path

    # @return [Integer] Total number of rows written so far.
    attr_reader :row_count

    def initialize
      @path = File.join(Dir.tmpdir, "sqlitesweep_results_#{Process.pid}_#{Time.now.to_i}.jsonl")
      @file = File.open(@path, "w")
      @mutex = Mutex.new
      @row_count = 0
    end

    # Writes all rows from a Result to the file. Each row becomes one JSON line,
    # annotated with "_source" for provenance tracking.
    #
    # @param result [Result] The query result to write.
    def write(result)
      @mutex.synchronize do
        result.rows.each do |row|
          line = row.merge("_source" => result.source).to_json
          @file.puts(line)
          @row_count += 1
        end
        @file.flush
      end
    end

    # Closes the file handle. The file itself remains on disk for consumption.
    def close
      @file.close unless @file.closed?
    end
  end
end
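
Downstream consumption is plain JSONL; a sketch of reading the output back (the file name is illustrative -- the real one embeds the sweep's pid and timestamp):

    require "json"

    File.foreach("/tmp/sqlitesweep_results_1234_1700000000.jsonl") do |line|
      row = JSON.parse(line)
      puts "#{row.delete("_source")}: #{row.to_json}"
    end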

data/lib/sqlitesweep/runner.rb
ADDED

@@ -0,0 +1,93 @@
module SQLiteSweep
  # Main orchestrator. Wires all components together and runs the sweep.
  #
  # Flow:
  #   1. Reads database URIs from SourceStream (which runs the -s command)
  #   2. Local URIs are submitted directly to the WorkerPool for Query::Local
  #   3. Remote URIs are sent to HostBatcher, which groups them by host and
  #      flushes batches to the WorkerPool for Query::Remote
  #   4. Results feed into the Aggregator (thread-safe sum/avg/list)
  #   5. Display refreshes a live status line on stderr
  #   6. Final aggregated result is printed to stdout
  #
  class Runner
    def initialize(config)
      @config = config
      @shutting_down = false
    end

    # Executes the full sweep. Blocks until all databases are queried and
    # the final result is printed to stdout.
    def run
      result_file = ResultFile.new if @config.action == :list
      aggregator = Aggregator.new(@config.action, result_file: result_file)
      display = Display.new(aggregator, live: @config.live)
      pool = WorkerPool.new(@config.concurrency)
      local_query = Query::Local.new(@config)
      ssh_manager = SSH::ConnectionManager.new(@config)
      batcher = HostBatcher.new(@config, pool, aggregator, display, ssh_manager)

      setup_signal_handlers(pool, ssh_manager, display, batcher)

      source = SourceStream.new(@config.source)
      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      display.start(start_time)

      # Main loop: read URIs and dispatch to the appropriate query path
      source.each do |uri|
        break if @shutting_down

        if uri.local?
          pool.submit do
            begin
              result = local_query.execute(uri)
              aggregator.add(result)
            rescue QueryError => e
              aggregator.record_error
              $stderr.puts "\n#{e.message}" unless @config.live
            end
            display.refresh
          end
        else
          batcher.add(uri)
        end
      end

      # Drain remaining batches that haven't hit batch_size yet
      batcher.flush_all
      pool.shutdown

      display.finish
      result_file&.close

      output = aggregator.value
      puts output
    rescue SourceError => e
      $stderr.puts "\nError: #{e.message}"
      exit 1
    ensure
      ssh_manager&.shutdown
    end

    private

    # Installs a SIGINT (Ctrl+C) handler for graceful shutdown.
    # First Ctrl+C: cancel pending work, drain pool, clean up SSH.
    # Second Ctrl+C: force exit immediately.
    def setup_signal_handlers(pool, ssh_manager, display, batcher)
      trap("INT") do
        if @shutting_down
          exit! 1
        else
          @shutting_down = true
          $stderr.write "\nShutting down...\n"
          batcher.cancel
          pool.kill
          display.finish
          ssh_manager.shutdown
          exit 1
        end
      end
    end
  end
end
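
The two-stage Ctrl+C handling is worth seeing in isolation; a minimal sketch of the same pattern, independent of the classes above:

    interrupted = false
    trap("INT") do
      if interrupted
        exit!(1)  # second Ctrl+C: exit immediately, skipping ensure blocks
      else
        interrupted = true
        $stderr.write("\nShutting down... (Ctrl+C again to force)\n")
      end
    end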

data/lib/sqlitesweep/source_stream.rb
ADDED

@@ -0,0 +1,44 @@
module SQLiteSweep
  # Reads database URIs from a shell command, one per line.
  #
  # The command is run via IO.popen and its stdout is read line-by-line.
  # Each non-empty line is parsed into a DatabaseURI. Blank lines are skipped.
  #
  # The command can be anything: a simple `cat`, a `rails runner` invocation,
  # a script that queries an API, etc. Whatever it is, it must output one
  # database URI per line to stdout.
  #
  # @example
  #   stream = SourceStream.new("cat /tmp/uris.txt")
  #   stream.each { |uri| puts uri.path }
  #
  # @example with Rails
  #   stream = SourceStream.new('rails runner "Tenant.find_each { |t| puts t.db_path }"')
  #
  class SourceStream
    def initialize(command)
      @command = command
    end

    # Yields each DatabaseURI from the source command's output.
    # Returns an Enumerator if no block is given.
    #
    # @yieldparam uri [DatabaseURI] A parsed database URI.
    # @raise [SourceError] If the command exits with a non-zero status.
    def each(&block)
      return enum_for(:each) unless block_given?

      IO.popen(@command, "r") do |io|
        io.each_line do |line|
          line = line.strip
          next if line.empty?
          yield DatabaseURI.new(line)
        end
      end

      unless $?.success?
        raise SourceError, "Source command exited with status #{$?.exitstatus}: #{@command}"
      end
    end
  end
end
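
The exit-status check relies on block-form IO.popen setting $? once the child exits; a small sketch with an illustrative command:

    IO.popen("printf '/tmp/a.sqlite3\\n\\n/tmp/b.sqlite3\\n'", "r") do |io|
      io.each_line do |line|
        line = line.strip
        puts line unless line.empty?   # the blank line is skipped, as in SourceStream
      end
    end
    $?.success?  # => true; a non-zero exit would trigger SourceError above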

data/lib/sqlitesweep/ssh/connection_manager.rb
ADDED

@@ -0,0 +1,145 @@
require "concurrent"
require "open3"
require "fileutils"
require "timeout"

module SQLiteSweep
  module SSH
    # Manages SSH ControlMaster connections for multiplexed remote access.
    #
    # Why ControlMaster instead of net-ssh?
    # - net-ssh is not thread-safe for concurrent operations on a single session
    # - System ssh with ControlMaster handles multiplexing natively
    # - SSH agent, config files, and known_hosts all work automatically
    #
    # How it works:
    # 1. On first query to a host, establishes a background master connection
    #    (ssh -N -f with ControlMaster=yes) that stays alive via ControlPersist
    # 2. Subsequent SSH commands to the same host multiplex over the existing
    #    master via ControlPath -- no new TCP/auth handshake needed
    # 3. A semaphore enforces --max-ssh to cap total master connections
    # 4. On shutdown, sends "ssh -O exit" to each master and cleans up sockets
    #
    # Socket files live in /tmp/sqlitesweep_ssh_<pid>/ and are cleaned up
    # on shutdown.
    #
    class ConnectionManager
      def initialize(config)
        @config = config
        @socket_dir = File.join("/tmp", "sqlitesweep_ssh_#{Process.pid}")
        FileUtils.mkdir_p(@socket_dir)
        @semaphore = Concurrent::Semaphore.new(config.max_ssh)
        @masters = Concurrent::Map.new # host_key => true (thread-safe set)
        @mutex = Mutex.new
      end

      # Returns the Unix socket path for a given host's ControlMaster.
      #
      # @param host_key [String] The SSH destination (e.g. "deploy@web1").
      # @return [String] Path to the socket file.
      def socket_path(host_key)
        File.join(@socket_dir, host_key.gsub("/", "_"))
      end

      # Ensures a ControlMaster connection exists for the given host.
      # If one already exists, returns immediately. Otherwise, acquires a
      # semaphore permit and establishes a new master.
      #
      # Uses BatchMode=yes to prevent password prompts (fails fast if
      # key-based auth isn't configured). StrictHostKeyChecking=accept-new
      # auto-accepts new hosts but rejects changed keys.
      #
      # @param host_key [String] The SSH destination (e.g. "deploy@web1").
      # @raise [SSHError] If the master connection fails to establish.
      def ensure_master(host_key)
        return if @masters[host_key]

        @semaphore.acquire
        begin
          # Double-check after acquiring the semaphore (another thread may have
          # established the master while we waited); release the unused permit.
          return @semaphore.release if @masters[host_key]

          socket = socket_path(host_key)
          cmd = [
            "ssh",
            "-o", "ControlMaster=yes",
            "-o", "ControlPath=#{socket}",
            "-o", "ControlPersist=120",
            "-o", "BatchMode=yes",
            "-o", "StrictHostKeyChecking=accept-new",
            "-o", "ConnectTimeout=#{@config.ssh_timeout}",
            "-N", "-f", # No command, go to background
            host_key
          ]

          _out, err, status = Open3.capture3(*cmd)
          unless status.success?
            @semaphore.release
            raise SSHError, "Failed to establish SSH master to #{host_key}: #{err.strip}"
          end

          @masters[host_key] = true
        rescue SSHError
          raise
        rescue => e
          @semaphore.release
          raise SSHError, "SSH connection error to #{host_key}: #{e.message}"
        end
      end

      # Runs a command on a remote host over the multiplexed SSH connection.
      # Ensures a ControlMaster exists first, then executes via ControlPath.
      #
      # @param host_key [String] The SSH destination.
      # @param remote_command [String] The shell command to run remotely.
      # @param timeout [Integer, nil] Override timeout in seconds (defaults to config.query_timeout).
      # @return [String] The command's stdout.
      # @raise [QueryError] If the remote command exits non-zero.
      # @raise [TimeoutError] If the command exceeds the timeout.
      def run(host_key, remote_command, timeout: nil)
        ensure_master(host_key)

        socket = socket_path(host_key)
        cmd = [
          "ssh",
          "-o", "ControlPath=#{socket}",
          "-o", "BatchMode=yes",
          host_key,
          remote_command
        ]

        timeout ||= @config.query_timeout
        out, err, status = execute_with_timeout(cmd, timeout)

        unless status.success?
          raise QueryError, "Remote command failed on #{host_key}: #{err.strip}"
        end

        out
      end

      # Shuts down all ControlMaster connections and cleans up socket files.
      # Sends "ssh -O exit" to each master to gracefully close it.
      def shutdown
        @masters.each_key do |host_key|
          socket = socket_path(host_key)
          system("ssh", "-o", "ControlPath=#{socket}", "-O", "exit", host_key,
                 out: File::NULL, err: File::NULL)
        end
        @masters.clear
        FileUtils.rm_rf(@socket_dir) if Dir.exist?(@socket_dir)
      end

      private

      def execute_with_timeout(cmd, timeout)
        # Options passed to capture3 go to Process.spawn, which has no :timeout,
        # so enforce the deadline by wrapping the call in Timeout.timeout.
        Timeout.timeout(timeout) { Open3.capture3(*cmd) }
      rescue Timeout::Error
        raise TimeoutError, "Command timed out after #{timeout}s: #{cmd.last(1).first}"
      end
    end
  end
end
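
For reference, the ControlMaster lifecycle managed above reduces to three ssh invocations; a sketch using an illustrative host and socket path:

    socket = "/tmp/sqlitesweep_ssh_1234/deploy@web1"

    # 1. Establish the background master (-N: run no command, -f: daemonize).
    system("ssh", "-o", "ControlMaster=yes", "-o", "ControlPath=#{socket}",
           "-o", "ControlPersist=120", "-N", "-f", "deploy@web1")

    # 2. Later commands multiplex over the socket -- no new TCP/auth handshake.
    system("ssh", "-o", "ControlPath=#{socket}", "deploy@web1", "hostname")

    # 3. Ask the master to exit, which also removes its socket file.
    system("ssh", "-o", "ControlPath=#{socket}", "-O", "exit", "deploy@web1")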

data/lib/sqlitesweep/worker_pool.rb
ADDED

@@ -0,0 +1,47 @@
require "concurrent"

module SQLiteSweep
  # Thread pool wrapper around Concurrent::FixedThreadPool.
  #
  # Uses a fixed number of threads for IO-bound SSH and SQLite work. The
  # :caller_runs fallback policy provides natural back-pressure: when all
  # threads are busy and the queue is full, the submitting thread runs
  # the work itself rather than queuing unboundedly.
  #
  # @example
  #   pool = WorkerPool.new(8)
  #   pool.submit { query_database(uri) }
  #   pool.shutdown # waits for all work to complete
  #
  class WorkerPool
    # @param size [Integer] Number of worker threads.
    def initialize(size)
      # :caller_runs only engages when the task queue is bounded; the default
      # max_queue of 0 means "unbounded" and would never reject a task.
      @pool = Concurrent::FixedThreadPool.new(size, max_queue: size * 2, fallback_policy: :caller_runs)
    end

    # Submits a block to be executed by a worker thread.
    # If all workers are busy and the queue is full, the block runs
    # on the calling thread (back-pressure via :caller_runs).
    #
    # @yieldreturn [void]
    def submit(&block)
      @pool.post(&block)
    end

    # Gracefully shuts down the pool: stops accepting new work and waits
    # for all submitted work to finish.
    #
    # @param timeout [Integer] Max seconds to wait for completion.
    def shutdown(timeout = 60)
      @pool.shutdown
      @pool.wait_for_termination(timeout)
    end

    # Immediately kills all worker threads. Used for forced shutdown (e.g. Ctrl+C).
    def kill
      @pool.kill
    end
  end
end
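
A quick way to observe the :caller_runs back-pressure (a sketch; the thread count, queue size, and task are arbitrary):

    require "concurrent"

    # With 2 workers busy and 2 tasks queued, further posts run on the caller.
    pool = Concurrent::FixedThreadPool.new(2, max_queue: 2, fallback_policy: :caller_runs)
    8.times do |i|
      pool.post { sleep(0.05); puts "task #{i}: #{Thread.current == Thread.main ? "caller" : "worker"}" }
    end
    pool.shutdown
    pool.wait_for_termination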

data/lib/sqlitesweep.rb
ADDED

@@ -0,0 +1,53 @@
# SQLiteSweep - Query millions of SQLite databases across remote hosts via SSH.
#
# Designed for multi-tenant applications where each tenant has their own SQLite
# database. Reads database URIs from a shell command, queries each one in parallel,
# and aggregates results in real-time.
#
# Architecture overview:
#
#   SourceStream    reads URIs from a shell command (IO.popen)
#        |
#      Runner       main orchestrator, dispatches URIs to the right path
#        |
#        +-- local URIs  --> WorkerPool --> Query::Local (sqlite3 gem)
#        |
#        +-- remote URIs --> HostBatcher --> WorkerPool --> Query::Remote (ssh + sqlite3 CLI)
#        |                                                        |
#        |                               SSH::ConnectionManager (ControlMaster pooling)
#        |
#    Aggregator     thread-safe accumulator (sum / average / list)
#        |
#     Display       live ANSI progress on stderr
#        |
#      stdout       final result
#
# Usage:
#   sqlitesweep -q "SELECT count(*) FROM users" -a sum -s "cat uris.txt"
#
require_relative "sqlitesweep/version"
require_relative "sqlitesweep/errors"

module SQLiteSweep
  autoload :Config, "sqlitesweep/config"
  autoload :CLI, "sqlitesweep/cli"
  autoload :Runner, "sqlitesweep/runner"
  autoload :SourceStream, "sqlitesweep/source_stream"
  autoload :DatabaseURI, "sqlitesweep/database_uri"
  autoload :Result, "sqlitesweep/result"
  autoload :Aggregator, "sqlitesweep/aggregator"
  autoload :WorkerPool, "sqlitesweep/worker_pool"
  autoload :HostBatcher, "sqlitesweep/host_batcher"
  autoload :Display, "sqlitesweep/display"
  autoload :ResultFile, "sqlitesweep/result_file"

  module Query
    autoload :Base, "sqlitesweep/query/base"
    autoload :Local, "sqlitesweep/query/local"
    autoload :Remote, "sqlitesweep/query/remote"
  end

  module SSH
    autoload :ConnectionManager, "sqlitesweep/ssh/connection_manager"
  end
end

metadata
ADDED

@@ -0,0 +1,89 @@
--- !ruby/object:Gem::Specification
name: sqlitesweep
version: !ruby/object:Gem::Version
  version: 0.1.0
platform: ruby
authors:
- Chris Maximin
bindir: exe
cert_chain: []
date: 1980-01-02 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: sqlite3
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.0'
- !ruby/object:Gem::Dependency
  name: concurrent-ruby
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.3'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.3'
description: CLI tool that queries SQLite databases across remote hosts via SSH, aggregating
  results in real-time. Designed for multi-tenant apps where each tenant has their
  own SQLite database.
executables:
- sqlitesweep
extensions: []
extra_rdoc_files: []
files:
- README.md
- exe/sqlitesweep
- lib/sqlitesweep.rb
- lib/sqlitesweep/aggregator.rb
- lib/sqlitesweep/cli.rb
- lib/sqlitesweep/config.rb
- lib/sqlitesweep/database_uri.rb
- lib/sqlitesweep/display.rb
- lib/sqlitesweep/errors.rb
- lib/sqlitesweep/host_batcher.rb
- lib/sqlitesweep/query/base.rb
- lib/sqlitesweep/query/local.rb
- lib/sqlitesweep/query/remote.rb
- lib/sqlitesweep/result.rb
- lib/sqlitesweep/result_file.rb
- lib/sqlitesweep/runner.rb
- lib/sqlitesweep/source_stream.rb
- lib/sqlitesweep/ssh/connection_manager.rb
- lib/sqlitesweep/version.rb
- lib/sqlitesweep/worker_pool.rb
homepage: https://github.com/chrismaximin/sqlitesweep
licenses:
- MIT
metadata: {}
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '4.0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 4.0.3
specification_version: 4
summary: Query millions of SQLite databases across remote hosts via SSH
test_files: []