@intentsolutionsio/dolt-mcp-vcs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +39 -0
- package/.claude-plugin/plugin.json +25 -0
- package/LICENSE +202 -0
- package/README.md +58 -0
- package/agents/bead-dependency-mapper.md +49 -0
- package/agents/bead-epic-auditor.md +49 -0
- package/agents/bead-recovery-specialist.md +50 -0
- package/agents/beads-guru.md +49 -0
- package/agents/dolt-sync-advisor.md +54 -0
- package/package.json +46 -0
- package/scripts/check-agent-safety.sh +65 -0
- package/scripts/dep-graph.sh +44 -0
- package/scripts/descriptor-to-mcp-args.py +121 -0
- package/scripts/dolt-idle-reaper.sh +82 -0
- package/scripts/dolt-mcp-client.py +188 -0
- package/scripts/dolt-push-dolthub.sh +98 -0
- package/scripts/epic-closure-audit.sh +49 -0
- package/scripts/profile_sql.py +97 -0
- package/scripts/resolve-creds-ref.py +136 -0
- package/scripts/server-health.sh +39 -0
- package/scripts/sql_classifier.py +259 -0
- package/skills/dolt-mcp-vcs/SKILL.md +141 -0
- package/skills/dolt-mcp-vcs/eval.yaml +106 -0
- package/skills/dolt-mcp-vcs/references/connection-descriptor-and-profiles.md +109 -0
- package/skills/dolt-mcp-vcs/references/dolt-internals.md +33 -0
- package/skills/dolt-mcp-vcs/safety-eval.yaml +118 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""profile_sql.py — build the version-control audit SQL from a schema PROFILE.
|
|
2
|
+
|
|
3
|
+
This is the seam that proves the use-case-adapter inversion (the panel's
|
|
4
|
+
schema-profile MAJOR / §9): the generic queries (epic-closure drift, dependency
|
|
5
|
+
bottlenecks) are emitted from a profile's table/column/encoding/value names, so a
|
|
6
|
+
SECOND schema (profiles/example-generic.profile.json) runs the same logic with
|
|
7
|
+
zero code change. `beads` stops being hardcoded and becomes profile #1.
|
|
8
|
+
|
|
9
|
+
A profile is untrusted INPUT — names are validated as plain identifiers and the type-value
|
|
10
|
+
is quoted as a literal, so a malicious profile cannot inject SQL. (Live schema
|
|
11
|
+
introspection still wins over the profile at agent runtime.)
|
|
12
|
+
|
|
13
|
+
Pure stdlib; importable for tests.
|
|
14
|
+
"""
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
17
|
+
|
|
18
|
+
_IDENT = re.compile(r"^[A-Za-z_][A-Za-z_0-9]*$")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def load_profile(path):
    """Load a schema profile from the JSON file at *path*.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale default, so a profile containing non-ASCII names or values parses
    the same way on every host.

    Returns the parsed JSON document unchanged. Raises ``OSError`` if the file
    cannot be opened and ``json.JSONDecodeError`` (a ``ValueError``) if it is
    not valid JSON.
    """
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _ident(name):
    """Validate a table/column identifier and return it bare.

    Rejects anything that is not a plain identifier, so a hostile profile
    cannot smuggle SQL through a table/column name.

    Raises ``ValueError`` for a non-string or for any string that is not
    entirely made of identifier characters.
    """
    # fullmatch, not match: with match() the pattern's trailing `$` also
    # succeeds just before a final newline, so "issues\n" would be accepted.
    if not isinstance(name, str) or not _IDENT.fullmatch(name):
        raise ValueError(f"unsafe identifier in profile: {name!r}")
    return name
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _lit(value):
    """Render *value* as a single-quoted SQL string literal.

    Backslashes are doubled before embedded single quotes are doubled. With
    MySQL-style backslash escapes (the default SQL mode), a value such as
    ``\\' OR 1=1 -- `` would otherwise turn the doubled quote into an escaped
    quote followed by a terminator and break out of the literal.

    Raises ``ValueError`` when *value* is not a string.
    """
    if not isinstance(value, str):
        raise ValueError(f"non-string value in profile: {value!r}")
    # Order matters: escape backslashes first so the quotes added next are
    # not themselves re-escaped.
    escaped = value.replace("\\", "\\\\").replace("'", "''")
    return "'" + escaped + "'"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _enc(profile, key):
    """Fetch the encoding entry named *key* from the profile.

    Returns ``(encoding, value)`` where *encoding* is the entry itself and
    *value* is its ``"value"`` field, falling back to *key* when the entry
    does not define one. A missing ``"encodings"`` table or entry raises
    ``KeyError``.
    """
    encoding = profile["encodings"][key]
    # The type-column value defaults to the encoding key.
    type_value = encoding.get("value", key)
    return encoding, type_value
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def epic_closure_sql(profile):
    """Build the query listing OPEN epics whose parent-child children are all closed.

    Every table/column name taken from *profile* goes through ``_ident`` and
    every value through ``_lit`` before it is interpolated. Returns the SQL
    text as a single line; a missing profile key raises ``KeyError`` and an
    unsafe name or non-string value raises ``ValueError``.
    """
    issues_tbl = _ident(profile["tables"]["issues"])
    deps_tbl = _ident(profile["tables"]["dependencies"])
    id_col = _ident(profile["columns"]["id"])
    status_col = _ident(profile["columns"]["status"])
    type_col = _ident(profile["columns"]["type"])
    itype_col = _ident(profile["columns"]["issue_type"])
    encoding, encoding_value = _enc(profile, "parent-child")
    child_col = _ident(encoding["child"])
    parent_col = _ident(encoding["parent"])
    closed_lit = _lit(profile["closed-value"])
    epic_lit = _lit(profile["epic-value"])
    pc_lit = _lit(encoding_value)

    # One clause per entry; joined with single spaces into the final statement.
    clauses = [
        f"SELECT e.{id_col} AS epic, COUNT(d.{child_col}) AS children,",
        f"SUM(CASE WHEN c.{status_col}={closed_lit} THEN 1 ELSE 0 END) AS closed",
        f"FROM {issues_tbl} e",
        f"JOIN {deps_tbl} d ON d.{parent_col}=e.{id_col} AND d.{type_col}={pc_lit}",
        f"JOIN {issues_tbl} c ON c.{id_col}=d.{child_col}",
        f"WHERE e.{itype_col}={epic_lit} AND e.{status_col}<>{closed_lit}",
        f"GROUP BY e.{id_col}",
        "HAVING children>0 AND closed=children",
        "ORDER BY children DESC",
    ]
    return " ".join(clauses)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def bottleneck_sql(profile, top=10):
    """Build the query ranking OPEN issues by how many OPEN issues they block.

    Uses the profile's ``blocks`` encoding. *top* caps the number of rows and
    must be a positive ``int`` (``bool`` is rejected), otherwise ``ValueError``
    is raised. Names and values from *profile* are passed through ``_ident`` /
    ``_lit`` before interpolation.
    """
    top_is_valid = isinstance(top, int) and not isinstance(top, bool) and top >= 1
    if not top_is_valid:
        raise ValueError("top must be a positive integer")

    issues_tbl = _ident(profile["tables"]["issues"])
    deps_tbl = _ident(profile["tables"]["dependencies"])
    id_col = _ident(profile["columns"]["id"])
    status_col = _ident(profile["columns"]["status"])
    type_col = _ident(profile["columns"]["type"])
    encoding, encoding_value = _enc(profile, "blocks")
    blocked_col = _ident(encoding["blocked"])
    blocker_col = _ident(encoding["blocker"])
    closed_lit = _lit(profile["closed-value"])
    blocks_lit = _lit(encoding_value)

    # `b` is the blocking issue, `blocked` the issue it holds up.
    clauses = [
        f"SELECT b.{id_col} AS blocker, b.{status_col} AS status, COUNT(*) AS blocking_open",
        f"FROM {deps_tbl} d",
        f"JOIN {issues_tbl} b ON b.{id_col}=d.{blocker_col}",
        f"JOIN {issues_tbl} blocked ON blocked.{id_col}=d.{blocked_col}",
        f"WHERE d.{type_col}={blocks_lit} AND b.{status_col}<>{closed_lit}",
        f"AND blocked.{status_col}<>{closed_lit}",
        f"GROUP BY b.{id_col}, b.{status_col}",
        f"ORDER BY blocking_open DESC LIMIT {top}",
    ]
    return " ".join(clauses)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""resolve-creds-ref.py — resolve a connection-descriptor `creds-ref` to a secret.
|
|
3
|
+
|
|
4
|
+
A `creds-ref` is a POINTER to a secret, never the secret itself (blueprint §2 /
|
|
5
|
+
the panel's creds-ref MAJOR). Accepted schemes:
|
|
6
|
+
|
|
7
|
+
env:NAME environment variable NAME
|
|
8
|
+
sops:PATH#KEY KEY from a SOPS-encrypted file at PATH — decrypted to stdout
|
|
9
|
+
only, NEVER to disk (per the SOPS posture)
|
|
10
|
+
pass:PATH `pass show PATH`
|
|
11
|
+
|
|
12
|
+
Resolution is **fail-closed**:
|
|
13
|
+
* an unknown scheme exits non-zero (this is the validator rule — a `creds-ref`
|
|
14
|
+
that is not a known `scheme:` prefix is rejected, never silently treated as a
|
|
15
|
+
literal);
|
|
16
|
+
* an empty/unresolved secret for a NON-loopback endpoint exits non-zero — we
|
|
17
|
+
never connect a remote endpoint with an empty (unauthenticated) password;
|
|
18
|
+
* for a loopback endpoint (127.0.0.1 / localhost / ::1) an empty result IS
|
|
19
|
+
allowed — bd's local dolt server is unauthenticated by default.
|
|
20
|
+
|
|
21
|
+
The resolved secret is printed to stdout for capture into an env var:
|
|
22
|
+
export DOLT_PASSWORD="$(resolve-creds-ref.py --creds-ref env:DOLT_PASSWORD --endpoint 127.0.0.1:3308)"
|
|
23
|
+
NEVER echo or log the captured value.
|
|
24
|
+
|
|
25
|
+
Exit: 0 ok (secret on stdout; may be empty for loopback) · 2 bad usage /
|
|
26
|
+
unknown scheme · 4 resolution failed (fail-closed).
|
|
27
|
+
"""
|
|
28
|
+
import argparse
|
|
29
|
+
import os
|
|
30
|
+
import subprocess
|
|
31
|
+
import sys
|
|
32
|
+
|
|
33
|
+
KNOWN_SCHEMES = ("env", "sops", "pass")
|
|
34
|
+
_LOOPBACK_HOSTS = {"127.0.0.1", "localhost", "::1", ""}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def eprint(*a):
    """Print the arguments to stderr, formatted exactly as ``print`` would."""
    stream = sys.stderr
    print(*a, file=stream)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def is_loopback(endpoint):
    """Return True when *endpoint*'s host part is one of ``_LOOPBACK_HOSTS``.

    Accepts ``host``, ``host:port`` and bracketed ``[v6]:port`` forms. A falsy
    endpoint is treated as the empty host. The comparison is case-insensitive.
    """
    text = (endpoint or "").strip()
    if text.startswith("["):
        # Bracketed IPv6 literal, e.g. [::1]:3306 — keep what is inside the brackets.
        host = text[1:].partition("]")[0]
    elif text.count(":") == 1:
        # Exactly one colon means host:port; a bare IPv6 address has several.
        host = text.partition(":")[0]
    else:
        host = text
    return host.lower() in _LOOPBACK_HOSTS
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse_scheme(ref):
    """Split ``scheme:rest`` at the first colon.

    Returns ``(scheme_lowercased, rest)``, or ``(None, None)`` when *ref*
    contains no colon at all.
    """
    if ":" in ref:
        head, tail = ref.split(":", 1)
        return head.lower(), tail
    return None, None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def resolve_env(rest):
    """Return the value of environment variable *rest*, or "" when it is unset."""
    value = os.environ.get(rest)
    return "" if value is None else value
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def resolve_pass(rest):
    """Return the first output line of ``pass show <rest>``.

    Returns "" when the command cannot be started, times out (15 s), exits
    non-zero, or prints nothing.
    """
    command = ["pass", "show", rest]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=15)
    except (OSError, subprocess.SubprocessError):
        return ""
    if proc.returncode != 0 or not proc.stdout:
        return ""
    # Only the first line of the output is returned.
    return proc.stdout.splitlines()[0]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def resolve_sops(rest):
    """Resolve ``PATH#KEY`` by decrypting PATH with ``sops -d`` and picking KEY.

    Returns ``None`` (after printing a usage error) when *rest* has no ``#``.
    Otherwise returns the secret, or "" when sops cannot be run, times out
    (30 s), exits non-zero, or the key is not present. Decrypted output is
    only captured from the subprocess's stdout.
    """
    if "#" not in rest:
        eprint("error: sops ref must be 'sops:PATH#KEY'")
        return None  # signals usage error

    path, _, key = rest.rpartition("#")
    # Paths that look like dotenv files are decrypted as dotenv and scanned
    # for KEY=VALUE; anything else uses --extract for a single value.
    looks_like_dotenv = (
        path.endswith((".env", ".env.sops")) or ".env" in os.path.basename(path)
    )
    try:
        if not looks_like_dotenv:
            proc = subprocess.run(
                ["sops", "-d", "--extract", f'["{key}"]', path],
                capture_output=True, text=True, timeout=30,
            )
            if proc.returncode != 0:
                return ""
            return proc.stdout.strip()

        proc = subprocess.run(
            ["sops", "-d", "--input-type", "dotenv", "--output-type", "dotenv", path],
            capture_output=True, text=True, timeout=30,
        )
        if proc.returncode != 0:
            return ""
        prefix = key + "="
        for entry in proc.stdout.splitlines():
            if entry.startswith(prefix):
                raw = entry.split("=", 1)[1]
                # Trim whitespace, then any surrounding double and single quotes.
                return raw.strip().strip('"').strip("'")
        return ""
    except (OSError, subprocess.SubprocessError):
        return ""
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def resolve(ref):
    """Resolve a creds-ref and return ``(secret, ok)``.

    ``ok`` is False (and ``secret`` is None) for an unknown scheme or a
    malformed sops reference; otherwise ``secret`` is the resolved string,
    which may be empty.
    """
    scheme, rest = parse_scheme(ref)
    if scheme not in KNOWN_SCHEMES:
        eprint(f"error: unknown creds-ref scheme '{scheme}'. "
               f"Accepted: {', '.join(s + ':' for s in KNOWN_SCHEMES)}")
        return None, False

    # env/pass always yield a string; only sops can report a usage error.
    direct_resolvers = {"env": resolve_env, "pass": resolve_pass}
    if scheme in direct_resolvers:
        return direct_resolvers[scheme](rest), True

    secret = resolve_sops(rest)
    return secret, secret is not None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def main():
    """Command-line entry point; returns the process exit status.

    0 — secret written to stdout (nothing is written when it is empty and the
        endpoint is loopback);
    2 — unknown scheme or malformed reference;
    4 — empty secret for a non-loopback endpoint.
    """
    parser = argparse.ArgumentParser(description="resolve a descriptor creds-ref (fail-closed)")
    parser.add_argument("--creds-ref", required=True, help="env:NAME | sops:PATH#KEY | pass:PATH")
    parser.add_argument("--endpoint", default=os.environ.get("DOLT_ENDPOINT", "127.0.0.1:3308"),
                        help="host:port — loopback permits an empty secret")
    args = parser.parse_args()

    secret, ok = resolve(args.creds_ref)
    if not ok:
        return 2
    if secret:
        sys.stdout.write(secret)
        return 0
    if is_loopback(args.endpoint):
        return 0  # empty is fine for a loopback (unauthenticated) server
    eprint(f"error: creds-ref '{args.creds_ref}' resolved empty for non-loopback "
           f"endpoint '{args.endpoint}' — refusing to connect unauthenticated (fail-closed).")
    return 4
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
if __name__ == "__main__":
|
|
136
|
+
sys.exit(main())
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# server-health.sh — inventory running `dolt sql-server` processes and flag sprawl.
# bd starts a per-project server on an auto-detected port; over time that becomes
# many servers (one per workspace). This maps each running server to its workspace
# and warns when the count suggests consolidation onto one shared server (:3308 via
# `bd dolt --global`) is warranted.
#
# Usage: server-health.sh [--warn N] (default warn threshold: 5 servers)
# Requires: pgrep, /proc (Linux). Read-only — never kills anything.
# Exit: 2 when --warn is not a non-negative integer.
set -uo pipefail

WARN=5
if [ "${1:-}" = "--warn" ]; then
  WARN="${2:-5}"
fi
# Validate up front: a non-numeric threshold would make the `-gt` test below
# error out and fall through to the "within threshold" branch.
case "$WARN" in
  '' | *[!0-9]*)
    echo "error: --warn expects a non-negative integer, got '$WARN'" >&2
    exit 2
    ;;
esac

mapfile -t PIDS < <(pgrep -f 'dolt sql-server' 2>/dev/null || true)
n=0
rows=""
# The ${PIDS[@]+...} form keeps `set -u` from tripping on an empty array in
# bash releases older than 4.4.
for p in ${PIDS[@]+"${PIDS[@]}"}; do
  [ -n "$p" ] || continue
  # Accept `-P N`, `-PN`, `--port N` and `--port=N`; keep only the first match.
  # The braces make 2>/dev/null also cover the failed `<` redirection when the
  # process has already exited.
  port="$({ tr '\0' ' ' < "/proc/$p/cmdline"; } 2>/dev/null \
    | grep -oE '(^| )(-P ?|--port[ =])[0-9]+' \
    | grep -oE '[0-9]+$' \
    | head -n 1 || true)"
  [ -n "$port" ] || continue
  cwd="$(readlink "/proc/$p/cwd" 2>/dev/null || echo '?')"
  # Workspace = cwd without the $HOME/ prefix and without the /.beads… tail.
  # Parameter expansion instead of sed, so characters in $HOME cannot be
  # interpreted as sed delimiters or regex syntax.
  ws="$cwd"
  if [ -n "${HOME:-}" ]; then
    ws="${ws#"$HOME"/}"
  fi
  ws="${ws%%/.beads*}"
  rows+="$(printf "%-8s %-8s %s" "$port" "$p" "$ws")"$'\n'
  n=$((n+1)) # counted in the main shell, not a pipe subshell
done
printf "%-8s %-8s %s\n" "PORT" "PID" "WORKSPACE"
[ -n "$rows" ] && printf '%s' "$rows" | sort -n

echo
echo "# $n running dolt sql-server process(es)."
if [ "$n" -gt "$WARN" ]; then
  echo "# ⚠ Sprawl: $n servers > threshold $WARN. Consolidate onto ONE shared server:"
  echo "# bd config set dolt.shared-server true # machine-wide"
  echo "# bd dolt killall # per repo (refuses external/other-repo servers)"
  echo "# The next bd command auto-starts a single server at ~/.beads/shared-server/ on :3308."
else
  echo "# ✓ Server count within threshold ($WARN)."
fi
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""sql_classifier.py — verb-class statement classifier (the mutation chokepoint).
|
|
2
|
+
|
|
3
|
+
This is the heart of blueprint §3 / D4 blocker B1. Every SQL statement that the
|
|
4
|
+
plugin runs through `dolt-mcp-client.py` is classified into one of three verb
|
|
5
|
+
classes and gated *before* it reaches the dolt-mcp server:
|
|
6
|
+
|
|
7
|
+
read SELECT / SHOW / DESCRIBE / EXPLAIN / dolt read table-funcs …
|
|
8
|
+
-> executes freely.
|
|
9
|
+
safe-write INSERT / UPDATE / DELETE / CREATE TABLE / CALL DOLT_COMMIT …
|
|
10
|
+
-> executes ONLY on an agent-owned branch (never `main`) and
|
|
11
|
+
ONLY with --allow-mutation; refused on pre-GA flavors.
|
|
12
|
+
history-affecting CALL DOLT_PUSH / DOLT_MERGE / DOLT_RESET('--hard') /
|
|
13
|
+
branch-delete / DROP DATABASE / unknown CALL …
|
|
14
|
+
-> ALWAYS refused (recommend-only; a human runs it).
|
|
15
|
+
|
|
16
|
+
Why a *statement* classifier and not a tool allowlist: `query`/`exec` is a single
|
|
17
|
+
MCP tool that carries every SQL verb, so excluding "history-affecting tools" from
|
|
18
|
+
an agent grant does nothing — the dangerous verbs ride inside the one allowed
|
|
19
|
+
tool. The gate therefore has to read the statement, not the tool name.
|
|
20
|
+
|
|
21
|
+
Design bias is fail-safe: anything we cannot positively prove is a read is
|
|
22
|
+
treated as at least safe-write, and any unrecognized `CALL …` (especially an
|
|
23
|
+
unrecognized `CALL DOLT_*`) is treated as history-affecting. A multi-statement
|
|
24
|
+
batch is classified at the severity of its most dangerous statement. Comments are
|
|
25
|
+
stripped before classification so `/* */`-hidden or `--`-trailing verbs cannot
|
|
26
|
+
slip a mutation past a read-looking prefix.
|
|
27
|
+
|
|
28
|
+
Pure stdlib; importable (no side effects on import) so it can be unit-tested
|
|
29
|
+
directly.
|
|
30
|
+
"""
|
|
31
|
+
import re
|
|
32
|
+
|
|
33
|
+
READ = "read"
|
|
34
|
+
SAFE_WRITE = "safe-write"
|
|
35
|
+
HISTORY_AFFECTING = "history-affecting"
|
|
36
|
+
|
|
37
|
+
# Severity ordering for batch (max-wins) classification.
|
|
38
|
+
_SEVERITY = {READ: 0, SAFE_WRITE: 1, HISTORY_AFFECTING: 2}
|
|
39
|
+
|
|
40
|
+
# Leading keywords that are unambiguously read-only.
# PREPARE / EXECUTE are deliberately NOT listed: a prepared statement can wrap
# any SQL text (including CALL DOLT_PUSH or DROP DATABASE), so neither can be
# proven to be a read from its leading keyword. They are denied below.
_READ_LEADERS = {
    "SELECT", "SHOW", "DESCRIBE", "DESC", "EXPLAIN", "USE", "VALUES", "TABLE",
    "HELP", "DEALLOCATE",
}
# `SET` is session config (read-class for our purposes); `SET PASSWORD`,
# `SET GLOBAL` and `SET PERSIST` / `SET PERSIST_ONLY` change state beyond the
# session and are handled as exceptions below.
_READ_SET_EXCEPTIONS = ("PASSWORD", "GLOBAL", "PERSIST")

# Leading keywords that mutate the working set / commit on a branch. Recoverable
# through Dolt history as long as no history-affecting op runs, so: safe-write.
_SAFE_WRITE_LEADERS = {
    "INSERT", "UPDATE", "DELETE", "REPLACE", "MERGE", "TRUNCATE",
    "CREATE", "ALTER", "RENAME", "ANALYZE", "LOAD", "IMPORT",
}

# Leading keywords that are destructive/admin and never safe from an agent.
# PREPARE / EXECUTE sit here for the same reason as an unrecognized CALL: the
# statement they ultimately run is opaque to a leading-keyword classifier.
_HISTORY_LEADERS = {
    "GRANT", "REVOKE", "FLUSH", "SHUTDOWN", "KILL", "RESET", "PURGE",
    "LOCK", "UNLOCK", "INSTALL", "UNINSTALL", "BACKUP", "RESTORE",
    "PREPARE", "EXECUTE",
}
|
|
61
|
+
|
|
62
|
+
# CALL DOLT_* procedure verb-classes. Anything not listed -> history-affecting.
|
|
63
|
+
_DOLT_PROC_CLASS = {
|
|
64
|
+
"DOLT_COMMIT": SAFE_WRITE,
|
|
65
|
+
"DOLT_ADD": SAFE_WRITE,
|
|
66
|
+
"DOLT_CHECKOUT": SAFE_WRITE, # incl. -b create; switches working set, non-destructive
|
|
67
|
+
"DOLT_BRANCH": SAFE_WRITE, # create/list; delete is special-cased below
|
|
68
|
+
"DOLT_TAG": SAFE_WRITE, # create; delete is special-cased below
|
|
69
|
+
"DOLT_FETCH": SAFE_WRITE, # network read + remote-tracking refs; no local rewrite
|
|
70
|
+
"DOLT_REMOTE": SAFE_WRITE, # add/list; remove is non-destructive to data
|
|
71
|
+
"DOLT_VERIFY_CONSTRAINTS": SAFE_WRITE,
|
|
72
|
+
# --- history-affecting (always recommend-only) ---
|
|
73
|
+
"DOLT_PUSH": HISTORY_AFFECTING,
|
|
74
|
+
"DOLT_PULL": HISTORY_AFFECTING,
|
|
75
|
+
"DOLT_MERGE": HISTORY_AFFECTING,
|
|
76
|
+
"DOLT_REVERT": HISTORY_AFFECTING,
|
|
77
|
+
"DOLT_CHERRY_PICK": HISTORY_AFFECTING,
|
|
78
|
+
"DOLT_REBASE": HISTORY_AFFECTING,
|
|
79
|
+
"DOLT_CLEAN": HISTORY_AFFECTING, # discards uncommitted working changes
|
|
80
|
+
"DOLT_GC": HISTORY_AFFECTING,
|
|
81
|
+
"DOLT_PURGE_DROPPED_DATABASES": HISTORY_AFFECTING,
|
|
82
|
+
# DOLT_RESET handled specially (soft vs hard) below.
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
_LEADING_TOKEN = re.compile(r"[A-Za-z_][A-Za-z_0-9]*")
|
|
86
|
+
_CALL_PROC = re.compile(r"^CALL\s+([A-Za-z_][A-Za-z_0-9]*)", re.IGNORECASE)
|
|
87
|
+
# A complete quoted `-d` / `-D` / `--delete` flag arg inside a CALL DOLT_BRANCH /
|
|
88
|
+
# DOLT_TAG list — matched as a whole quoted token so a branch literally named
|
|
89
|
+
# e.g. '-delete-me' does not trip it.
|
|
90
|
+
_DELETE_FLAG = re.compile(r"""['"]\s*--?(?:delete|d)\s*['"]""", re.IGNORECASE)
|
|
91
|
+
_WRITE_VERB_ANYWHERE = re.compile(r"\b(INSERT|UPDATE|DELETE|REPLACE|MERGE)\b", re.IGNORECASE)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def strip_sql_comments(sql):
    """Remove `/* */`, `-- ` and `#` comments while preserving quoted text.

    The goal is that a verb hidden behind a comment cannot mask the real
    leading verb, and that nothing the server would execute is discarded.
    Each comment is replaced by a single space.

    Rules, chosen to follow MySQL-style tokenization:

    * Text inside `'…'`, `"…"` and backtick quotes is copied verbatim, so a
      `--`/`#` inside a quoted value (e.g. `DOLT_RESET('--hard')`) is not a
      comment. Doubled quotes are an escape in all three; a backslash is an
      escape only in `'…'` and `"…"`, not inside backtick identifiers.
    * `--` starts a line comment only when followed by a space, tab, CR, LF
      or end of input. `1--1` is arithmetic, so the rest of that line
      (possibly `; DROP …`) must stay visible to the classifier.
    * `/*! … */` (optionally `/*!NNNNN … */`) is an executable comment whose
      body the server runs. The markers are dropped but the body is kept,
      stripped of its own comments, so it gets classified.
    * An unterminated `/* …` swallows the rest of the input; an unterminated
      `/*! …` keeps the rest of the input as code.
    """
    out = []
    i, n, quote = 0, len(sql), None
    while i < n:
        ch = sql[i]
        if quote:
            out.append(ch)
            if ch == "\\" and quote != "`" and i + 1 < n:
                # Backslash escape inside a string literal: copy the pair.
                out.append(sql[i + 1])
                i += 2
                continue
            if ch == quote:
                if i + 1 < n and sql[i + 1] == quote:
                    # Doubled-quote escape ('' / "" / ``): still inside the quote.
                    out.append(sql[i + 1])
                    i += 2
                    continue
                quote = None
            i += 1
            continue
        if ch in ("'", '"', "`"):
            quote = ch
            out.append(ch)
            i += 1
            continue
        if sql.startswith("/*", i):
            end = sql.find("*/", i + 2)
            if sql.startswith("!", i + 2):
                # Executable comment: skip "/*!" and the optional version digits.
                k = i + 3
                while k < n and sql[k].isdigit():
                    k += 1
                out.append(" ")
                if end == -1:
                    # Unterminated: keep scanning the remainder as code.
                    i = k
                    continue
                # The body cannot contain "*/", so this recursion is at most
                # one level deep.
                out.append(strip_sql_comments(sql[k:end]))
                out.append(" ")
                i = end + 2
                continue
            i = n if end == -1 else end + 2
            out.append(" ")
            continue
        is_dash_comment = sql.startswith("--", i) and (
            i + 2 >= n or sql[i + 2] in " \t\r\n"
        )
        if is_dash_comment or ch == "#":
            # Line comment: drop up to, but not including, the newline.
            j = sql.find("\n", i)
            i = n if j == -1 else j
            out.append(" ")
            continue
        out.append(ch)
        i += 1
    return "".join(out)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def split_statements(sql):
    """Split *sql* on `;`, ignoring any `;` inside quotes.

    Quote tracking understands backslash escapes inside `'…'` and `"…"`
    (not inside backtick identifiers). Without that, `'\\''` would be read as
    a closed string followed by a newly opened one, and a following
    `; DROP …;` would be swallowed as string content — an under-split that
    lowers the batch verdict. Doubled quotes need no special handling: they
    close and immediately reopen the quote with no `;` in between.

    Returns the non-empty fragments, each stripped of surrounding whitespace.
    The batch takes the max severity of its fragments, so an over-split never
    lowers the verdict.
    """
    fragments, buf, quote = [], [], None
    escaped = False  # True when the previous char was an escaping backslash
    for ch in sql:
        if quote:
            buf.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\" and quote != "`":
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in ("'", '"', "`"):
            quote = ch
            buf.append(ch)
        elif ch == ";":
            fragments.append("".join(buf))
            buf = []
        else:
            buf.append(ch)
    if buf:
        fragments.append("".join(buf))
    return [s for s in (frag.strip() for frag in fragments) if s]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _classify_call(stmt_upper):
    """Classify a `CALL …` statement (already comment-stripped and upper-cased).

    * no resolvable procedure name          -> history-affecting (deny)
    * DOLT_RESET                            -> history-affecting when the text
                                               contains HARD, else safe-write
    * DOLT_BRANCH / DOLT_TAG with a quoted
      delete or force flag                  -> history-affecting
    * other DOLT_* procedures               -> `_DOLT_PROC_CLASS`, defaulting
                                               to history-affecting
    * any non-dolt procedure                -> history-affecting
    """
    m = _CALL_PROC.match(stmt_upper)
    if not m:
        return HISTORY_AFFECTING  # CALL with no resolvable proc name -> deny
    proc = m.group(1).upper()
    if proc == "DOLT_RESET":
        return HISTORY_AFFECTING if "HARD" in stmt_upper else SAFE_WRITE
    if proc in ("DOLT_BRANCH", "DOLT_TAG"):
        if _DELETE_FLAG.search(stmt_upper):
            return HISTORY_AFFECTING  # branch/tag deletion erases a ref
        # A force flag lets copy/move/create overwrite an existing ref, which
        # discards whatever that ref pointed at. Matched as a whole quoted
        # token, like the delete flag, so a name such as '-force-me' is not hit.
        if re.search(r"""['"]\s*(?:-f|--force)\s*['"]""", stmt_upper, re.IGNORECASE):
            return HISTORY_AFFECTING
    if proc.startswith("DOLT_"):
        return _DOLT_PROC_CLASS.get(proc, HISTORY_AFFECTING)
    # Non-dolt stored procedure: unknown effect -> deny from an agent context.
    return HISTORY_AFFECTING
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def classify_statement(sql):
    """Classify a single SQL statement into read / safe-write / history-affecting.

    Comments are stripped and leading `(` removed before the leading keyword
    is inspected. An empty statement is a read; a statement with no
    identifiable or known leading keyword is treated as safe-write, since it
    cannot be proven to be a read.

    A read-looking statement that contains `INTO OUTFILE` / `INTO DUMPFILE`
    is classified history-affecting: it writes a file on the server host,
    which Dolt history cannot undo. The check is a plain text search, so a
    string literal containing those words also trips it; that errs on the
    side of refusal.
    """
    stmt = strip_sql_comments(sql).strip().lstrip("(").strip()
    if not stmt:
        return READ
    m = _LEADING_TOKEN.match(stmt)
    if not m:
        return SAFE_WRITE  # cannot identify a leading keyword -> not provably read
    lead = m.group(0).upper()
    stmt_upper = stmt.upper()

    if lead == "CALL":
        return _classify_call(stmt_upper)

    writes_server_file = (
        re.search(r"\bINTO\s+(?:OUTFILE|DUMPFILE)\b", stmt_upper) is not None
    )

    if lead == "WITH":
        # A CTE wraps either a read (SELECT) or a write (INSERT/UPDATE/DELETE/...).
        if _WRITE_VERB_ANYWHERE.search(stmt):
            return SAFE_WRITE
        return HISTORY_AFFECTING if writes_server_file else READ

    if lead == "SET":
        return SAFE_WRITE if any(x in stmt_upper for x in _READ_SET_EXCEPTIONS) else READ

    if lead == "DROP":
        # DROP DATABASE/SCHEMA/USER rewrites/erases beyond version control; other
        # DROPs (TABLE/VIEW/INDEX/TRIGGER) are recoverable via Dolt history.
        if re.match(r"DROP\s+(DATABASE|SCHEMA|USER|ROLE)\b", stmt_upper):
            return HISTORY_AFFECTING
        return SAFE_WRITE

    if lead == "CREATE":
        if re.match(r"CREATE\s+(USER|ROLE)\b", stmt_upper):
            return HISTORY_AFFECTING
        return SAFE_WRITE

    if lead in _READ_LEADERS:
        return HISTORY_AFFECTING if writes_server_file else READ
    if lead in _HISTORY_LEADERS:
        return HISTORY_AFFECTING
    if lead in _SAFE_WRITE_LEADERS:
        return SAFE_WRITE

    # Unknown leading keyword: not provably read -> require the mutation gate.
    return SAFE_WRITE
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def classify_sql(sql):
    """Classify a (possibly multi-statement) SQL string.

    The string is comment-stripped, split into statements, and the verdict is
    the class of the most severe statement per ``_SEVERITY``. Input with no
    statements at all is a read.
    """
    statements = split_statements(strip_sql_comments(sql))
    if not statements:
        return READ
    verdicts = (classify_statement(stmt) for stmt in statements)
    return max(verdicts, key=_SEVERITY.__getitem__)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def gate_decision(sql, allow_mutation, branch, maturity):
    """Decide whether a SQL string may run, and why.

    Returns (allowed: bool, verb_class: str, reason: str).

    Rules (blueprint §3):
      * history-affecting -> always refused (recommend-only).
      * pre-GA maturity (alpha/experimental) -> only `read` is allowed at all.
      * safe-write -> allowed only with allow_mutation AND a non-main,
        non-empty branch (agent/<task>).
      * read -> always allowed.

    A missing/empty *maturity* is treated as "ga"; *branch* is compared after
    trimming whitespace, case-insensitively against "main".
    """
    verb = classify_sql(sql)
    maturity = (maturity or "ga").strip().lower()

    if verb == HISTORY_AFFECTING:
        reason = (
            "history-affecting statements are recommend-only — a human runs "
            "merge/push/--force/reset --hard/branch-delete/DROP DATABASE, never "
            "an agent. Surface the command for the operator instead of executing it."
        )
        return (False, verb, reason)

    if verb != SAFE_WRITE:
        return (True, verb, "read.")

    # safe-write from here on: each guard below refuses with its own reason.
    if maturity in ("alpha", "experimental"):
        reason = (
            f"maturity '{maturity}' is pre-GA: this flavor is read-only until "
            "dolt-watch reports it has reached GA. No writes, even on a branch."
        )
        return (False, verb, reason)

    if not allow_mutation:
        return (False, verb,
                "safe-write requires --allow-mutation (the agent default is read-only).")

    b = (branch or "").strip()
    if b == "" or b.lower() == "main":
        reason = (
            "safe-write must target an agent-owned branch (--branch agent/<task>), "
            "never `main`."
        )
        return (False, verb, reason)

    return (True, verb, f"safe-write permitted on branch '{b}'.")
|