@jaguilar87/gaia 5.0.8 → 5.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +13 -0
- package/bin/README.md +10 -3
- package/bin/cli/_install_helpers.py +0 -3
- package/bin/cli/approvals.py +341 -238
- package/bin/cli/brief.py +45 -4
- package/bin/cli/cleanup.py +304 -4
- package/bin/cli/doctor.py +1 -5
- package/bin/cli/uninstall.py +20 -0
- package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-ops/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-ops/hooks/modules/core/plugin_setup.py +0 -5
- package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-ops/hooks/modules/security/capability_classes.py +83 -6
- package/dist/gaia-ops/hooks/modules/security/inline_ast_analyzer.py +237 -0
- package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +434 -1
- package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +177 -20
- package/dist/gaia-ops/hooks/post_compact.py +1 -0
- package/dist/gaia-ops/hooks/pre_compact.py +1 -0
- package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
- package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +27 -7
- package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +11 -6
- package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
- package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
- package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +10 -5
- package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
- package/dist/gaia-ops/skills/security-tiers/SKILL.md +1 -1
- package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +20 -6
- package/dist/gaia-ops/skills/subagent-request-approval/reference.md +23 -15
- package/dist/gaia-ops/tools/migration/README.md +10 -12
- package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
- package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
- package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
- package/dist/gaia-security/hooks/adapters/claude_code.py +19 -85
- package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
- package/dist/gaia-security/hooks/modules/core/plugin_setup.py +0 -5
- package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
- package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
- package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
- package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
- package/dist/gaia-security/hooks/modules/security/capability_classes.py +83 -6
- package/dist/gaia-security/hooks/modules/security/inline_ast_analyzer.py +237 -0
- package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +434 -1
- package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
- package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
- package/dist/gaia-security/hooks/modules/tools/bash_validator.py +177 -20
- package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
- package/gaia/approvals/store.py +87 -9
- package/gaia/briefs/__init__.py +4 -0
- package/gaia/briefs/store.py +91 -0
- package/gaia/store/schema.sql +38 -1
- package/gaia/store/writer.py +400 -0
- package/hooks/adapters/claude_code.py +19 -85
- package/hooks/elicitation_result.py +20 -75
- package/hooks/modules/context/context_injector.py +23 -7
- package/hooks/modules/core/plugin_setup.py +0 -5
- package/hooks/modules/events/event_writer.py +63 -96
- package/hooks/modules/security/__init__.py +0 -2
- package/hooks/modules/security/approval_cleanup.py +238 -69
- package/hooks/modules/security/approval_grants.py +506 -1103
- package/hooks/modules/security/capability_classes.py +83 -6
- package/hooks/modules/security/inline_ast_analyzer.py +237 -0
- package/hooks/modules/security/mutative_verbs.py +434 -1
- package/hooks/modules/session/pending_scanner.py +150 -90
- package/hooks/modules/session/session_manifest.py +257 -28
- package/hooks/modules/tools/bash_validator.py +177 -20
- package/hooks/post_compact.py +1 -0
- package/hooks/pre_compact.py +1 -0
- package/hooks/user_prompt_submit.py +20 -0
- package/package.json +1 -1
- package/pyproject.toml +20 -1
- package/scripts/bootstrap_database.sh +66 -17
- package/scripts/migrations/README.md +26 -14
- package/scripts/migrations/schema.checksum +2 -2
- package/scripts/migrations/v18_to_v19.sql +36 -0
- package/scripts/migrations/v19_to_v20.sql +20 -0
- package/skills/agent-approval-protocol/SKILL.md +27 -7
- package/skills/agent-approval-protocol/reference.md +11 -6
- package/skills/gaia-patterns/reference.md +2 -2
- package/skills/orchestrator-present-approval/SKILL.md +69 -28
- package/skills/orchestrator-present-approval/reference.md +16 -3
- package/skills/orchestrator-present-approval/template.md +10 -5
- package/skills/pending-approvals/SKILL.md +16 -11
- package/skills/security-tiers/SKILL.md +1 -1
- package/skills/subagent-request-approval/SKILL.md +20 -6
- package/skills/subagent-request-approval/reference.md +23 -15
- package/tools/migration/README.md +10 -12
- package/tools/scan/orchestrator.py +194 -10
- package/tools/scan/tests/test_integration.py +1 -2
- package/bin/cli/plans.py +0 -517
- package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
- package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
- package/dist/gaia-ops/tools/scan/merge.py +0 -213
- package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
- package/tools/context/deep_merge.py +0 -159
- package/tools/migration/migrate_04_harness_events.py +0 -132
- package/tools/migration/migrate_04_harness_events.sh +0 -23
- package/tools/scan/merge.py +0 -213
- package/tools/scan/tests/test_merge.py +0 -269
|
@@ -52,8 +52,12 @@ except ImportError:
|
|
|
52
52
|
|
|
53
53
|
try:
|
|
54
54
|
from .inline_ast_analyzer import analyze_python_inline as _analyze_python_inline
|
|
55
|
+
from .inline_ast_analyzer import (
|
|
56
|
+
is_provably_read_only_python as _is_provably_read_only_python,
|
|
57
|
+
)
|
|
55
58
|
except ImportError: # pragma: no cover -- defensive
|
|
56
59
|
_analyze_python_inline = None
|
|
60
|
+
_is_provably_read_only_python = None
|
|
57
61
|
logging.getLogger(__name__).warning(
|
|
58
62
|
"inline_ast_analyzer.analyze_python_inline not importable; "
|
|
59
63
|
"AST-based Python inline analysis disabled (falling back to regex)"
|
|
@@ -289,6 +293,12 @@ COMMAND_SUBCOMMAND_TIER_EXCEPTIONS: Dict[Tuple[str, str], str] = {
|
|
|
289
293
|
# exemption is explicit and carries the same DENY-verb guard as `gaia brief`:
|
|
290
294
|
# `gaia plan delete` (whole-record destruction) stays T3.
|
|
291
295
|
("gaia", "plan"): CATEGORY_READ_ONLY,
|
|
296
|
+
# `gaia task <verb>` (add/set-status/reorder/show/list): local task-lifecycle
|
|
297
|
+
# bookkeeping in gaia.db — reversible status transitions, no external effects,
|
|
298
|
+
# mirrors the brief/ac/plan exemptions. `gaia task remove` (irreversible row
|
|
299
|
+
# deletion) stays T3 via the per-group deny-verbs guard in
|
|
300
|
+
# COMMAND_SUBCOMMAND_EXTRA_DENY_VERBS below.
|
|
301
|
+
("gaia", "task"): CATEGORY_READ_ONLY,
|
|
292
302
|
}
|
|
293
303
|
|
|
294
304
|
# Verbs that stay gated even under an excepted group above. The exception
|
|
@@ -299,6 +309,18 @@ COMMAND_SUBCOMMAND_EXCEPTION_DENY_VERBS: FrozenSet[str] = frozenset({
|
|
|
299
309
|
"delete", "destroy", "purge", "wipe", "drop", "shred", "erase",
|
|
300
310
|
})
|
|
301
311
|
|
|
312
|
+
# Per-group EXTRA deny verbs that augment the global set above for specific
|
|
313
|
+
# (base_cmd, subcommand) pairs. Use this when a verb is destructive for one
|
|
314
|
+
# group but is a legitimate reversible bookkeeping operation for another
|
|
315
|
+
# (e.g., `gaia ac remove` removes a single reversible AC row and must stay
|
|
316
|
+
# non-T3, but `gaia task remove` deletes the task record permanently and must
|
|
317
|
+
# stay T3). The enforcement logic ORs the global set with this per-group set.
|
|
318
|
+
COMMAND_SUBCOMMAND_EXTRA_DENY_VERBS: Dict[Tuple[str, str], FrozenSet[str]] = {
|
|
319
|
+
# `gaia task remove` is an irreversible row deletion (no un-delete in the
|
|
320
|
+
# tasks store), unlike `gaia ac remove` (AC rows can be re-added).
|
|
321
|
+
("gaia", "task"): frozenset({"remove"}),
|
|
322
|
+
}
|
|
323
|
+
|
|
302
324
|
|
|
303
325
|
# ============================================================================
|
|
304
326
|
# PRINCIPLE: consent-REDUCING operations are not T3.
|
|
@@ -367,6 +389,93 @@ _PYTHON_INTERPRETERS: FrozenSet[str] = frozenset({
|
|
|
367
389
|
"python3.10", "python3.11", "python3.12", "python3.13",
|
|
368
390
|
})
|
|
369
391
|
|
|
392
|
+
# ---------------------------------------------------------------------------
|
|
393
|
+
# Script-file interpreters (Step 3b2)
|
|
394
|
+
# ---------------------------------------------------------------------------
|
|
395
|
+
# Interpreters that take a SCRIPT FILE as a positional argument
|
|
396
|
+
# (``python3 deploy.py``, ``bash setup.sh``, ``node migrate.js``). Without
|
|
397
|
+
# this set the verb scanner sees only the filename token -- which carries a
|
|
398
|
+
# ``.`` and so is rejected as a non-subcommand -- and the command slips through
|
|
399
|
+
# as safe by elimination, executing the file's mutations without approval.
|
|
400
|
+
# The fix reads the file and classifies it by REAL invocation (AST for Python,
|
|
401
|
+
# the blocked/mutative regex layer for shells and other interpreters), never by
|
|
402
|
+
# the bare ``<interp> <file>`` shape. ``ruby``/``perl``/``php``/``node`` have
|
|
403
|
+
# no vendored AST, so their files go through the same regex layer as shells.
|
|
404
|
+
_SCRIPT_FILE_INTERPRETERS: FrozenSet[str] = frozenset({
|
|
405
|
+
"python", "python3",
|
|
406
|
+
"python3.10", "python3.11", "python3.12", "python3.13",
|
|
407
|
+
"bash", "sh", "zsh", "dash", "ksh",
|
|
408
|
+
"node", "ruby", "perl", "php",
|
|
409
|
+
})
|
|
410
|
+
|
|
411
|
+
# File extensions whose interpreter is implied by ``./script`` (no explicit
|
|
412
|
+
# interpreter token). Maps the extension to the analysis lane used for its
|
|
413
|
+
# content: "python" routes through the AST analyzer, "shell" through the
|
|
414
|
+
# blocked/mutative regex layer.
|
|
415
|
+
_SHEBANG_EXT_LANES: Dict[str, str] = {
|
|
416
|
+
".py": "python",
|
|
417
|
+
".sh": "shell",
|
|
418
|
+
".bash": "shell",
|
|
419
|
+
".zsh": "shell",
|
|
420
|
+
".js": "shell",
|
|
421
|
+
".mjs": "shell",
|
|
422
|
+
".cjs": "shell",
|
|
423
|
+
".rb": "shell",
|
|
424
|
+
".pl": "shell",
|
|
425
|
+
".php": "shell",
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
# Cap on bytes read from a script file during classification. A script larger
|
|
429
|
+
# than this is unusual for the inline-evasion case and reading it in full would
|
|
430
|
+
# add latency to every hook invocation; we read a bounded prefix, which is
|
|
431
|
+
# enough to catch the mutative calls an evasion script front-loads.
|
|
432
|
+
_MAX_SCRIPT_READ_BYTES = 256 * 1024
|
|
433
|
+
|
|
434
|
+
# Interpreter flags that CONSUME the next token as their value AND mean the
|
|
435
|
+
# invocation has no script-file positional (the payload is inline code or a
|
|
436
|
+
# module name). When one of these is present the script-file lane defers --
|
|
437
|
+
# the inline path (Step 3b) or ordinary verb scanning handles the command.
|
|
438
|
+
# python -c <code> / -m <module> bash -c <code> node -e <code>
|
|
439
|
+
_INTERP_NON_SCRIPT_VALUE_FLAGS: Dict[str, FrozenSet[str]] = {
|
|
440
|
+
"python": frozenset({"-c", "-m"}),
|
|
441
|
+
"python3": frozenset({"-c", "-m"}),
|
|
442
|
+
"python3.10": frozenset({"-c", "-m"}),
|
|
443
|
+
"python3.11": frozenset({"-c", "-m"}),
|
|
444
|
+
"python3.12": frozenset({"-c", "-m"}),
|
|
445
|
+
"python3.13": frozenset({"-c", "-m"}),
|
|
446
|
+
"bash": frozenset({"-c"}),
|
|
447
|
+
"sh": frozenset({"-c"}),
|
|
448
|
+
"zsh": frozenset({"-c"}),
|
|
449
|
+
"dash": frozenset({"-c"}),
|
|
450
|
+
"ksh": frozenset({"-c"}),
|
|
451
|
+
"node": frozenset({"-e", "--eval", "-p", "--print", "-r", "--require"}),
|
|
452
|
+
"ruby": frozenset({"-e"}),
|
|
453
|
+
"perl": frozenset({"-e", "-E"}),
|
|
454
|
+
"php": frozenset({"-r"}),
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
# ---------------------------------------------------------------------------
|
|
458
|
+
# Python ``-m <package-manager>`` re-dispatch (Brief 91, AC-7)
|
|
459
|
+
# ---------------------------------------------------------------------------
|
|
460
|
+
# ``python3 -m pip install x`` is the SAME operation as ``pip install x`` -- the
|
|
461
|
+
# ``-m`` form merely runs the package manager as a module of the interpreter.
|
|
462
|
+
# Before this guard, the interpreter (``python3``) was the base command, the
|
|
463
|
+
# module name (``pip``) was swallowed into flag_tokens as the value of ``-m``,
|
|
464
|
+
# and classification limped along ONLY when a generic verb (``install``)
|
|
465
|
+
# happened to remain in MUTATIVE_VERBS. That is accidental, not robust:
|
|
466
|
+
# * ``python3 -m poetry add x`` slipped through (``add`` was removed from
|
|
467
|
+
# MUTATIVE_VERBS as a git-add false-positive), bypassing T3 entirely.
|
|
468
|
+
# * the command reported cli_family=runtime, never recognized as ``package``.
|
|
469
|
+
# The fix recognizes ``<python> -m <pkg-mgr> <args...>`` and RE-DISPATCHES it as
|
|
470
|
+
# ``<pkg-mgr> <args...>`` so it classifies identically to the direct CLI form:
|
|
471
|
+
# ``install``/``uninstall``/``add`` -> MUTATIVE/T3, ``list``/``download`` ->
|
|
472
|
+
# READ_ONLY (matching real pip semantics). Scoped to the package-manager
|
|
473
|
+
# modules below so ``python3 -m pytest`` / ``python3 -m http.server`` are NOT
|
|
474
|
+
# rerouted -- they fall through to ordinary detection unchanged.
|
|
475
|
+
_PY_MODULE_PACKAGE_MANAGERS: FrozenSet[str] = frozenset({
|
|
476
|
+
"pip", "pip3", "pipenv", "poetry", "uv",
|
|
477
|
+
})
|
|
478
|
+
|
|
370
479
|
# ---------------------------------------------------------------------------
|
|
371
480
|
# Layer 1: Shell command extraction from string literals
|
|
372
481
|
# ---------------------------------------------------------------------------
|
|
@@ -1055,6 +1164,30 @@ def detect_mutative_command(command: str) -> MutativeResult:
|
|
|
1055
1164
|
reason=cap.reason,
|
|
1056
1165
|
)
|
|
1057
1166
|
|
|
1167
|
+
# --- Step 1c-py: Python ``-m <pkg-mgr>`` re-dispatch (Brief 91, AC-7) ---
|
|
1168
|
+
# ``python3 -m pip install x`` is the same operation as ``pip install x``.
|
|
1169
|
+
# Recognize the ``<python> -m <package-manager> <args...>`` shape and re-run
|
|
1170
|
+
# detection on the rewritten ``<package-manager> <args...>`` command so it
|
|
1171
|
+
# classifies IDENTICALLY to the direct CLI form (install/uninstall -> T3,
|
|
1172
|
+
# list/download -> read-only). Returns None when the command is not a
|
|
1173
|
+
# package-manager module invocation, so detection continues unchanged --
|
|
1174
|
+
# ``python3 -m pytest`` and ``python3 -m http.server`` are NOT rerouted.
|
|
1175
|
+
py_module_result = _check_python_module_runner(base_cmd, semantics)
|
|
1176
|
+
if py_module_result is not None:
|
|
1177
|
+
return py_module_result
|
|
1178
|
+
|
|
1179
|
+
# --- Step 1d: Script-file analysis (python3 deploy.py, bash setup.sh, ./x) ---
|
|
1180
|
+
# An interpreter invoked with a script FILE as a positional argument, or a
|
|
1181
|
+
# direct ``./script`` invocation, hides its mutations inside the file --
|
|
1182
|
+
# the verb scanner sees only the filename. Read the referenced file and
|
|
1183
|
+
# classify it by REAL invocation, the same standard the inline -c path meets.
|
|
1184
|
+
# Placed before the single-token early return so a bare ``./deploy.sh`` (one
|
|
1185
|
+
# token) is still inspected. Returns None when the command is not a
|
|
1186
|
+
# recognized script-file shape, so detection continues normally.
|
|
1187
|
+
script_result = _check_script_file(command, base_cmd, family, semantics)
|
|
1188
|
+
if script_result is not None:
|
|
1189
|
+
return script_result
|
|
1190
|
+
|
|
1058
1191
|
# --- Step 2: Single-token command (no verb to extract) ---
|
|
1059
1192
|
if len(tokens) == 1:
|
|
1060
1193
|
return MutativeResult(
|
|
@@ -1191,10 +1324,15 @@ def detect_mutative_command(command: str) -> MutativeResult:
|
|
|
1191
1324
|
if len(semantics.non_flag_tokens) > 1 else ""
|
|
1192
1325
|
)
|
|
1193
1326
|
# Whole-record destruction (delete/destroy/...) stays gated even within
|
|
1194
|
-
# an excepted group; only reversible bookkeeping is exempted.
|
|
1327
|
+
# an excepted group; only reversible bookkeeping is exempted. Also
|
|
1328
|
+
# check per-group extra deny verbs (COMMAND_SUBCOMMAND_EXTRA_DENY_VERBS)
|
|
1329
|
+
# for verbs that are destructive in one group but reversible in another.
|
|
1330
|
+
_extra_deny = COMMAND_SUBCOMMAND_EXTRA_DENY_VERBS.get(subcommand_key, frozenset())
|
|
1195
1331
|
verb_is_destructive = (
|
|
1196
1332
|
group_verb.split("-", 1)[0] in COMMAND_SUBCOMMAND_EXCEPTION_DENY_VERBS
|
|
1197
1333
|
or group_verb in COMMAND_SUBCOMMAND_EXCEPTION_DENY_VERBS
|
|
1334
|
+
or group_verb.split("-", 1)[0] in _extra_deny
|
|
1335
|
+
or group_verb in _extra_deny
|
|
1198
1336
|
)
|
|
1199
1337
|
if subcommand_key in COMMAND_SUBCOMMAND_TIER_EXCEPTIONS:
|
|
1200
1338
|
if verb_is_destructive:
|
|
@@ -1562,6 +1700,274 @@ def _extract_python_payload(command: str, base_cmd: str) -> str:
|
|
|
1562
1700
|
return ""
|
|
1563
1701
|
|
|
1564
1702
|
|
|
1703
|
+
def _locate_python_module_token(raw_tokens) -> "Optional[Tuple[str, int]]":
    """Find the module named by ``-m`` in a Python interpreter command line.

    Walks the interpreter options that follow ``raw_tokens[0]`` the way the
    CPython launcher does: short options may be clustered (``-Im``), ``-m``
    may carry its value attached (``-mpip``) or as the next token, and
    ``-W`` / ``-X`` / ``--check-hash-based-pycs`` consume a value that must
    not be mistaken for a positional argument.

    Returns ``(module, index)`` where ``index`` is the position of the last
    token consumed by ``-m`` (so the module's own arguments start at
    ``index + 1``), or ``None`` when the command line is not in ``-m`` mode
    (inline ``-c`` code, a positional script argument, or no ``-m`` at all).
    """
    total = len(raw_tokens)
    i = 1
    while i < total:
        token = raw_tokens[i]
        # A non-flag token before any ``-m`` means a script-file / positional
        # invocation, not module mode -- the script-file lane owns it.
        if not token.startswith("-"):
            return None
        if token == "--check-hash-based-pycs":
            # Long option whose value is the following token.
            i += 2
            continue
        if token.startswith("--"):
            # Other long options (and a bare ``--``) are skipped, matching
            # the previous behaviour of ignoring any dash-prefixed token.
            i += 1
            continue

        cluster = token[1:]  # empty for the bare "-" sentinel -> just skipped
        value_in_next_token = False
        for pos, letter in enumerate(cluster):
            attached = cluster[pos + 1:]
            if letter == "m":
                if attached:
                    return attached, i
                if i + 1 < total:
                    return raw_tokens[i + 1], i + 1
                return None  # dangling ``-m`` with no module name
            if letter == "c":
                # Inline code terminates option parsing; not module mode.
                return None
            if letter in ("W", "X"):
                # The remainder of the cluster is the option value; when
                # there is none the value is the next token.
                value_in_next_token = not attached
                break
        i += 2 if value_in_next_token else 1
    return None


def _check_python_module_runner(
    base_cmd: str, semantics: "CommandSemantics",
) -> "Optional[MutativeResult]":
    """Re-dispatch ``python -m <pkg-mgr> ...`` as the package-manager command.

    ``python3 -m pip install x`` is the same operation as ``pip install x``,
    but with the interpreter as the base command the module name is hidden
    behind ``-m``.  This helper recognizes
    ``<python> [interp-flags] -m <pkg-mgr> <args...>``, rewrites it to
    ``<pkg-mgr> <args...>`` and re-runs ``detect_mutative_command`` on the
    rewrite, so the category, verb, flags, family and confidence are those of
    the direct CLI form.

    The module is read from the raw token stream.  Interpreter switches in
    front of ``-m`` are tolerated, including switches that consume a value
    (``-W ignore``, ``-X dev``), clustered switches (``-Im``) and the attached
    form (``-mpip``); previously those shapes were not recognized and fell
    through to the script-file lane with a flag value misread as a script path.

    Args:
        base_cmd: The command's base executable name.
        semantics: Parsed command; only ``semantics.tokens`` is read.

    Returns:
        A ``MutativeResult`` mirroring the re-dispatched classification with a
        reason that records the re-dispatch, or ``None`` when ``base_cmd`` is
        not a Python interpreter, no ``-m`` module is present, or the module is
        not listed in ``_PY_MODULE_PACKAGE_MANAGERS`` (e.g. ``python3 -m pytest``).
    """
    if base_cmd not in _PYTHON_INTERPRETERS:
        return None

    raw_tokens = semantics.tokens
    located = _locate_python_module_token(raw_tokens)
    if located is None:
        return None

    module, last_consumed = located
    if module.lower() not in _PY_MODULE_PACKAGE_MANAGERS:
        return None

    # Rewrite ``python3 [flags] -m <pkg-mgr> <rest...>`` -> ``<pkg-mgr> <rest...>``
    # and re-classify.  ``shlex.quote`` keeps argument boundaries intact so the
    # rewritten command tokenizes the same way the direct CLI form would.
    import shlex

    rest = raw_tokens[last_consumed + 1:]
    rewritten = " ".join(shlex.quote(t) for t in (module, *rest))
    inner = detect_mutative_command(rewritten)

    # Preserve the inner classification verbatim; only the reason is wrapped so
    # the audit trail shows that a re-dispatch happened.
    return MutativeResult(
        is_mutative=inner.is_mutative,
        category=inner.category,
        verb=inner.verb,
        dangerous_flags=inner.dangerous_flags,
        cli_family=inner.cli_family,
        confidence=inner.confidence,
        reason=(
            f"'{base_cmd} -m {module}' re-dispatched as '{module}': {inner.reason}"
        ),
    )
|
|
1770
|
+
|
|
1771
|
+
|
|
1772
|
+
def _resolve_script_argument(
|
|
1773
|
+
base_cmd: str, semantics: "CommandSemantics",
|
|
1774
|
+
) -> "Optional[Tuple[str, str]]":
|
|
1775
|
+
"""Identify a script-file invocation and return ``(path, lane)``.
|
|
1776
|
+
|
|
1777
|
+
Two shapes are recognized:
|
|
1778
|
+
|
|
1779
|
+
* ``<interpreter> <script-file>`` -- the first positional argument after a
|
|
1780
|
+
known interpreter, whose lane (``"python"`` or ``"shell"``) is decided by
|
|
1781
|
+
the interpreter, not the filename.
|
|
1782
|
+
* ``./script`` / ``path/to/script`` -- a direct executable invocation whose
|
|
1783
|
+
lane is inferred from the file extension via ``_SHEBANG_EXT_LANES``.
|
|
1784
|
+
|
|
1785
|
+
Returns ``None`` when the command is not a script-file invocation, so the
|
|
1786
|
+
caller continues with ordinary verb detection.
|
|
1787
|
+
"""
|
|
1788
|
+
raw_tokens = semantics.tokens
|
|
1789
|
+
if not raw_tokens:
|
|
1790
|
+
return None
|
|
1791
|
+
|
|
1792
|
+
if base_cmd in _SCRIPT_FILE_INTERPRETERS:
|
|
1793
|
+
lane = "python" if base_cmd in _PYTHON_INTERPRETERS else "shell"
|
|
1794
|
+
defer_flags = _INTERP_NON_SCRIPT_VALUE_FLAGS.get(base_cmd, frozenset())
|
|
1795
|
+
# Walk the args (original casing) and return the first true positional
|
|
1796
|
+
# -- the script file. Standalone interpreter switches (-u, -O, -x, ...)
|
|
1797
|
+
# are skipped; flags that consume the next token as inline code or a
|
|
1798
|
+
# module name (-c, -m, -e, ...) mean there is NO script file, so we
|
|
1799
|
+
# defer to the inline path / verb scanner by returning None. The stdin
|
|
1800
|
+
# sentinel "-" likewise defers (heredoc path owns it).
|
|
1801
|
+
for token in raw_tokens[1:]:
|
|
1802
|
+
if token == "-":
|
|
1803
|
+
return None
|
|
1804
|
+
if token in defer_flags:
|
|
1805
|
+
return None
|
|
1806
|
+
if token.startswith("-"):
|
|
1807
|
+
continue
|
|
1808
|
+
return (token, lane)
|
|
1809
|
+
return None
|
|
1810
|
+
|
|
1811
|
+
# Direct ``./script`` or ``path/script.ext`` invocation: the executable
|
|
1812
|
+
# token IS the script. Use the original-case token so the path resolves
|
|
1813
|
+
# correctly on case-sensitive filesystems.
|
|
1814
|
+
invoked = raw_tokens[0]
|
|
1815
|
+
if "/" in invoked:
|
|
1816
|
+
for ext, lane in _SHEBANG_EXT_LANES.items():
|
|
1817
|
+
if invoked.endswith(ext):
|
|
1818
|
+
return (invoked, lane)
|
|
1819
|
+
return None
|
|
1820
|
+
|
|
1821
|
+
|
|
1822
|
+
def _read_script_content(path: str) -> "Optional[str]":
|
|
1823
|
+
"""Read a bounded prefix of a script file for content classification.
|
|
1824
|
+
|
|
1825
|
+
Returns ``None`` when the path cannot be resolved to a readable regular
|
|
1826
|
+
file -- the caller treats that as the conservative (mutative) case, because
|
|
1827
|
+
an interpreter pointed at an un-inspectable payload could do anything.
|
|
1828
|
+
"""
|
|
1829
|
+
import os
|
|
1830
|
+
|
|
1831
|
+
try:
|
|
1832
|
+
if not os.path.isfile(path):
|
|
1833
|
+
return None
|
|
1834
|
+
with open(path, "r", encoding="utf-8", errors="replace") as fh:
|
|
1835
|
+
return fh.read(_MAX_SCRIPT_READ_BYTES)
|
|
1836
|
+
except (OSError, ValueError):
|
|
1837
|
+
return None
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def _check_script_file(
    command: str, base_cmd: str, family: str, semantics: "CommandSemantics",
) -> "Optional[MutativeResult]":
    """Classify ``<interpreter> <file>`` / ``./script`` by the file's content.

    The verb scanner only ever sees the filename of a script invocation, so
    the referenced file is read and classified by what it actually invokes:

    * un-inspectable file -> mutative (conservative default);
    * Python lane with the AST analyzer available -> mutative when the
      analyzer reports a dangerous call, read-only when it parsed cleanly;
    * everything else, including Python that failed to parse -> the
      line-by-line regex classifier.

    Args:
        command: The full command string (not read by this function).
        base_cmd: The command's base executable name, used in messages.
        family: CLI family copied into every result.
        semantics: Parsed command, forwarded to ``_resolve_script_argument``.

    Returns:
        A ``MutativeResult`` for a script-file invocation, or ``None`` when
        the command is not one.
    """
    target = _resolve_script_argument(base_cmd, semantics)
    if target is None:
        return None
    script_path, lane = target

    content = _read_script_content(script_path)
    if content is None:
        # A missing or unreadable payload cannot be proven safe, so it
        # requires consent.
        unreadable_reason = (
            f"Interpreter '{base_cmd}' invoked on script "
            f"'{script_path}' that is not a readable file -- cannot "
            f"verify the payload, requiring approval (conservative default)"
        )
        return MutativeResult(
            is_mutative=True,
            category=CATEGORY_MUTATIVE,
            verb="script-file-unreadable",
            cli_family=family,
            confidence="medium",
            reason=unreadable_reason,
        )

    use_ast = lane == "python" and _analyze_python_inline is not None
    if not use_ast:
        return _classify_script_content_by_regex(content, script_path, family)

    ast_result = _analyze_python_inline(content)
    if ast_result.is_dangerous:
        dangerous_reason = (
            f"Script '{script_path}' invokes {ast_result.detail} "
            f"({ast_result.category})"
        )
        return MutativeResult(
            is_mutative=True,
            category=CATEGORY_MUTATIVE,
            verb=ast_result.label,
            cli_family=family,
            confidence="high",
            reason=dangerous_reason,
        )

    if ast_result.parse_failed:
        # The AST could not be built; fall back to the shell/regex lane.
        return _classify_script_content_by_regex(content, script_path, family)

    clean_reason = (
        f"Python script '{script_path}' has no mutative invocation "
        f"(AST analysis)"
    )
    return MutativeResult(
        is_mutative=False,
        category=CATEGORY_READ_ONLY,
        verb="script-file",
        cli_family=family,
        confidence="medium",
        reason=clean_reason,
    )
|
|
1906
|
+
|
|
1907
|
+
|
|
1908
|
+
def _classify_script_content_by_regex(
    content: str, script_path: str, family: str,
) -> MutativeResult:
    """Classify shell / non-Python script content via the existing regex layer.

    The content is scanned line by line; blank lines and lines starting with
    ``#`` are skipped.  Each remaining line goes through two engines:

    * ``_is_blocked_command`` (when available) -- a blocked line makes the
      script mutative with verb ``script-blocked-cmd``.
    * ``detect_mutative_command`` -- the first line it reports as mutative
      makes the script mutative, carrying over that line's verb, dangerous
      flags and confidence.

    Because ``detect_mutative_command`` can itself open a script named on the
    line being scanned, a script that invokes itself (or two scripts that
    invoke each other) would otherwise re-enter this function without bound.
    A re-entrancy guard keyed on the resolved script path breaks that cycle:
    a nested request for a script already being scanned is answered as
    non-mutative, which is sound because the enclosing scan is still walking
    that same content and will report any mutative line itself.

    Args:
        content: Script text to scan.
        script_path: Path of the script, used in messages and as guard key.
        family: CLI family copied into every result.

    Returns:
        A mutative ``MutativeResult`` for the first blocked or mutative line,
        otherwise a read-only result with verb ``script-file``.
    """
    import os

    # Paths currently being scanned, stored on the function object so nested
    # invocations reached through detect_mutative_command share it.
    in_progress = _classify_script_content_by_regex.__dict__.setdefault(
        "_in_progress", set()
    )
    try:
        guard_key = os.path.realpath(script_path)
    except (OSError, ValueError):
        guard_key = script_path

    if guard_key in in_progress:
        return MutativeResult(
            is_mutative=False,
            category=CATEGORY_READ_ONLY,
            verb="script-file",
            cli_family=family,
            confidence="medium",
            reason=(
                f"Script '{script_path}' is already being analyzed "
                f"(recursive invocation); the enclosing scan covers its content"
            ),
        )

    in_progress.add(guard_key)
    try:
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            if _is_blocked_command is not None:
                blocked = _is_blocked_command(line)
                if blocked.is_blocked:
                    return MutativeResult(
                        is_mutative=True,
                        category=CATEGORY_MUTATIVE,
                        verb="script-blocked-cmd",
                        cli_family=family,
                        confidence="high",
                        reason=(
                            f"Script '{script_path}' contains blocked command: "
                            f"{blocked.category}"
                        ),
                    )

            line_result = detect_mutative_command(line)
            if line_result.is_mutative:
                return MutativeResult(
                    is_mutative=True,
                    category=CATEGORY_MUTATIVE,
                    verb=line_result.verb,
                    dangerous_flags=line_result.dangerous_flags,
                    cli_family=family,
                    confidence=line_result.confidence,
                    reason=(
                        f"Script '{script_path}' line is mutative: "
                        f"{line_result.reason}"
                    ),
                )
    finally:
        # Always release the guard, including on early return or error.
        in_progress.discard(guard_key)

    return MutativeResult(
        is_mutative=False,
        category=CATEGORY_READ_ONLY,
        verb="script-file",
        cli_family=family,
        confidence="medium",
        reason=f"Script '{script_path}' has no mutative or blocked line",
    )
|
|
1969
|
+
|
|
1970
|
+
|
|
1565
1971
|
def _check_inline_code(command: str, base_cmd: str, family: str, skip_length_check: bool = False) -> MutativeResult:
|
|
1566
1972
|
"""Check inline code for dangerous patterns.
|
|
1567
1973
|
|
|
@@ -1691,6 +2097,33 @@ def _layer3_length_check(
|
|
|
1691
2097
|
break
|
|
1692
2098
|
|
|
1693
2099
|
if not skip_length_check and len(code_portion) > MAX_NORMAL_INLINE_LENGTH:
|
|
2100
|
+
# AC-9 (Brief: endurecimiento-de-tests-del-security-core): the length
|
|
2101
|
+
# heuristic is a *proxy* for "too complex to vet"; it must not flag
|
|
2102
|
+
# inline code that is PROVABLY read-only just because it is long. For
|
|
2103
|
+
# Python payloads we re-parse the exact code and require a positive
|
|
2104
|
+
# allowlist match (import + SELECT/PRAGMA + print + local assignments,
|
|
2105
|
+
# no write call, no attribute/subscript assignment, no dynamic
|
|
2106
|
+
# dispatch). This is the inverse of analyze_python_inline's blocklist:
|
|
2107
|
+
# a mutation never classifies as read-only, so the exemption cannot
|
|
2108
|
+
# open a hole -- an AST-clean-but-mutating payload (``cur.execute(
|
|
2109
|
+
# 'INSERT ...')``, ``con.commit()``) is NOT provably read-only and
|
|
2110
|
+
# stays flagged. Non-Python payloads are never exempted (no AST).
|
|
2111
|
+
if (
|
|
2112
|
+
base_cmd in _PYTHON_INTERPRETERS
|
|
2113
|
+
and _is_provably_read_only_python is not None
|
|
2114
|
+
and _is_provably_read_only_python(_extract_python_payload(command, base_cmd))
|
|
2115
|
+
):
|
|
2116
|
+
return MutativeResult(
|
|
2117
|
+
is_mutative=False,
|
|
2118
|
+
category=CATEGORY_READ_ONLY,
|
|
2119
|
+
verb="inline-code-readonly",
|
|
2120
|
+
cli_family=family,
|
|
2121
|
+
confidence="medium",
|
|
2122
|
+
reason=(
|
|
2123
|
+
f"Inline Python is long ({len(code_portion)} chars) but "
|
|
2124
|
+
"provably read-only (allowlisted constructs only)"
|
|
2125
|
+
),
|
|
2126
|
+
)
|
|
1694
2127
|
return MutativeResult(
|
|
1695
2128
|
is_mutative=True,
|
|
1696
2129
|
category=CATEGORY_MUTATIVE,
|