@jaguilar87/gaia 5.0.9 → 5.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +17 -0
  4. package/bin/README.md +4 -2
  5. package/bin/cli/_install_helpers.py +0 -3
  6. package/bin/cli/ac.py +2 -2
  7. package/bin/cli/brief.py +42 -7
  8. package/bin/cli/cleanup.py +304 -4
  9. package/bin/cli/doctor.py +1 -5
  10. package/bin/cli/uninstall.py +20 -0
  11. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  12. package/dist/gaia-ops/hooks/adapters/__init__.py +12 -2
  13. package/dist/gaia-ops/hooks/adapters/base.py +122 -5
  14. package/dist/gaia-ops/hooks/adapters/claude_code.py +175 -53
  15. package/dist/gaia-ops/hooks/adapters/host_session.py +53 -0
  16. package/dist/gaia-ops/hooks/adapters/host_transcript.py +75 -0
  17. package/dist/gaia-ops/hooks/adapters/registry.py +87 -0
  18. package/dist/gaia-ops/hooks/adapters/types.py +134 -6
  19. package/dist/gaia-ops/hooks/modules/agents/transcript_reader.py +34 -71
  20. package/dist/gaia-ops/hooks/modules/core/hook_entry.py +6 -4
  21. package/dist/gaia-ops/hooks/modules/core/plugin_setup.py +0 -5
  22. package/dist/gaia-ops/hooks/modules/core/state.py +12 -10
  23. package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +2 -2
  24. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +7 -7
  25. package/dist/gaia-ops/hooks/modules/security/capability_classes.py +83 -6
  26. package/dist/gaia-ops/hooks/modules/security/inline_ast_analyzer.py +237 -0
  27. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +414 -3
  28. package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +4 -3
  29. package/dist/gaia-ops/hooks/modules/session/session_manager.py +6 -15
  30. package/dist/gaia-ops/hooks/modules/session/session_manifest.py +3 -3
  31. package/dist/gaia-ops/hooks/modules/session/session_registry.py +3 -3
  32. package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +191 -32
  33. package/dist/gaia-ops/hooks/modules/tools/hook_response.py +14 -12
  34. package/dist/gaia-ops/hooks/post_tool_use.py +2 -2
  35. package/dist/gaia-ops/hooks/pre_tool_use.py +9 -8
  36. package/dist/gaia-ops/hooks/stop_hook.py +2 -2
  37. package/dist/gaia-ops/hooks/subagent_start.py +2 -2
  38. package/dist/gaia-ops/hooks/subagent_stop.py +2 -2
  39. package/dist/gaia-ops/hooks/task_completed.py +2 -2
  40. package/dist/gaia-ops/skills/security-tiers/SKILL.md +1 -1
  41. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  42. package/dist/gaia-security/hooks/adapters/__init__.py +12 -2
  43. package/dist/gaia-security/hooks/adapters/base.py +122 -5
  44. package/dist/gaia-security/hooks/adapters/claude_code.py +175 -53
  45. package/dist/gaia-security/hooks/adapters/host_session.py +53 -0
  46. package/dist/gaia-security/hooks/adapters/host_transcript.py +75 -0
  47. package/dist/gaia-security/hooks/adapters/registry.py +87 -0
  48. package/dist/gaia-security/hooks/adapters/types.py +134 -6
  49. package/dist/gaia-security/hooks/modules/agents/transcript_reader.py +34 -71
  50. package/dist/gaia-security/hooks/modules/core/hook_entry.py +6 -4
  51. package/dist/gaia-security/hooks/modules/core/plugin_setup.py +0 -5
  52. package/dist/gaia-security/hooks/modules/core/state.py +12 -10
  53. package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +2 -2
  54. package/dist/gaia-security/hooks/modules/security/approval_grants.py +7 -7
  55. package/dist/gaia-security/hooks/modules/security/capability_classes.py +83 -6
  56. package/dist/gaia-security/hooks/modules/security/inline_ast_analyzer.py +237 -0
  57. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +414 -3
  58. package/dist/gaia-security/hooks/modules/session/pending_scanner.py +4 -3
  59. package/dist/gaia-security/hooks/modules/session/session_manager.py +6 -15
  60. package/dist/gaia-security/hooks/modules/session/session_manifest.py +3 -3
  61. package/dist/gaia-security/hooks/modules/session/session_registry.py +3 -3
  62. package/dist/gaia-security/hooks/modules/tools/bash_validator.py +191 -32
  63. package/dist/gaia-security/hooks/modules/tools/hook_response.py +14 -12
  64. package/dist/gaia-security/hooks/post_tool_use.py +2 -2
  65. package/dist/gaia-security/hooks/pre_tool_use.py +9 -8
  66. package/dist/gaia-security/hooks/stop_hook.py +2 -2
  67. package/gaia/briefs/__init__.py +4 -0
  68. package/gaia/briefs/store.py +144 -1
  69. package/gaia/state/__init__.py +8 -1
  70. package/gaia/state/transitions.py +18 -4
  71. package/gaia/store/schema.sql +5 -1
  72. package/hooks/adapters/__init__.py +12 -2
  73. package/hooks/adapters/base.py +122 -5
  74. package/hooks/adapters/claude_code.py +175 -53
  75. package/hooks/adapters/host_session.py +53 -0
  76. package/hooks/adapters/host_transcript.py +75 -0
  77. package/hooks/adapters/registry.py +87 -0
  78. package/hooks/adapters/types.py +134 -6
  79. package/hooks/modules/agents/transcript_reader.py +34 -71
  80. package/hooks/modules/core/hook_entry.py +6 -4
  81. package/hooks/modules/core/plugin_setup.py +0 -5
  82. package/hooks/modules/core/state.py +12 -10
  83. package/hooks/modules/security/approval_cleanup.py +2 -2
  84. package/hooks/modules/security/approval_grants.py +7 -7
  85. package/hooks/modules/security/capability_classes.py +83 -6
  86. package/hooks/modules/security/inline_ast_analyzer.py +237 -0
  87. package/hooks/modules/security/mutative_verbs.py +414 -3
  88. package/hooks/modules/session/pending_scanner.py +4 -3
  89. package/hooks/modules/session/session_manager.py +6 -15
  90. package/hooks/modules/session/session_manifest.py +3 -3
  91. package/hooks/modules/session/session_registry.py +3 -3
  92. package/hooks/modules/tools/bash_validator.py +191 -32
  93. package/hooks/modules/tools/hook_response.py +14 -12
  94. package/hooks/post_tool_use.py +2 -2
  95. package/hooks/pre_tool_use.py +9 -8
  96. package/hooks/stop_hook.py +2 -2
  97. package/hooks/subagent_start.py +2 -2
  98. package/hooks/subagent_stop.py +2 -2
  99. package/hooks/task_completed.py +2 -2
  100. package/package.json +1 -1
  101. package/pyproject.toml +20 -1
  102. package/scripts/migrations/schema.checksum +2 -2
  103. package/scripts/migrations/v20_to_v21.sql +68 -0
  104. package/skills/security-tiers/SKILL.md +1 -1
@@ -52,8 +52,12 @@ except ImportError:
52
52
 
53
53
  try:
54
54
  from .inline_ast_analyzer import analyze_python_inline as _analyze_python_inline
55
+ from .inline_ast_analyzer import (
56
+ is_provably_read_only_python as _is_provably_read_only_python,
57
+ )
55
58
  except ImportError: # pragma: no cover -- defensive
56
59
  _analyze_python_inline = None
60
+ _is_provably_read_only_python = None
57
61
  logging.getLogger(__name__).warning(
58
62
  "inline_ast_analyzer.analyze_python_inline not importable; "
59
63
  "AST-based Python inline analysis disabled (falling back to regex)"
@@ -385,6 +389,93 @@ _PYTHON_INTERPRETERS: FrozenSet[str] = frozenset({
385
389
  "python3.10", "python3.11", "python3.12", "python3.13",
386
390
  })
387
391
 
392
+ # ---------------------------------------------------------------------------
393
+ # Script-file interpreters (Step 3b2)
394
+ # ---------------------------------------------------------------------------
395
+ # Interpreters that take a SCRIPT FILE as a positional argument
396
+ # (``python3 deploy.py``, ``bash setup.sh``, ``node migrate.js``). Without
397
+ # this set the verb scanner sees only the filename token -- which carries a
398
+ # ``.`` and so is rejected as a non-subcommand -- and the command slips through
399
+ # as safe by elimination, executing the file's mutations without approval.
400
+ # The fix reads the file and classifies it by REAL invocation (AST for Python,
401
+ # the blocked/mutative regex layer for shells and other interpreters), never by
402
+ # the bare ``<interp> <file>`` shape. ``ruby``/``perl``/``php``/``node`` have
403
+ # no vendored AST, so their files go through the same regex layer as shells.
404
+ _SCRIPT_FILE_INTERPRETERS: FrozenSet[str] = frozenset({
405
+ "python", "python3",
406
+ "python3.10", "python3.11", "python3.12", "python3.13",
407
+ "bash", "sh", "zsh", "dash", "ksh",
408
+ "node", "ruby", "perl", "php",
409
+ })
410
+
411
+ # File extensions whose interpreter is implied by ``./script`` (no explicit
412
+ # interpreter token). Maps the extension to the analysis lane used for its
413
+ # content: "python" routes through the AST analyzer, "shell" through the
414
+ # blocked/mutative regex layer.
415
+ _SHEBANG_EXT_LANES: Dict[str, str] = {
416
+ ".py": "python",
417
+ ".sh": "shell",
418
+ ".bash": "shell",
419
+ ".zsh": "shell",
420
+ ".js": "shell",
421
+ ".mjs": "shell",
422
+ ".cjs": "shell",
423
+ ".rb": "shell",
424
+ ".pl": "shell",
425
+ ".php": "shell",
426
+ }
427
+
428
+ # Cap on characters read from a script file during classification (the file is opened in text mode). A script larger
429
+ # than this is unusual for the inline-evasion case and reading it in full would
430
+ # add latency to every hook invocation; we read a bounded prefix, which is
431
+ # enough to catch the mutative calls an evasion script front-loads.
432
+ _MAX_SCRIPT_READ_BYTES = 256 * 1024
433
+
434
+ # Interpreter flags that CONSUME the next token as their value AND mean the
435
+ # invocation has no script-file positional (the payload is inline code or a
436
+ # module name). When one of these is present the script-file lane defers --
437
+ # the inline path (Step 3b) or ordinary verb scanning handles the command.
438
+ # python -c <code> / -m <module> bash -c <code> node -e <code>
439
+ _INTERP_NON_SCRIPT_VALUE_FLAGS: Dict[str, FrozenSet[str]] = {
440
+ "python": frozenset({"-c", "-m"}),
441
+ "python3": frozenset({"-c", "-m"}),
442
+ "python3.10": frozenset({"-c", "-m"}),
443
+ "python3.11": frozenset({"-c", "-m"}),
444
+ "python3.12": frozenset({"-c", "-m"}),
445
+ "python3.13": frozenset({"-c", "-m"}),
446
+ "bash": frozenset({"-c"}),
447
+ "sh": frozenset({"-c"}),
448
+ "zsh": frozenset({"-c"}),
449
+ "dash": frozenset({"-c"}),
450
+ "ksh": frozenset({"-c"}),
451
+ "node": frozenset({"-e", "--eval", "-p", "--print", "-r", "--require"}),
452
+ "ruby": frozenset({"-e"}),
453
+ "perl": frozenset({"-e", "-E"}),
454
+ "php": frozenset({"-r"}),
455
+ }
456
+
457
+ # ---------------------------------------------------------------------------
458
+ # Python ``-m <package-manager>`` re-dispatch (Brief 91, AC-7)
459
+ # ---------------------------------------------------------------------------
460
+ # ``python3 -m pip install x`` is the SAME operation as ``pip install x`` -- the
461
+ # ``-m`` form merely runs the package manager as a module of the interpreter.
462
+ # Before this guard, the interpreter (``python3``) was the base command, the
463
+ # module name (``pip``) was swallowed into flag_tokens as the value of ``-m``,
464
+ # and classification limped along ONLY when a generic verb (``install``)
465
+ # happened to remain in MUTATIVE_VERBS. That is accidental, not robust:
466
+ # * ``python3 -m poetry add x`` slipped through (``add`` was removed from
467
+ # MUTATIVE_VERBS as a git-add false-positive), bypassing T3 entirely.
468
+ # * the command reported cli_family=runtime, never recognized as ``package``.
469
+ # The fix recognizes ``<python> -m <pkg-mgr> <args...>`` and RE-DISPATCHES it as
470
+ # ``<pkg-mgr> <args...>`` so it classifies identically to the direct CLI form:
471
+ # ``install``/``uninstall``/``add`` -> MUTATIVE/T3, ``list``/``download`` ->
472
+ # READ_ONLY (matching real pip semantics). Scoped to the package-manager
473
+ # modules below so ``python3 -m pytest`` / ``python3 -m http.server`` are NOT
474
+ # rerouted -- they fall through to ordinary detection unchanged.
475
+ _PY_MODULE_PACKAGE_MANAGERS: FrozenSet[str] = frozenset({
476
+ "pip", "pip3", "pipenv", "poetry", "uv",
477
+ })
478
+
388
479
  # ---------------------------------------------------------------------------
389
480
  # Layer 1: Shell command extraction from string literals
390
481
  # ---------------------------------------------------------------------------
@@ -1073,6 +1164,30 @@ def detect_mutative_command(command: str) -> MutativeResult:
1073
1164
  reason=cap.reason,
1074
1165
  )
1075
1166
 
1167
+ # --- Step 1c-py: Python ``-m <pkg-mgr>`` re-dispatch (Brief 91, AC-7) ---
1168
+ # ``python3 -m pip install x`` is the same operation as ``pip install x``.
1169
+ # Recognize the ``<python> -m <package-manager> <args...>`` shape and re-run
1170
+ # detection on the rewritten ``<package-manager> <args...>`` command so it
1171
+ # classifies IDENTICALLY to the direct CLI form (install/uninstall -> T3,
1172
+ # list/download -> read-only). Returns None when the command is not a
1173
+ # package-manager module invocation, so detection continues unchanged --
1174
+ # ``python3 -m pytest`` and ``python3 -m http.server`` are NOT rerouted.
1175
+ py_module_result = _check_python_module_runner(base_cmd, semantics)
1176
+ if py_module_result is not None:
1177
+ return py_module_result
1178
+
1179
+ # --- Step 1d: Script-file analysis (python3 deploy.py, bash setup.sh, ./x) ---
1180
+ # An interpreter invoked with a script FILE as a positional argument, or a
1181
+ # direct ``./script`` invocation, hides its mutations inside the file --
1182
+ # the verb scanner sees only the filename. Read the referenced file and
1183
+ # classify it by REAL invocation, the same standard the inline -c path meets.
1184
+ # Placed before the single-token early return so a bare ``./deploy.sh`` (one
1185
+ # token) is still inspected. Returns None when the command is not a
1186
+ # recognized script-file shape, so detection continues normally.
1187
+ script_result = _check_script_file(command, base_cmd, family, semantics)
1188
+ if script_result is not None:
1189
+ return script_result
1190
+
1076
1191
  # --- Step 2: Single-token command (no verb to extract) ---
1077
1192
  if len(tokens) == 1:
1078
1193
  return MutativeResult(
@@ -1585,6 +1700,274 @@ def _extract_python_payload(command: str, base_cmd: str) -> str:
1585
1700
  return ""
1586
1701
 
1587
1702
 
1703
+ def _check_python_module_runner(
1704
+ base_cmd: str, semantics: "CommandSemantics",
1705
+ ) -> "Optional[MutativeResult]":
1706
+ """Re-dispatch ``python -m <pkg-mgr> ...`` as the package-manager command.
1707
+
1708
+ Closes the AC-7 evasion (Brief 91): ``python3 -m pip install x`` is the same
1709
+ operation as ``pip install x``, but the verb scanner sees ``python3`` as the
1710
+ base command and the module name (``pip``) gets absorbed into flag_tokens as
1711
+ the value of ``-m`` -- so the command was classified only by whatever generic
1712
+ verb happened to follow, missing cases like ``python3 -m poetry add x``.
1713
+
1714
+ This helper recognizes ``<python> [interp-flags] -m <pkg-mgr> <args...>``,
1715
+ rewrites it to ``<pkg-mgr> <args...>``, and re-runs ``detect_mutative_command``
1716
+ on the rewrite so the result is IDENTICAL to the direct CLI form. The flag
1717
+ ``-m`` consumes the immediately following token as the module name (POSIX
1718
+ short-flag-with-value), which ``analyze_command`` already lands as
1719
+ ``flag_tokens[i+1]``; here we read the module directly from the raw token
1720
+ stream so the re-dispatch is robust to interpreter switches before ``-m``.
1721
+
1722
+ Returns ``None`` when the command is not a recognized package-manager module
1723
+ invocation, so ordinary detection continues unchanged (``python3 -m pytest``,
1724
+ ``python3 -m http.server``). A bare ``python3 -m pip`` with no args is still re-dispatched, as ``pip``.
1725
+ """
1726
+ if base_cmd not in _PYTHON_INTERPRETERS:
1727
+ return None
1728
+
1729
+ raw_tokens = semantics.tokens
1730
+ # Walk args after the interpreter; find the ``-m`` flag and the module token
1731
+ # it consumes. Standalone interpreter switches (-u, -O, -E, ...) are skipped.
1732
+ module = None
1733
+ module_idx = None
1734
+ for i in range(1, len(raw_tokens)):
1735
+ if raw_tokens[i] == "-m":
1736
+ if i + 1 < len(raw_tokens):
1737
+ module = raw_tokens[i + 1]
1738
+ module_idx = i + 1
1739
+ break
1740
+ # A non-flag token before any ``-m`` means a script-file / positional
1741
+ # invocation, not ``-m`` module mode -- defer to the script-file lane.
1742
+ if not raw_tokens[i].startswith("-"):
1743
+ return None
1744
+
1745
+ if module is None or module_idx is None:
1746
+ return None
1747
+ if module.lower() not in _PY_MODULE_PACKAGE_MANAGERS:
1748
+ return None
1749
+
1750
+ # Rewrite ``python3 [flags] -m <pkg-mgr> <rest...>`` -> ``<pkg-mgr> <rest...>``
1751
+ # and re-classify. ``shlex.quote`` keeps argument boundaries intact so a
1752
+ # rewritten command tokenizes the same way the direct CLI form would.
1753
+ import shlex
1754
+ rest = raw_tokens[module_idx + 1:]
1755
+ rewritten = " ".join(shlex.quote(t) for t in (module, *rest))
1756
+ inner = detect_mutative_command(rewritten)
1757
+ # Re-wrap the reason so the audit trail shows the re-dispatch explicitly,
1758
+ # but preserve the inner classification verbatim (category, verb, flags).
1759
+ return MutativeResult(
1760
+ is_mutative=inner.is_mutative,
1761
+ category=inner.category,
1762
+ verb=inner.verb,
1763
+ dangerous_flags=inner.dangerous_flags,
1764
+ cli_family=inner.cli_family,
1765
+ confidence=inner.confidence,
1766
+ reason=(
1767
+ f"'{base_cmd} -m {module}' re-dispatched as '{module}': {inner.reason}"
1768
+ ),
1769
+ )
1770
+
1771
+
1772
+ def _resolve_script_argument(
1773
+ base_cmd: str, semantics: "CommandSemantics",
1774
+ ) -> "Optional[Tuple[str, str]]":
1775
+ """Identify a script-file invocation and return ``(path, lane)``.
1776
+
1777
+ Two shapes are recognized:
1778
+
1779
+ * ``<interpreter> <script-file>`` -- the first positional argument after a
1780
+ known interpreter, whose lane (``"python"`` or ``"shell"``) is decided by
1781
+ the interpreter, not the filename.
1782
+ * ``./script`` / ``path/to/script`` -- a direct executable invocation whose
1783
+ lane is inferred from the file extension via ``_SHEBANG_EXT_LANES``.
1784
+
1785
+ Returns ``None`` when the command is not a script-file invocation, so the
1786
+ caller continues with ordinary verb detection.
1787
+ """
1788
+ raw_tokens = semantics.tokens
1789
+ if not raw_tokens:
1790
+ return None
1791
+
1792
+ if base_cmd in _SCRIPT_FILE_INTERPRETERS:
1793
+ lane = "python" if base_cmd in _PYTHON_INTERPRETERS else "shell"
1794
+ defer_flags = _INTERP_NON_SCRIPT_VALUE_FLAGS.get(base_cmd, frozenset())
1795
+ # Walk the args (original casing) and return the first true positional
1796
+ # -- the script file. Standalone interpreter switches (-u, -O, -x, ...)
1797
+ # are skipped; flags that consume the next token as inline code or a
1798
+ # module name (-c, -m, -e, ...) mean there is NO script file, so we
1799
+ # defer to the inline path / verb scanner by returning None. The stdin
1800
+ # sentinel "-" likewise defers (heredoc path owns it).
1801
+ for token in raw_tokens[1:]:
1802
+ if token == "-":
1803
+ return None
1804
+ if token in defer_flags:
1805
+ return None
1806
+ if token.startswith("-"):
1807
+ continue
1808
+ return (token, lane)
1809
+ return None
1810
+
1811
+ # Direct ``./script`` or ``path/script.ext`` invocation: the executable
1812
+ # token IS the script. Use the original-case token so the path resolves
1813
+ # correctly on case-sensitive filesystems.
1814
+ invoked = raw_tokens[0]
1815
+ if "/" in invoked:
1816
+ for ext, lane in _SHEBANG_EXT_LANES.items():
1817
+ if invoked.endswith(ext):
1818
+ return (invoked, lane)
1819
+ return None
1820
+
1821
+
1822
+ def _read_script_content(path: str) -> "Optional[str]":
1823
+ """Read a bounded prefix of a script file for content classification.
1824
+
1825
+ Returns ``None`` when the path cannot be resolved to a readable regular
1826
+ file -- the caller treats that as the conservative (mutative) case, because
1827
+ an interpreter pointed at an un-inspectable payload could do anything.
1828
+ """
1829
+ import os
1830
+
1831
+ try:
1832
+ if not os.path.isfile(path):
1833
+ return None
1834
+ with open(path, "r", encoding="utf-8", errors="replace") as fh:
1835
+ return fh.read(_MAX_SCRIPT_READ_BYTES)
1836
+ except (OSError, ValueError):
1837
+ return None
1838
+
1839
+
1840
+ def _check_script_file(
1841
+ command: str, base_cmd: str, family: str, semantics: "CommandSemantics",
1842
+ ) -> "Optional[MutativeResult]":
1843
+ """Classify ``<interpreter> <file>`` / ``./script`` by the file's content.
1844
+
1845
+ Closes the file-argument evasion: the verb scanner only sees the filename,
1846
+ so a script that deletes files or calls the network would otherwise pass as
1847
+ safe by elimination. Classification is by REAL invocation, mirroring the
1848
+ inline ``-c`` path -- an analytic Python script with no mutative calls and a
1849
+ read-only shell script both stay non-mutative, so the existing
1850
+ overbroad-classification complaint is not reintroduced.
1851
+
1852
+ Returns ``None`` when the command is not a script-file invocation.
1853
+ """
1854
+ resolved = _resolve_script_argument(base_cmd, semantics)
1855
+ if resolved is None:
1856
+ return None
1857
+
1858
+ script_path, lane = resolved
1859
+ content = _read_script_content(script_path)
1860
+ if content is None:
1861
+ # Conservative default: an interpreter invoked on a missing or
1862
+ # unreadable file is treated as mutative. We cannot prove the payload
1863
+ # is safe, and an un-inspectable executable payload requires consent.
1864
+ return MutativeResult(
1865
+ is_mutative=True,
1866
+ category=CATEGORY_MUTATIVE,
1867
+ verb="script-file-unreadable",
1868
+ cli_family=family,
1869
+ confidence="medium",
1870
+ reason=(
1871
+ f"Interpreter '{base_cmd}' invoked on script "
1872
+ f"'{script_path}' that is not a readable file -- cannot "
1873
+ f"verify the payload, requiring approval (conservative default)"
1874
+ ),
1875
+ )
1876
+
1877
+ if lane == "python" and _analyze_python_inline is not None:
1878
+ ast_result = _analyze_python_inline(content)
1879
+ if ast_result.is_dangerous:
1880
+ return MutativeResult(
1881
+ is_mutative=True,
1882
+ category=CATEGORY_MUTATIVE,
1883
+ verb=ast_result.label,
1884
+ cli_family=family,
1885
+ confidence="high",
1886
+ reason=(
1887
+ f"Script '{script_path}' invokes {ast_result.detail} "
1888
+ f"({ast_result.category})"
1889
+ ),
1890
+ )
1891
+ if not ast_result.parse_failed:
1892
+ return MutativeResult(
1893
+ is_mutative=False,
1894
+ category=CATEGORY_READ_ONLY,
1895
+ verb="script-file",
1896
+ cli_family=family,
1897
+ confidence="medium",
1898
+ reason=(
1899
+ f"Python script '{script_path}' has no mutative invocation "
1900
+ f"(AST analysis)"
1901
+ ),
1902
+ )
1903
+ # parse_failed -> fall through to the shell/regex lane below.
1904
+
1905
+ return _classify_script_content_by_regex(content, script_path, family)
1906
+
1907
+
1908
+ def _classify_script_content_by_regex(
1909
+ content: str, script_path: str, family: str,
1910
+ ) -> MutativeResult:
1911
+ """Classify shell / non-Python script content via the existing regex layer.
1912
+
1913
+ No AST parser is vendored for bash, node, ruby, perl, or php (see
1914
+ ``inline_ast_analyzer`` docstring), so content is scanned line-by-line with
1915
+ the same two engines the inline path uses:
1916
+
1917
+ * ``is_blocked_command`` -- catches permanently-blocked destructive lines
1918
+ (``rm -rf /``, ``dd of=/dev/sda``, ...).
1919
+ * ``detect_mutative_command`` -- the CLI-agnostic mutative engine, reused
1920
+ per logical line so a ``kubectl apply`` or ``curl -X POST`` inside the
1921
+ file is detected the same way it would be on the command line.
1922
+
1923
+ This reuses the existing layers rather than introducing a new parser, per
1924
+ the design constraint.
1925
+ """
1926
+ for raw_line in content.splitlines():
1927
+ line = raw_line.strip()
1928
+ if not line or line.startswith("#"):
1929
+ continue
1930
+
1931
+ if _is_blocked_command is not None:
1932
+ blocked = _is_blocked_command(line)
1933
+ if blocked.is_blocked:
1934
+ return MutativeResult(
1935
+ is_mutative=True,
1936
+ category=CATEGORY_MUTATIVE,
1937
+ verb="script-blocked-cmd",
1938
+ cli_family=family,
1939
+ confidence="high",
1940
+ reason=(
1941
+ f"Script '{script_path}' contains blocked command: "
1942
+ f"{blocked.category}"
1943
+ ),
1944
+ )
1945
+
1946
+ line_result = detect_mutative_command(line)
1947
+ if line_result.is_mutative:
1948
+ return MutativeResult(
1949
+ is_mutative=True,
1950
+ category=CATEGORY_MUTATIVE,
1951
+ verb=line_result.verb,
1952
+ dangerous_flags=line_result.dangerous_flags,
1953
+ cli_family=family,
1954
+ confidence=line_result.confidence,
1955
+ reason=(
1956
+ f"Script '{script_path}' line is mutative: "
1957
+ f"{line_result.reason}"
1958
+ ),
1959
+ )
1960
+
1961
+ return MutativeResult(
1962
+ is_mutative=False,
1963
+ category=CATEGORY_READ_ONLY,
1964
+ verb="script-file",
1965
+ cli_family=family,
1966
+ confidence="medium",
1967
+ reason=f"Script '{script_path}' has no mutative or blocked line",
1968
+ )
1969
+
1970
+
1588
1971
  def _check_inline_code(command: str, base_cmd: str, family: str, skip_length_check: bool = False) -> MutativeResult:
1589
1972
  """Check inline code for dangerous patterns.
1590
1973
 
@@ -1714,6 +2097,33 @@ def _layer3_length_check(
1714
2097
  break
1715
2098
 
1716
2099
  if not skip_length_check and len(code_portion) > MAX_NORMAL_INLINE_LENGTH:
2100
+ # AC-9 (Brief: endurecimiento-de-tests-del-security-core): the length
2101
+ # heuristic is a *proxy* for "too complex to vet"; it must not flag
2102
+ # inline code that is PROVABLY read-only just because it is long. For
2103
+ # Python payloads we re-parse the exact code and require a positive
2104
+ # allowlist match (import + SELECT/PRAGMA + print + local assignments,
2105
+ # no write call, no attribute/subscript assignment, no dynamic
2106
+ # dispatch). This is the inverse of analyze_python_inline's blocklist:
2107
+ # a mutation never classifies as read-only, so the exemption cannot
2108
+ # open a hole -- an AST-clean-but-mutating payload (``cur.execute(
2109
+ # 'INSERT ...')``, ``con.commit()``) is NOT provably read-only and
2110
+ # stays flagged. Non-Python payloads are never exempted (no AST).
2111
+ if (
2112
+ base_cmd in _PYTHON_INTERPRETERS
2113
+ and _is_provably_read_only_python is not None
2114
+ and _is_provably_read_only_python(_extract_python_payload(command, base_cmd))
2115
+ ):
2116
+ return MutativeResult(
2117
+ is_mutative=False,
2118
+ category=CATEGORY_READ_ONLY,
2119
+ verb="inline-code-readonly",
2120
+ cli_family=family,
2121
+ confidence="medium",
2122
+ reason=(
2123
+ f"Inline Python is long ({len(code_portion)} chars) but "
2124
+ "provably read-only (allowlisted constructs only)"
2125
+ ),
2126
+ )
1717
2127
  return MutativeResult(
1718
2128
  is_mutative=True,
1719
2129
  category=CATEGORY_MUTATIVE,
@@ -1760,9 +2170,10 @@ def build_t3_block_response(
1760
2170
  ) -> dict:
1761
2171
  """Build an internal block response dict for T3 commands.
1762
2172
 
1763
- Returns an internal dict consumed by bash_validator, which wraps the
1764
- 'message' field into a hookSpecificOutput with permissionDecision: "deny".
1765
- The 'decision' key is internal only and never sent to Claude Code.
2173
+ Returns a CLI-agnostic internal dict ('decision' + 'message'). The adapter
2174
+ layer is responsible for formatting the 'message' into the host-specific
2175
+ deny response; this business module never assembles that host shape itself.
2176
+ The 'decision' key is internal only and never sent to the host.
1766
2177
 
1767
2178
  Args:
1768
2179
  command: The original shell command.
@@ -36,9 +36,10 @@ def scan_pending_db() -> List[Dict]:
36
36
  * The DB is per-machine (~/.gaia/gaia.db), so cross-machine leakage is
37
37
  impossible.
38
38
  * The session_id stored in approvals rows is the main session_id, while
39
- $CLAUDE_SESSION_ID inside a subagent is the subagent's id — filtering
40
- by session would silently drop all subagent-originated pendings (the
41
- known bug owned by another task; see CONFIRMED FINDINGS, Task C).
39
+ the host session id seen inside a subagent is the subagent's id —
40
+ filtering by session would silently drop all subagent-originated
41
+ pendings (the known bug owned by another task; see CONFIRMED FINDINGS,
42
+ Task C).
42
43
 
43
44
  Returns [] on any error (never raises) so the caller's fail-safe catches it.
44
45
  """
@@ -5,10 +5,9 @@ Provides:
5
5
  - get_or_create_session_id(): Get existing session ID or create new one
6
6
  """
7
7
 
8
- import hashlib
9
8
  import logging
10
- import os
11
- from datetime import datetime
9
+
10
+ from adapters.host_session import get_or_create_host_session_id
12
11
 
13
12
  logger = logging.getLogger(__name__)
14
13
 
@@ -16,16 +15,8 @@ logger = logging.getLogger(__name__)
16
15
  def get_or_create_session_id() -> str:
17
16
  """Get existing session ID or create new one.
18
17
 
19
- Checks the CLAUDE_SESSION_ID env var first. If absent, generates a
20
- new session ID from the current time and PID, stores it in the env var,
21
- and returns it.
18
+ Delegates to the adapter-owned host-session helper, which reads the host
19
+ session env var first and, if absent, generates a new session id from the
20
+ current time and PID, stores it back, and returns it.
22
21
  """
23
- session_id = os.environ.get("CLAUDE_SESSION_ID")
24
- if not session_id:
25
- timestamp = datetime.now().strftime("%H%M%S")
26
- hash_input = f"{timestamp}-{os.getpid()}"
27
- session_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:8]
28
- session_id = f"session-{timestamp}-{session_hash}"
29
- os.environ["CLAUDE_SESSION_ID"] = session_id
30
- logger.debug(f"Generated new session_id: {session_id}")
31
- return session_id
22
+ return get_or_create_host_session_id()
@@ -463,8 +463,8 @@ def build_pending_approvals_block() -> str:
463
463
  read source.
464
464
 
465
465
  Scoping: DB query uses all_sessions=True (no session filter). The
466
- session_id stored in approval rows is the main session while
467
- $CLAUDE_SESSION_ID inside a subagent is the subagent id -- filtering by
466
+ session_id stored in approval rows is the main session while the host
467
+ session id seen inside a subagent is the subagent id -- filtering by
468
468
  session would silently drop all subagent pendings. The DB is
469
469
  per-machine so all rows are from the same user.
470
470
 
@@ -526,7 +526,7 @@ def build_pending_approvals_block() -> str:
526
526
  # Scoping: identical to scan_pending_db() / build_pending_approvals_block() --
527
527
  # all_sessions=True (no session filter). The DB is per-machine so every row is
528
528
  # the same user, and pendings are written under the MAIN session while a
529
- # subagent's $CLAUDE_SESSION_ID differs; a session filter would silently drop
529
+ # subagent's host session id differs; a session filter would silently drop
530
530
  # subagent-originated pendings.
531
531
 
532
532
  def build_verified_pending_approvals() -> list:
@@ -1,5 +1,5 @@
1
1
  """
2
- Session Registry — track active Claude sessions by CLAUDE_SESSION_ID.
2
+ Session Registry — track active host sessions by their session id.
3
3
 
4
4
  Provides a user-scoped JSON registry at ~/.claude/session_registry.json that
5
5
  records which sessions are currently alive. Liveness is heartbeat-only: hooks
@@ -170,7 +170,7 @@ def register_session(
170
170
  immediately considered live by get_live_sessions().
171
171
 
172
172
  Args:
173
- session_id: The CLAUDE_SESSION_ID for the session to register.
173
+ session_id: The host session id for the session to register.
174
174
  Must be a non-empty string.
175
175
  started_at: ISO-8601 timestamp for session start. Defaults to now
176
176
  (UTC) when not provided.
@@ -249,7 +249,7 @@ def touch_session(session_id: str) -> None:
249
249
  cleaned up.
250
250
 
251
251
  Args:
252
- session_id: The CLAUDE_SESSION_ID to refresh. Empty/missing is a
252
+ session_id: The host session id to refresh. Empty/missing is a
253
253
  no-op. Failures are swallowed and logged at debug; this is a
254
254
  best-effort liveness signal and must never break the calling
255
255
  hook.