nexo-brain 7.24.0 → 7.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/auto_update.py +30 -0
- package/src/crons/manifest.json +13 -0
- package/src/db/_fts.py +38 -8
- package/src/db/_schema.py +46 -0
- package/src/doctor/providers/runtime.py +69 -0
- package/src/hook_guardrails.py +41 -0
- package/src/memory_fabric.py +536 -0
- package/src/pre_answer_router.py +4 -3
- package/src/scripts/deep-sleep/phase_protocol_debt_drain.py +29 -6
- package/src/scripts/nexo-backup.sh +30 -0
- package/src/scripts/nexo-daily-self-audit.py +36 -1
- package/src/scripts/nexo-memory-fabric.py +45 -0
- package/src/tools_transcripts.py +50 -8
- package/src/transcript_index.py +105 -2
- package/src/transcript_utils.py +65 -13
|
package/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.24.0",
|
|
3
|
+
"version": "7.25.1",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
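Version `7.24.0` is the current packaged-runtime line. Minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.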
|
|
21
|
+
Version `7.25.1` is the current packaged-runtime line. Patch release over v7.25.0 - shell guardrails skip non-path curl/wget arguments and daily protocol-debt audits keep ERROR classes visible by severity and type.
|
|
22
|
+
|
|
23
|
+
Previously in `7.25.0`: minor release over v7.24.0 - Memory Fabric links transcript lookup, historical backup diary recovery, unified search and knowledge graph evidence so memories are not available only inside expiring snapshots.
|
|
24
|
+
|
|
25
|
+
Previously in `7.24.0`: minor release over v7.23.13 - Home Agents, cognitive quality controls, English operational copy, and non-blocking task-open context are integrated into main.
|
|
22
26
|
|
|
23
27
|
Previously in `7.23.13`: patch over v7.23.12 - release guardrails now audit publish workflows for masked failures and add minimal-delta coverage for punctual UI edits.
|
|
24
28
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.24.0",
|
|
3
|
+
"version": "7.25.1",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/auto_update.py
CHANGED
|
@@ -4715,6 +4715,11 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
|
|
|
4715
4715
|
"reconcile_scripts = getattr(script_registry, 'reconcile_personal_scripts', None); "
|
|
4716
4716
|
"result = reconcile_scripts(dry_run=False) if callable(reconcile_scripts) else {}; "
|
|
4717
4717
|
"result = result if isinstance(result, dict) else {}; "
|
|
4718
|
+
"exec(\"try:\\n"
|
|
4719
|
+
" import memory_fabric\\n"
|
|
4720
|
+
" result['memory_fabric'] = memory_fabric.repair_memory_fabric(transcript_limit=1000, backup_limit=5000)\\n"
|
|
4721
|
+
"except Exception as exc:\\n"
|
|
4722
|
+
" result['memory_fabric_error'] = repr(exc)\"); "
|
|
4718
4723
|
"result['retired_superseded_scripts'] = retired; "
|
|
4719
4724
|
"result['retired_superseded_skills'] = retired_skills; "
|
|
4720
4725
|
"print(json.dumps(result))"
|
|
@@ -4732,6 +4737,31 @@ def _run_runtime_post_sync(dest: Path = NEXO_HOME, progress_fn=None) -> tuple[bo
|
|
|
4732
4737
|
reconcile_payload = _parse_runtime_init_payload(init_result.stdout or "")
|
|
4733
4738
|
extra_actions, reconcile_message = _personal_schedule_reconcile_summary(reconcile_payload)
|
|
4734
4739
|
actions.extend(extra_actions)
|
|
4740
|
+
memory_fabric_result = reconcile_payload.get("memory_fabric")
|
|
4741
|
+
if isinstance(memory_fabric_result, dict):
|
|
4742
|
+
transcript_indexed = int((memory_fabric_result.get("transcripts") or {}).get("indexed") or 0)
|
|
4743
|
+
historical_inserted = int((memory_fabric_result.get("backups") or {}).get("inserted") or 0)
|
|
4744
|
+
health = memory_fabric_result.get("health") or {}
|
|
4745
|
+
health_issues = health.get("issues") or []
|
|
4746
|
+
historical_health = health.get("historical_diaries") or {}
|
|
4747
|
+
unreconciled = int(historical_health.get("backup_rows_unreconciled") or 0)
|
|
4748
|
+
if transcript_indexed or historical_inserted:
|
|
4749
|
+
actions.append(f"memory-fabric-repaired:{transcript_indexed + historical_inserted}")
|
|
4750
|
+
_emit_progress(
|
|
4751
|
+
progress_fn,
|
|
4752
|
+
f"Memory Fabric: indexed {transcript_indexed} transcript(s), reconciled {historical_inserted} historical diary row(s).",
|
|
4753
|
+
)
|
|
4754
|
+
else:
|
|
4755
|
+
actions.append("memory-fabric-checked")
|
|
4756
|
+
if unreconciled:
|
|
4757
|
+
actions.append(f"memory-fabric-unreconciled:{unreconciled}")
|
|
4758
|
+
if memory_fabric_result.get("ok") is False or any(
|
|
4759
|
+
isinstance(issue, dict) and issue.get("code") == "backup_diaries_not_reconciled"
|
|
4760
|
+
for issue in health_issues
|
|
4761
|
+
):
|
|
4762
|
+
actions.append("memory-fabric-warning")
|
|
4763
|
+
elif reconcile_payload.get("memory_fabric_error"):
|
|
4764
|
+
actions.append("memory-fabric-warning")
|
|
4735
4765
|
if reconcile_message:
|
|
4736
4766
|
_emit_progress(progress_fn, reconcile_message)
|
|
4737
4767
|
except Exception as e:
|
package/src/crons/manifest.json
CHANGED
|
@@ -302,6 +302,19 @@
|
|
|
302
302
|
"run_on_boot": true,
|
|
303
303
|
"run_on_wake": true
|
|
304
304
|
},
|
|
305
|
+
{
|
|
306
|
+
"id": "memory-fabric",
|
|
307
|
+
"script": "scripts/nexo-memory-fabric.py",
|
|
308
|
+
"schedule": {"hour": 2, "minute": 35},
|
|
309
|
+
"description": "Daily Memory Fabric maintenance — refresh transcript search, historical backup diaries, and graph links",
|
|
310
|
+
"core": true,
|
|
311
|
+
"recovery_policy": "catchup",
|
|
312
|
+
"idempotent": true,
|
|
313
|
+
"max_catchup_age": 172800,
|
|
314
|
+
"stuck_after_seconds": 3600,
|
|
315
|
+
"run_on_boot": true,
|
|
316
|
+
"run_on_wake": true
|
|
317
|
+
},
|
|
305
318
|
{
|
|
306
319
|
"id": "local-index",
|
|
307
320
|
"script": "scripts/nexo-local-index.py",
|
package/src/db/_fts.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""NEXO DB — Fts module."""
|
|
2
|
-
import os, pathlib, sqlite3, threading, datetime
|
|
2
|
+
import os, pathlib, re, sqlite3, threading, datetime
|
|
3
3
|
import paths
|
|
4
4
|
from db._core import get_db, now_epoch, DB_PATH
|
|
5
5
|
|
|
@@ -328,22 +328,26 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
|
|
|
328
328
|
limit: Max results (default 20)
|
|
329
329
|
"""
|
|
330
330
|
conn = get_db()
|
|
331
|
-
|
|
331
|
+
raw_query = query.strip()
|
|
332
|
+
words = raw_query.split()
|
|
332
333
|
if not words:
|
|
333
334
|
return []
|
|
334
335
|
|
|
335
336
|
# Expand with synonyms for cross-language matching
|
|
336
337
|
all_words = _expand_synonyms(words)
|
|
337
338
|
|
|
338
|
-
# Build FTS5 query: each word as quoted term with OR for broad matching
|
|
339
|
+
# Build FTS5 query: each word as quoted term with OR for broad matching.
|
|
340
|
+
# Symbol-heavy identifiers (emails, paths, refs) need deterministic token
|
|
341
|
+
# boundaries so FTS5 never treats punctuation as query syntax.
|
|
339
342
|
fts_terms = []
|
|
340
343
|
for w in all_words:
|
|
341
344
|
# Strip FTS5 special chars to avoid syntax errors
|
|
342
|
-
safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').
|
|
345
|
+
safe = w.replace('"', '').replace("'", '').replace('*', '').replace('^', '').strip()
|
|
346
|
+
safe = re.sub(r"[-@/\\:]+", " ", safe)
|
|
343
347
|
if not safe:
|
|
344
348
|
continue
|
|
345
|
-
# Split on dots (e.g.,
|
|
346
|
-
parts = [p.strip() for p in
|
|
349
|
+
# Split on dots and punctuation boundaries (e.g., emails, paths, files).
|
|
350
|
+
parts = [p.strip() for p in re.split(r"[.\s]+", safe) if p.strip()]
|
|
347
351
|
for part in parts:
|
|
348
352
|
fts_terms.append(f'"{part}"')
|
|
349
353
|
# Add prefix search for camelCase/code identifiers (contains uppercase mid-word)
|
|
@@ -361,6 +365,24 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
|
|
|
361
365
|
params.append(limit)
|
|
362
366
|
|
|
363
367
|
try:
|
|
368
|
+
exact_rows = []
|
|
369
|
+
if re.search(r"[@/\\:.-]", raw_query):
|
|
370
|
+
exact_where = ""
|
|
371
|
+
exact_params = [f"%{raw_query}%", f"%{raw_query}%", f"%{raw_query}%"]
|
|
372
|
+
if source_filter:
|
|
373
|
+
exact_where = "AND source = ?"
|
|
374
|
+
exact_params.append(source_filter)
|
|
375
|
+
exact_params.append(limit)
|
|
376
|
+
exact_rows = conn.execute(f"""
|
|
377
|
+
SELECT source, source_id, title,
|
|
378
|
+
substr(body, 1, 240) AS snippet,
|
|
379
|
+
category, updated_at, -100.0 AS rank
|
|
380
|
+
FROM unified_search
|
|
381
|
+
WHERE (title LIKE ? OR body LIKE ? OR source_id LIKE ?) {exact_where}
|
|
382
|
+
ORDER BY updated_at DESC
|
|
383
|
+
LIMIT ?
|
|
384
|
+
""", exact_params).fetchall()
|
|
385
|
+
|
|
364
386
|
rows = conn.execute(f"""
|
|
365
387
|
SELECT source, source_id, title,
|
|
366
388
|
snippet(unified_search, 3, '»', '«', '...', 40) AS snippet,
|
|
@@ -370,7 +392,16 @@ def fts_search(query: str, source_filter: str = None, limit: int = 20) -> list[d
|
|
|
370
392
|
ORDER BY rank
|
|
371
393
|
LIMIT ?
|
|
372
394
|
""", params).fetchall()
|
|
373
|
-
|
|
395
|
+
merged = []
|
|
396
|
+
seen = set()
|
|
397
|
+
for row in list(exact_rows) + list(rows):
|
|
398
|
+
item = dict(row)
|
|
399
|
+
key = (item.get("source"), item.get("source_id"))
|
|
400
|
+
if key in seen:
|
|
401
|
+
continue
|
|
402
|
+
seen.add(key)
|
|
403
|
+
merged.append(item)
|
|
404
|
+
return merged[:limit]
|
|
374
405
|
except Exception:
|
|
375
406
|
return []
|
|
376
407
|
|
|
@@ -403,4 +434,3 @@ def _migrate_add_index(conn, index_name: str, table: str, column: str):
|
|
|
403
434
|
"""Create index if it doesn't exist (idempotent)."""
|
|
404
435
|
conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column})")
|
|
405
436
|
conn.commit()
|
|
406
|
-
|
package/src/db/_schema.py
CHANGED
|
@@ -2080,6 +2080,51 @@ def _m67_diary_quality_backfill_repair(conn):
|
|
|
2080
2080
|
_migrate_add_index(conn, "idx_diary_archive_quality", "diary_archive", "quality_tier, quality_score, created_at")
|
|
2081
2081
|
|
|
2082
2082
|
|
|
2083
|
+
def _m68_memory_fabric_index(conn):
|
|
2084
|
+
"""Memory Fabric v1 index tables for historical backup memory."""
|
|
2085
|
+
conn.executescript(
|
|
2086
|
+
"""
|
|
2087
|
+
CREATE TABLE IF NOT EXISTS memory_fabric_sources (
|
|
2088
|
+
source_id TEXT PRIMARY KEY,
|
|
2089
|
+
source_type TEXT NOT NULL,
|
|
2090
|
+
source_ref TEXT NOT NULL,
|
|
2091
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
2092
|
+
item_count INTEGER NOT NULL DEFAULT 0,
|
|
2093
|
+
last_indexed_at TEXT DEFAULT '',
|
|
2094
|
+
metadata_json TEXT NOT NULL DEFAULT '{}'
|
|
2095
|
+
);
|
|
2096
|
+
|
|
2097
|
+
CREATE TABLE IF NOT EXISTS historical_diary_index (
|
|
2098
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
2099
|
+
source_backup_path TEXT NOT NULL,
|
|
2100
|
+
source_table TEXT NOT NULL DEFAULT 'session_diary',
|
|
2101
|
+
source_row_id INTEGER NOT NULL,
|
|
2102
|
+
session_id TEXT NOT NULL DEFAULT '',
|
|
2103
|
+
created_at TEXT NOT NULL DEFAULT '',
|
|
2104
|
+
domain TEXT NOT NULL DEFAULT '',
|
|
2105
|
+
summary TEXT NOT NULL DEFAULT '',
|
|
2106
|
+
decisions TEXT NOT NULL DEFAULT '',
|
|
2107
|
+
pending TEXT NOT NULL DEFAULT '',
|
|
2108
|
+
context_next TEXT NOT NULL DEFAULT '',
|
|
2109
|
+
mental_state TEXT NOT NULL DEFAULT '',
|
|
2110
|
+
self_critique TEXT NOT NULL DEFAULT '',
|
|
2111
|
+
source TEXT NOT NULL DEFAULT '',
|
|
2112
|
+
content_hash TEXT NOT NULL UNIQUE,
|
|
2113
|
+
indexed_at TEXT DEFAULT (datetime('now')),
|
|
2114
|
+
metadata_json TEXT NOT NULL DEFAULT '{}',
|
|
2115
|
+
UNIQUE(source_backup_path, source_table, source_row_id)
|
|
2116
|
+
);
|
|
2117
|
+
|
|
2118
|
+
CREATE INDEX IF NOT EXISTS idx_historical_diary_session
|
|
2119
|
+
ON historical_diary_index(session_id);
|
|
2120
|
+
CREATE INDEX IF NOT EXISTS idx_historical_diary_created
|
|
2121
|
+
ON historical_diary_index(created_at);
|
|
2122
|
+
CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
|
|
2123
|
+
ON historical_diary_index(domain);
|
|
2124
|
+
"""
|
|
2125
|
+
)
|
|
2126
|
+
|
|
2127
|
+
|
|
2083
2128
|
MIGRATIONS = [
|
|
2084
2129
|
(1, "learnings_columns", _m1_learnings_columns),
|
|
2085
2130
|
(2, "followups_reasoning", _m2_followups_reasoning),
|
|
@@ -2148,6 +2193,7 @@ MIGRATIONS = [
|
|
|
2148
2193
|
(65, "diary_quality", _m65_diary_quality),
|
|
2149
2194
|
(66, "transcript_index", _m66_transcript_index),
|
|
2150
2195
|
(67, "diary_quality_backfill_repair", _m67_diary_quality_backfill_repair),
|
|
2196
|
+
(68, "memory_fabric_index", _m68_memory_fabric_index),
|
|
2151
2197
|
]
|
|
2152
2198
|
|
|
2153
2199
|
|
|
package/src/doctor/providers/runtime.py
CHANGED
|
@@ -3900,6 +3900,74 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
|
|
|
3900
3900
|
)
|
|
3901
3901
|
|
|
3902
3902
|
|
|
3903
|
+
def check_memory_fabric_health(fix: bool = False) -> DoctorCheck:
|
|
3904
|
+
try:
|
|
3905
|
+
import memory_fabric
|
|
3906
|
+
|
|
3907
|
+
repair = None
|
|
3908
|
+
if fix:
|
|
3909
|
+
repair = memory_fabric.repair_memory_fabric(
|
|
3910
|
+
transcript_hours=720,
|
|
3911
|
+
transcript_limit=1000,
|
|
3912
|
+
backup_limit=5000,
|
|
3913
|
+
)
|
|
3914
|
+
report = memory_fabric.memory_fabric_health(include_backup_scan=True)
|
|
3915
|
+
issues = report.get("issues") or []
|
|
3916
|
+
evidence = [
|
|
3917
|
+
"transcripts=" + json.dumps(report.get("transcripts") or {}, sort_keys=True),
|
|
3918
|
+
"historical_diaries=" + json.dumps(report.get("historical_diaries") or {}, sort_keys=True),
|
|
3919
|
+
"local_context=" + json.dumps(report.get("local_context") or {}, sort_keys=True),
|
|
3920
|
+
"knowledge_graph=" + json.dumps(report.get("knowledge_graph") or {}, sort_keys=True),
|
|
3921
|
+
]
|
|
3922
|
+
evidence.extend(
|
|
3923
|
+
f"issue={item.get('severity')}:{item.get('code')}:{item.get('message')}"
|
|
3924
|
+
for item in issues[:6]
|
|
3925
|
+
if isinstance(item, dict)
|
|
3926
|
+
)
|
|
3927
|
+
if repair:
|
|
3928
|
+
evidence.append("repair=" + json.dumps({
|
|
3929
|
+
"transcripts_indexed": (repair.get("transcripts") or {}).get("indexed"),
|
|
3930
|
+
"historical_diaries_inserted": (repair.get("backups") or {}).get("inserted"),
|
|
3931
|
+
}, sort_keys=True))
|
|
3932
|
+
blocking = [
|
|
3933
|
+
item for item in issues
|
|
3934
|
+
if isinstance(item, dict) and item.get("code") in {"transcript_index_empty", "backup_diaries_not_reconciled"}
|
|
3935
|
+
]
|
|
3936
|
+
if not blocking:
|
|
3937
|
+
return DoctorCheck(
|
|
3938
|
+
id="runtime.memory_fabric",
|
|
3939
|
+
tier="runtime",
|
|
3940
|
+
status="healthy",
|
|
3941
|
+
severity="info",
|
|
3942
|
+
summary="Memory Fabric coverage is queryable",
|
|
3943
|
+
evidence=evidence,
|
|
3944
|
+
repair_plan=[],
|
|
3945
|
+
fixed=bool(repair),
|
|
3946
|
+
)
|
|
3947
|
+
return DoctorCheck(
|
|
3948
|
+
id="runtime.memory_fabric",
|
|
3949
|
+
tier="runtime",
|
|
3950
|
+
status="degraded",
|
|
3951
|
+
severity="warn",
|
|
3952
|
+
summary="Memory Fabric coverage needs repair",
|
|
3953
|
+
evidence=evidence,
|
|
3954
|
+
repair_plan=["Run `nexo doctor --tier runtime --fix` or `nexo update` to warm transcript and historical backup indexes"],
|
|
3955
|
+
escalation_prompt="Some memory sources exist outside the active query indexes, so exact historical lookup may fall back to slow raw scans.",
|
|
3956
|
+
fixed=bool(repair),
|
|
3957
|
+
)
|
|
3958
|
+
except Exception as exc:
|
|
3959
|
+
return DoctorCheck(
|
|
3960
|
+
id="runtime.memory_fabric",
|
|
3961
|
+
tier="runtime",
|
|
3962
|
+
status="degraded",
|
|
3963
|
+
severity="warn",
|
|
3964
|
+
summary="Memory Fabric health could not be checked",
|
|
3965
|
+
evidence=[str(exc)],
|
|
3966
|
+
repair_plan=["Inspect memory_fabric.py and DB migrations"],
|
|
3967
|
+
escalation_prompt="Support cannot verify unified memory coverage.",
|
|
3968
|
+
)
|
|
3969
|
+
|
|
3970
|
+
|
|
3903
3971
|
def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
3904
3972
|
"""Run all runtime-tier checks. Read-only by default."""
|
|
3905
3973
|
return [
|
|
@@ -3922,6 +3990,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
|
3922
3990
|
safe_check(check_automation_caller_coverage),
|
|
3923
3991
|
safe_check(check_state_watchers),
|
|
3924
3992
|
safe_check(check_local_index_hygiene, fix=fix),
|
|
3993
|
+
safe_check(check_memory_fabric_health, fix=fix),
|
|
3925
3994
|
safe_check(check_release_artifact_sync),
|
|
3926
3995
|
safe_check(check_release_trace_hygiene),
|
|
3927
3996
|
safe_check(check_launchagent_inventory),
|
package/src/hook_guardrails.py
CHANGED
|
@@ -76,6 +76,26 @@ SHELL_WRITE_BASES = {
|
|
|
76
76
|
"rsync",
|
|
77
77
|
}
|
|
78
78
|
SHELL_REDIRECT_TOKENS = {">", ">>", "1>", "1>>", "2>", "2>>"}
|
|
79
|
+
# Flags whose *next* token is a non-path argument (User-Agent strings,
|
|
80
|
+
# headers, query payloads, URLs, etc.). Without this whitelist the bash
|
|
81
|
+
# path extractor lifts fragments like ``Mozilla/5.0`` and ``AppleWebKit/
|
|
82
|
+
# 537.36`` as candidate paths, generating a ``g4_guard_check_required``
|
|
83
|
+
# debt for every curl/wget with a ``-A`` flag. Tracked by NF-AUDIT-
|
|
84
|
+
# 20260522-DRAIN-WHITELIST-EXPAND (W2).
|
|
85
|
+
SHELL_FLAGS_WITH_NON_PATH_ARG = {
|
|
86
|
+
"-A", "--user-agent",
|
|
87
|
+
"-H", "--header",
|
|
88
|
+
"-e", "--referer",
|
|
89
|
+
"-X", "--request",
|
|
90
|
+
"-d", "--data", "--data-raw", "--data-binary", "--data-urlencode",
|
|
91
|
+
"-F", "--form",
|
|
92
|
+
"-u", "--user",
|
|
93
|
+
"--url",
|
|
94
|
+
"--cookie", "-b",
|
|
95
|
+
"--connect-timeout", "--max-time",
|
|
96
|
+
"--retry", "--retry-delay", "--retry-max-time",
|
|
97
|
+
"--resolve",
|
|
98
|
+
}
|
|
79
99
|
INLINE_INTERPRETER_BASES = {
|
|
80
100
|
"python",
|
|
81
101
|
"python3",
|
|
@@ -392,6 +412,14 @@ def _looks_like_real_path(path: str) -> bool:
|
|
|
392
412
|
stripped = raw.lstrip("/")
|
|
393
413
|
if stripped and re.fullmatch(r"\d+", stripped):
|
|
394
414
|
return False
|
|
415
|
+
# Version-like fragments (``/5.0``, ``/537.36``, ``/140.0.0.0``) come
|
|
416
|
+
# from User-Agent / library version strings that the EMBEDDED_PATH_RE
|
|
417
|
+
# scanner lifts out of curl/wget commands. Token-level skip
|
|
418
|
+
# (SHELL_FLAGS_WITH_NON_PATH_ARG) does not help here because the regex
|
|
419
|
+
# runs against the raw command string. Filter them out at the path
|
|
420
|
+
# plausibility check instead.
|
|
421
|
+
if stripped and re.fullmatch(r"\d+(?:\.\d+)+", stripped):
|
|
422
|
+
return False
|
|
395
423
|
if raw.lower() in _PATH_DICTIONARY_BLOCKLIST:
|
|
396
424
|
return False
|
|
397
425
|
# Reject single-segment ``/word`` candidates that do not exist on the
|
|
@@ -697,12 +725,25 @@ def _extract_bash_touched_files(tool_input) -> list[str]:
|
|
|
697
725
|
seen.add(normalized)
|
|
698
726
|
candidates.append(resolved)
|
|
699
727
|
|
|
728
|
+
skip_next = False
|
|
700
729
|
for index, token in enumerate(tokens):
|
|
730
|
+
if skip_next:
|
|
731
|
+
skip_next = False
|
|
732
|
+
continue
|
|
701
733
|
if token in SHELL_REDIRECT_TOKENS:
|
|
702
734
|
if index + 1 < len(tokens):
|
|
703
735
|
add(tokens[index + 1])
|
|
704
736
|
continue
|
|
705
737
|
if token.startswith("-"):
|
|
738
|
+
# ``--flag=value`` is self-contained; the value is glued so the
|
|
739
|
+
# extractor already ignores it. For the separated form
|
|
740
|
+
# (``-A "Mozilla/5.0 ..."`` / ``-H "X-Foo: bar"``) we have to
|
|
741
|
+
# skip the NEXT token, otherwise the path extractor lifts
|
|
742
|
+
# User-Agent / header / payload fragments as candidate paths
|
|
743
|
+
# and floods the audit with ``g4_guard_check_required`` noise.
|
|
744
|
+
flag_head = token.split("=", 1)[0]
|
|
745
|
+
if flag_head in SHELL_FLAGS_WITH_NON_PATH_ARG and "=" not in token:
|
|
746
|
+
skip_next = True
|
|
706
747
|
continue
|
|
707
748
|
if (
|
|
708
749
|
token.startswith(("/", "~", ".", "$"))
|