nexo-brain 7.20.8 → 7.20.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +3 -1
- package/bin/windows-wsl-bridge.js +20 -0
- package/package.json +1 -1
- package/src/crons/sync.py +125 -7
- package/src/local_context/api.py +50 -10
- package/src/local_context/extractors.py +3 -0
- package/src/local_context/privacy.py +8 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.8",
|
|
3
|
+
"version": "7.20.9",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.20.
|
|
21
|
+
Version `7.20.9` is the current packaged-runtime line. Patch release over v7.20.8 — Local Context scans automatic roots at full operational depth, falls back to crontab when Linux/WSL systemd user timers fail, passes Windows AppData email roots into WSL, and blocks Google API keys before HTML cleaning.
|
|
22
|
+
|
|
23
|
+
Previously in `7.20.8`: patch release over v7.20.7 — Local Context recognises Windows Mail package roots and Outlook Mac profile roots as bounded local-email sources instead of rejecting them as generic AppData / Group Containers.
|
|
22
24
|
|
|
23
25
|
Previously in `7.20.7`: patch release over v7.20.6 — Local Context email-root bootstrap is deterministic across CI, WSL and migrated profiles while preserving macOS Mail.app, Windows Outlook, Thunderbird and NEXO email coverage.
|
|
24
26
|
|
|
@@ -93,6 +93,20 @@ function resolveLinuxEnv(env = process.env) {
|
|
|
93
93
|
return linuxEnv;
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
+
/**
 * Collect the Windows host LOCALAPPDATA / APPDATA locations in a form that can
 * be handed to a process running inside WSL.
 *
 * Values that look like Windows paths are translated with toWslPath(); values
 * that already start with "/" are passed through untouched; anything else
 * (including empty or missing values) is left out of the result.
 *
 * @param {Object} [env=process.env] Environment map to read from.
 * @returns {Object} Map with at most the keys LOCALAPPDATA and APPDATA.
 */
function resolveWindowsHostPathEnv(env = process.env) {
  const forwarded = {};
  ["LOCALAPPDATA", "APPDATA"].forEach((name) => {
    const raw = String(env[name] || "").trim();
    if (raw === "") {
      return;
    }
    if (isWindowsStylePath(raw)) {
      forwarded[name] = toWslPath(raw);
      return;
    }
    if (raw.startsWith("/")) {
      forwarded[name] = raw;
    }
  });
  return forwarded;
}
|
|
109
|
+
|
|
96
110
|
function uniqueValues(values = []) {
|
|
97
111
|
const seen = new Set();
|
|
98
112
|
return values.filter((value) => {
|
|
@@ -242,6 +256,10 @@ function buildWslExecSpec({
|
|
|
242
256
|
for (const [key, value] of Object.entries(linuxEnv)) {
|
|
243
257
|
wslArgs.push(`${key}=${value}`);
|
|
244
258
|
}
|
|
259
|
+
const windowsHostPathEnv = resolveWindowsHostPathEnv(env);
|
|
260
|
+
for (const [key, value] of Object.entries(windowsHostPathEnv)) {
|
|
261
|
+
wslArgs.push(`${key}=${value}`);
|
|
262
|
+
}
|
|
245
263
|
|
|
246
264
|
// Build the staging shell script. Stages the bundle from /mnt/c (DrvFs/9P)
|
|
247
265
|
// to /tmp (native ext4) BEFORE invoking node. Without staging, node hangs
|
|
@@ -296,6 +314,7 @@ function buildWslExecSpec({
|
|
|
296
314
|
command: "wsl.exe",
|
|
297
315
|
args: wslArgs,
|
|
298
316
|
linuxEnv,
|
|
317
|
+
windowsHostPathEnv,
|
|
299
318
|
managedLinuxPath,
|
|
300
319
|
translatedScriptPath,
|
|
301
320
|
};
|
|
@@ -338,6 +357,7 @@ module.exports = {
|
|
|
338
357
|
probeWslUserHome,
|
|
339
358
|
resolveLinuxEnv,
|
|
340
359
|
resolveLinuxUserHome,
|
|
360
|
+
resolveWindowsHostPathEnv,
|
|
341
361
|
runViaWsl,
|
|
342
362
|
toWslPath,
|
|
343
363
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.20.8",
|
|
3
|
+
"version": "7.20.9",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/crons/sync.py
CHANGED
|
@@ -20,6 +20,7 @@ import json
|
|
|
20
20
|
import os
|
|
21
21
|
import platform
|
|
22
22
|
import plistlib
|
|
23
|
+
import shlex
|
|
23
24
|
import shutil
|
|
24
25
|
import subprocess
|
|
25
26
|
import sys
|
|
@@ -133,6 +134,8 @@ SCHEDULE_FILE = paths.config_dir() / "schedule.json"
|
|
|
133
134
|
CORE_CRON_MANAGED_ENV = "NEXO_MANAGED_CORE_CRON"
|
|
134
135
|
PERSONAL_CRON_MANAGED_ENV = "NEXO_MANAGED_PERSONAL_CRON"
|
|
135
136
|
PERSONAL_CRON_ID_ENV = "NEXO_PERSONAL_CRON_ID"
|
|
137
|
+
CRONTAB_BEGIN = "# >>> NEXO managed core crons >>>"
|
|
138
|
+
CRONTAB_END = "# <<< NEXO managed core crons <<<"
|
|
136
139
|
RETIRED_CORE_FILES = (
|
|
137
140
|
Path("core") / "scripts" / "nexo-day-orchestrator.sh",
|
|
138
141
|
Path("scripts") / "nexo-day-orchestrator.sh",
|
|
@@ -457,6 +460,106 @@ def build_plist(cron: dict) -> dict:
|
|
|
457
460
|
return plist
|
|
458
461
|
|
|
459
462
|
|
|
463
|
+
def _shell_join(args: list[str | Path]) -> str:
|
|
464
|
+
return " ".join(shlex.quote(str(arg)) for arg in args)
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def _cron_schedule(cron: dict) -> str | None:
|
|
468
|
+
if cron.get("keep_alive"):
|
|
469
|
+
return None
|
|
470
|
+
if "interval_seconds" in cron:
|
|
471
|
+
try:
|
|
472
|
+
seconds = int(cron["interval_seconds"])
|
|
473
|
+
except Exception:
|
|
474
|
+
return None
|
|
475
|
+
if seconds <= 0 or seconds % 60 != 0:
|
|
476
|
+
return None
|
|
477
|
+
minutes = max(1, seconds // 60)
|
|
478
|
+
return "* * * * *" if minutes == 1 else f"*/{minutes} * * * *"
|
|
479
|
+
if "schedule" in cron:
|
|
480
|
+
s = resolve_declared_schedule(cron)
|
|
481
|
+
hour, minute = int(s.get("hour", 0)), int(s.get("minute", 0))
|
|
482
|
+
weekday = "*"
|
|
483
|
+
if "weekday" in s:
|
|
484
|
+
raw_weekday = int(s["weekday"])
|
|
485
|
+
weekday = "0" if raw_weekday == 7 else str(raw_weekday)
|
|
486
|
+
return f"{minute} {hour} * * {weekday}"
|
|
487
|
+
return None
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _linux_crontab_entry(cron: dict, exec_cmd: str, stdout_log: Path, stderr_log: Path) -> str | None:
    """Build the crontab line for one cron, or ``None`` if it has no schedule.

    The line has the shape
    ``<schedule> <ENV=value ...> <exec_cmd> >> <stdout_log> 2>> <stderr_log>``.
    Environment values and log paths are shell-quoted; ``exec_cmd`` is
    inserted verbatim, so the caller must pass it already quoted.
    """
    schedule = _cron_schedule(cron)
    if not schedule:
        return None

    environment = {
        "HOME": Path.home(),
        "NEXO_HOME": NEXO_HOME,
        "NEXO_CODE": _runtime_code_dir(),
        "PYTHONUNBUFFERED": "1",
    }
    assignments = [
        f"{name}={shlex.quote(str(setting))}"
        for name, setting in environment.items()
    ]
    out_target = shlex.quote(str(stdout_log))
    err_target = shlex.quote(str(stderr_log))
    # NOTE(review): cron implementations commonly treat an unescaped "%" in the
    # command as a newline; paths containing "%" may need escaping - confirm.
    return " ".join(
        [schedule, " ".join(assignments), exec_cmd, ">>", out_target, "2>>", err_target]
    )
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _strip_managed_crontab_block(body: str) -> str:
    """Return *body* without the NEXO-managed crontab section.

    Lines between ``CRONTAB_BEGIN`` and ``CRONTAB_END`` (compared after
    stripping surrounding whitespace) are dropped together with the marker
    lines themselves. If a begin marker has no matching end marker, every
    line after it is dropped. Trailing whitespace is removed from the result.
    """
    survivors: list[str] = []
    inside_block = False
    for raw_line in body.splitlines():
        marker = raw_line.strip()
        if marker == CRONTAB_BEGIN or marker == CRONTAB_END:
            # Marker lines toggle the state and are never kept.
            inside_block = marker == CRONTAB_BEGIN
            continue
        if inside_block:
            continue
        survivors.append(raw_line)
    return "\n".join(survivors).rstrip()
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _install_linux_crontab_fallback(entries: list[str]) -> dict:
    """Install *entries* as the NEXO-managed section of the user's crontab.

    The current crontab is read with ``crontab -l``; any previous managed
    section is removed, the new one is appended, and the result is installed
    through a temporary file passed to ``crontab``.

    Returns:
        ``{"ok": True, "entries": <count>}`` on success, otherwise
        ``{"ok": False, "error": <reason>}`` where the reason is
        ``"no_crontab_entries"``, ``"crontab_missing"``, or the output of the
        failed ``crontab`` invocation.
    """
    if not entries:
        return {"ok": False, "error": "no_crontab_entries"}
    if shutil.which("crontab") is None:
        return {"ok": False, "error": "crontab_missing"}

    listing = subprocess.run(["crontab", "-l"], capture_output=True, text=True)
    # A non-zero exit from "crontab -l" is treated as an empty crontab.
    previous = listing.stdout if listing.returncode == 0 else ""
    foreign = _strip_managed_crontab_block(previous)
    managed = "\n".join([CRONTAB_BEGIN, *entries, CRONTAB_END])
    if foreign:
        payload = f"{foreign}\n\n{managed}\n"
    else:
        payload = f"{managed}\n"

    staged = None
    try:
        with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False) as handle:
            staged = handle.name
            handle.write(payload)
        install = subprocess.run(["crontab", staged], capture_output=True, text=True)
    finally:
        if staged:
            # Best-effort cleanup: a leftover temp file must not mask the result.
            try:
                Path(staged).unlink(missing_ok=True)
            except Exception:
                pass

    if install.returncode != 0:
        return {"ok": False, "error": install.stderr or install.stdout or "crontab_install_failed"}
    return {"ok": True, "entries": len(entries)}
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _enable_systemd_user_units(units: list[str]) -> dict:
    """Reload the systemd user manager, then enable and start each unit.

    Every failure is collected instead of raised, so the caller can decide to
    fall back to another scheduler. This includes the case where the
    ``systemctl`` executable is missing or cannot be launched.

    Args:
        units: Unit names passed to ``systemctl --user enable --now``.

    Returns:
        ``{"ok": bool, "errors": list[str]}``; ``ok`` is true only when no
        error was recorded.
    """
    if shutil.which("systemctl") is None:
        # Without this guard subprocess.run raises FileNotFoundError and the
        # caller never gets the failure result it needs to fall back.
        return {"ok": False, "errors": ["systemctl not found on PATH"]}

    def _run(args: list[str], default_error: str) -> str | None:
        """Run ``systemctl --user <args>``; return an error text or ``None``."""
        try:
            proc = subprocess.run(
                ["systemctl", "--user", *args], capture_output=True, text=True
            )
        except OSError as exc:
            return f"{default_error}: {exc}"
        if proc.returncode != 0:
            return proc.stderr or proc.stdout or default_error
        return None

    errors: list[str] = []
    failure = _run(["daemon-reload"], "systemctl daemon-reload failed")
    if failure is not None:
        errors.append(failure)
    for unit in units:
        failure = _run(["enable", "--now", unit], "enable failed")
        if failure is not None:
            errors.append(f"{unit}: {failure}")
    return {"ok": not errors, "errors": errors}
|
|
561
|
+
|
|
562
|
+
|
|
460
563
|
def get_installed_nexo_crons() -> dict[str, Path]:
|
|
461
564
|
"""Return dict of cron_id → plist_path for installed NEXO crons."""
|
|
462
565
|
installed = {}
|
|
@@ -670,6 +773,9 @@ def sync_linux(dry_run: bool = False):
|
|
|
670
773
|
python_bin = p
|
|
671
774
|
break
|
|
672
775
|
|
|
776
|
+
enable_units: list[str] = []
|
|
777
|
+
crontab_entries: list[str] = []
|
|
778
|
+
|
|
673
779
|
for cron in manifest_crons:
|
|
674
780
|
cron_id = cron["id"]
|
|
675
781
|
script_src = _resolve_source_artifact(cron["script"])
|
|
@@ -683,9 +789,9 @@ def sync_linux(dry_run: bool = False):
|
|
|
683
789
|
_copy_into_runtime(subdir_src)
|
|
684
790
|
|
|
685
791
|
if script_type == "shell":
|
|
686
|
-
exec_cmd =
|
|
792
|
+
exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, "/bin/bash", script_dest])
|
|
687
793
|
else:
|
|
688
|
-
exec_cmd =
|
|
794
|
+
exec_cmd = _shell_join(["/bin/bash", wrapper_dest, cron_id, python_bin, script_dest])
|
|
689
795
|
|
|
690
796
|
service_path = unit_dir / f"nexo-{cron_id}.service"
|
|
691
797
|
timer_path = unit_dir / f"nexo-{cron_id}.timer"
|
|
@@ -734,6 +840,7 @@ StandardError=append:{stderr_log}
|
|
|
734
840
|
|
|
735
841
|
service_path.write_text(service_content)
|
|
736
842
|
if cron.get("keep_alive"):
|
|
843
|
+
enable_units.append(f"nexo-{cron_id}.service")
|
|
737
844
|
log(f" Installed keep_alive service: {cron_id}")
|
|
738
845
|
continue
|
|
739
846
|
|
|
@@ -748,14 +855,25 @@ Persistent=true
|
|
|
748
855
|
WantedBy=timers.target
|
|
749
856
|
"""
|
|
750
857
|
timer_path.write_text(timer_content)
|
|
858
|
+
enable_units.append(f"nexo-{cron_id}.timer")
|
|
859
|
+
crontab_entry = _linux_crontab_entry(cron, exec_cmd, stdout_log, stderr_log)
|
|
860
|
+
if crontab_entry:
|
|
861
|
+
crontab_entries.append(crontab_entry)
|
|
751
862
|
log(f" Installed: {cron_id}")
|
|
752
863
|
|
|
753
864
|
if not dry_run:
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
865
|
+
systemd_result = _enable_systemd_user_units(enable_units)
|
|
866
|
+
if systemd_result.get("ok"):
|
|
867
|
+
log("systemd units enabled.")
|
|
868
|
+
else:
|
|
869
|
+
log(f"WARNING: systemd user timers failed; installing crontab fallback: {systemd_result.get('errors')}")
|
|
870
|
+
fallback = _install_linux_crontab_fallback(crontab_entries)
|
|
871
|
+
if not fallback.get("ok"):
|
|
872
|
+
raise RuntimeError(
|
|
873
|
+
"Linux cron activation failed: "
|
|
874
|
+
f"systemd={systemd_result.get('errors')} crontab={fallback.get('error')}"
|
|
875
|
+
)
|
|
876
|
+
log(f"crontab fallback installed ({fallback.get('entries')} entries).")
|
|
759
877
|
|
|
760
878
|
log("Sync complete.")
|
|
761
879
|
|
package/src/local_context/api.py
CHANGED
|
@@ -26,6 +26,9 @@ LOCAL_INDEX_LINUX_UNIT = "nexo-local-index.service"
|
|
|
26
26
|
DEFAULT_LIVE_ASSET_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_ASSET_LIMIT", "2000") or "2000")
|
|
27
27
|
DEFAULT_LIVE_DIR_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_DIR_LIMIT", "300") or "300")
|
|
28
28
|
DEFAULT_LIVE_FILE_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_LIVE_FILE_LIMIT", "1000") or "1000")
|
|
29
|
+
DEFAULT_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_DEPTH", "24") or "24")
|
|
30
|
+
DEFAULT_EMAIL_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_EMAIL_ROOT_DEPTH", "24") or "24")
|
|
31
|
+
DEFAULT_MOUNTED_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_MOUNTED_ROOT_DEPTH", "24") or "24")
|
|
29
32
|
|
|
30
33
|
|
|
31
34
|
def ensure_ready() -> None:
|
|
@@ -91,6 +94,21 @@ def _dedupe_roots(roots: list[str]) -> list[str]:
|
|
|
91
94
|
return result
|
|
92
95
|
|
|
93
96
|
|
|
97
|
+
def _dedupe_root_specs(specs: list[tuple[str, int]]) -> list[tuple[str, int]]:
    """Collapse duplicate roots, keeping the largest depth seen for each.

    Roots are compared after ``norm_path``; entries whose normalized path is
    empty are dropped. The result keeps the order in which each normalized
    root first appeared.
    """
    deepest: dict[str, int] = {}
    for root, depth in specs:
        key = norm_path(root)
        if not key:
            continue
        level = int(depth)
        # Re-assigning an existing key keeps its original position in the dict.
        deepest[key] = level if key not in deepest else max(deepest[key], level)
    return list(deepest.items())
|
|
110
|
+
|
|
111
|
+
|
|
94
112
|
def _mounted_volume_roots() -> list[str]:
|
|
95
113
|
candidates: list[Path] = []
|
|
96
114
|
if sys.platform == "darwin":
|
|
@@ -156,23 +174,45 @@ def _local_email_roots() -> list[str]:
|
|
|
156
174
|
|
|
157
175
|
|
|
158
176
|
def default_roots() -> list[str]:
    """Return only the root paths from ``default_root_specs()``, in order."""
    paths: list[str] = []
    for root_path, _ in default_root_specs():
        paths.append(root_path)
    return paths
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def default_root_specs() -> list[tuple[str, int]]:
    """Return the default ``(root, depth)`` pairs to index, de-duplicated.

    When ``NEXO_LOCAL_INDEX_DEFAULT_ROOTS`` is set to a non-blank value, its
    ``os.pathsep``-separated items are used, each with ``DEFAULT_ROOT_DEPTH``.
    Otherwise the list is the home directory, the local email roots and the
    mounted volume roots, each with its own default depth.
    """
    home = Path.home()
    configured = os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_ROOTS", "").strip()
    if configured:
        overrides: list[tuple[str, int]] = []
        for item in configured.split(os.pathsep):
            # Blank items are skipped; kept items are passed on unstripped.
            if item.strip():
                overrides.append((item, DEFAULT_ROOT_DEPTH))
        return _dedupe_root_specs(overrides)

    specs: list[tuple[str, int]] = [(str(home), DEFAULT_ROOT_DEPTH)]
    specs.extend((root, DEFAULT_EMAIL_ROOT_DEPTH) for root in _local_email_roots())
    specs.extend((root, DEFAULT_MOUNTED_ROOT_DEPTH) for root in _mounted_volume_roots())
    return _dedupe_root_specs(specs)
|
|
164
192
|
|
|
165
193
|
|
|
166
194
|
def ensure_default_roots() -> dict:
    """Register every default root that exists on disk as a directory.

    Roots that are not yet registered are added in ``normal`` mode with their
    default depth. Roots that are already registered with a smaller depth get
    their depth raised; a registered depth that is already equal or larger is
    left alone.

    Returns:
        ``{"ok": True, "created": <count>, "updated": <count>, "roots": [...]}``
        where ``roots`` is a fresh ``list_roots()`` result.
    """
    known = {row["root_path"]: row for row in list_roots()}
    created = []
    updated = []
    for root, depth in default_root_specs():
        candidate = Path(root).expanduser()
        if not (candidate.exists() and candidate.is_dir()):
            continue

        row = known.get(norm_path(str(candidate)))
        if not row:
            created.append(add_root(str(candidate), mode="normal", depth=depth))
            continue

        # Only ever raise a stored depth, never lower it.
        if int(row.get("depth") or 0) >= depth:
            continue
        conn = _conn()
        conn.execute(
            "UPDATE local_index_roots SET depth=?, updated_at=? WHERE root_path=?",
            (depth, now(), row["root_path"]),
        )
        conn.commit()
        updated.append({"root_path": row["root_path"], "depth": depth})

    return {"ok": True, "created": len(created), "updated": len(updated), "roots": list_roots()}
|
|
176
216
|
|
|
177
217
|
|
|
178
218
|
def _should_skip_mounted_root(candidate: Path) -> bool:
|
|
@@ -1348,7 +1388,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1348
1388
|
if job_type == "light_extraction":
|
|
1349
1389
|
text, metadata = extract_text(Path(row["path"]))
|
|
1350
1390
|
version_id = _latest_version_id(conn, asset_id)
|
|
1351
|
-
if contains_secret(text):
|
|
1391
|
+
if metadata.get("content_secret_detected") or contains_secret(text):
|
|
1352
1392
|
_mark_content_secret_assets(conn, [asset_id])
|
|
1353
1393
|
conn.execute(
|
|
1354
1394
|
"UPDATE local_index_jobs SET status='done', updated_at=?, last_error_code='content_secret_blocked' WHERE job_id=?",
|
|
@@ -41,6 +41,7 @@ SECRET_PATTERNS: tuple[re.Pattern, ...] = (
|
|
|
41
41
|
re.compile(r"\bpk-(?:[a-z]+-)?[A-Za-z0-9_\-]{20,}\b"),
|
|
42
42
|
re.compile(r"\b(ghp|gho|ghu|ghs|ghr|github_pat|glpat|xoxb|xoxp|shpat)_[A-Za-z0-9_]{16,}\b", re.I),
|
|
43
43
|
re.compile(r"\b(AKIA|ASIA)[A-Z0-9]{16,}\b"),
|
|
44
|
+
re.compile(r"\bAIza[0-9A-Za-z_-]{30,}\b"),
|
|
44
45
|
re.compile(r"\bey[A-Za-z0-9_-]{10,}\.ey[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"),
|
|
45
46
|
re.compile(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----", re.I),
|
|
46
47
|
re.compile(r"\b([A-Z][A-Z0-9_]*(?:TOKEN|SECRET|KEY|PASSWORD|PASS)\s*[:=]\s*)['\"]?[A-Za-z0-9._/+=\-]{12,}", re.I),
|
|
@@ -290,6 +291,8 @@ def extract_text(path: Path) -> tuple[str, dict]:
|
|
|
290
291
|
text = _extract_xlsx(path)
|
|
291
292
|
else:
|
|
292
293
|
text = ""
|
|
294
|
+
if contains_secret(text):
|
|
295
|
+
metadata["content_secret_detected"] = True
|
|
293
296
|
return clean_text(text), metadata
|
|
294
297
|
|
|
295
298
|
|
|
@@ -89,6 +89,13 @@ EMAIL_ATTACHMENT_SUFFIXES = {
|
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
EMAIL_EXTRACTABLE_SUFFIXES = {".eml", ".emlx", ".msg"}
|
|
92
|
+
OUTLOOK_MAC_INVENTORY_SUFFIXES = {
|
|
93
|
+
".olk15message",
|
|
94
|
+
".olk15msgsource",
|
|
95
|
+
".olk15msgattach",
|
|
96
|
+
".olk15event",
|
|
97
|
+
".olk15contact",
|
|
98
|
+
}
|
|
92
99
|
|
|
93
100
|
NOISY_PARTS = {
|
|
94
101
|
"node_modules",
|
|
@@ -261,7 +268,7 @@ def is_allowed_local_email_file(path: str) -> bool:
|
|
|
261
268
|
"appdata/local/packages/microsoft.windowscommunicationsapps",
|
|
262
269
|
)
|
|
263
270
|
) or _is_inside_windows_mail_package(lowered) or _is_inside_outlook_mac_profile(lowered):
|
|
264
|
-
return suffix in {".eml", ".msg", ".pst", ".ost"}
|
|
271
|
+
return suffix in {".eml", ".msg", ".pst", ".ost"} | OUTLOOK_MAC_INVENTORY_SUFFIXES
|
|
265
272
|
if _is_under_marker(lowered, ".thunderbird") or _is_under_marker(lowered, ".mozilla-thunderbird"):
|
|
266
273
|
return suffix in {".eml", ".mbox", ""}
|
|
267
274
|
return False
|