claude-code-session-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_code_session_sync-0.1.0.dist-info/METADATA +87 -0
- claude_code_session_sync-0.1.0.dist-info/RECORD +31 -0
- claude_code_session_sync-0.1.0.dist-info/WHEEL +5 -0
- claude_code_session_sync-0.1.0.dist-info/entry_points.txt +2 -0
- claude_code_session_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- claude_code_session_sync-0.1.0.dist-info/top_level.txt +1 -0
- claude_session_sync/__init__.py +11 -0
- claude_session_sync/acks.py +279 -0
- claude_session_sync/anomaly.py +161 -0
- claude_session_sync/apply.py +874 -0
- claude_session_sync/atomicio.py +621 -0
- claude_session_sync/bootstrap.py +370 -0
- claude_session_sync/canonical.py +185 -0
- claude_session_sync/classify.py +133 -0
- claude_session_sync/cli.py +1065 -0
- claude_session_sync/config.py +128 -0
- claude_session_sync/doctor.py +351 -0
- claude_session_sync/fuzzy.py +136 -0
- claude_session_sync/lineset.py +143 -0
- claude_session_sync/memory.py +953 -0
- claude_session_sync/merge.py +836 -0
- claude_session_sync/pathsafe.py +91 -0
- claude_session_sync/py.typed +0 -0
- claude_session_sync/resolve.py +226 -0
- claude_session_sync/scan.py +485 -0
- claude_session_sync/session_merge.py +214 -0
- claude_session_sync/sidecar.py +238 -0
- claude_session_sync/snapshot.py +136 -0
- claude_session_sync/state.py +240 -0
- claude_session_sync/tombstone.py +330 -0
- claude_session_sync/transfer.py +462 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""設定檔(決定 #1/#3/#4):`tomllib` 讀 + 手寫極簡 writer,零第三方相依、跨 OS。
|
|
2
|
+
|
|
3
|
+
config.toml(DESIGN §8.4):
|
|
4
|
+
own_hub = '/media/will/HomeDrive/HomeJSONL'
|
|
5
|
+
force_unsafe_lock = false # 決定 #8:不可靠 FS 預設 best-effort+偵測升級 abort;
|
|
6
|
+
# 設 true 等於明確 --force-unsafe-lock 永久版
|
|
7
|
+
[remotes]
|
|
8
|
+
office = '/media/will/HomeDrive/OfficeJSONL'
|
|
9
|
+
|
|
10
|
+
跨路徑 local↔hub 綁定(A17.4)放 state.json,不在這裡。
|
|
11
|
+
寫入用 **TOML literal string(單引號)** 容納 Windows 反斜線路徑,免轉義(決定 #4 須測)。
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import tomllib
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Directory name used under the per-user configuration root.
APP = "claude-session-sync"
# Characters TOML allows in an unquoted ("bare") key.  Anchored with \Z rather
# than $ because $ also matches just before a trailing newline, which would
# let a name such as "abc\n" pass as a bare key and corrupt the written file.
_BARE_KEY = re.compile(r"^[A-Za-z0-9_-]+\Z")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ConfigError(Exception):
    """Raised when config.toml has the wrong type or structure.

    Failing loudly is preferred over silently accepting a dangerous value
    (for example treating the quoted string "false" as a truthy flag).
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def default_config_path() -> Path:
    """Return the per-user config.toml location for the current OS.

    Windows uses ``%APPDATA%`` (fallback ``~/AppData/Roaming``); every other
    platform uses ``$XDG_CONFIG_HOME`` (fallback ``~/.config``).  An unset or
    empty environment variable selects the fallback.
    """
    on_windows = os.name == "nt"
    root = os.environ.get("APPDATA" if on_windows else "XDG_CONFIG_HOME")
    if not root:
        # Only resolve the home directory when the environment gives nothing.
        home = Path.home()
        root = str(home / "AppData" / "Roaming") if on_windows else str(home / ".config")
    return Path(root) / APP / "config.toml"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class Config:
    """In-memory form of config.toml."""

    # Path of this machine's own hub; None when not configured.
    own_hub: str | None = None
    # Contents of the [remotes] table: remote name -> hub path.
    remotes: dict[str, str] = field(default_factory=dict)
    # Persistent equivalent of passing --force-unsafe-lock (per the module notes).
    force_unsafe_lock: bool = False
    # File the config was loaded from; save() writes back here by default.
    path: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _toml_str(v: str) -> str:
|
|
47
|
+
"""序列化字串值。優先 literal string(單引號)原樣容納反斜線;含單引號/換行/控制字元才退
|
|
48
|
+
basic string 並完整轉義(含 \\uXXXX 控制字元),避免寫出讀不回的 TOML。"""
|
|
49
|
+
has_ctrl = any(ord(c) < 0x20 for c in v)
|
|
50
|
+
if "'" not in v and not has_ctrl and "\n" not in v and "\r" not in v:
|
|
51
|
+
return f"'{v}'"
|
|
52
|
+
out = []
|
|
53
|
+
for c in v:
|
|
54
|
+
if c == "\\":
|
|
55
|
+
out.append("\\\\")
|
|
56
|
+
elif c == '"':
|
|
57
|
+
out.append('\\"')
|
|
58
|
+
elif c == "\n":
|
|
59
|
+
out.append("\\n")
|
|
60
|
+
elif c == "\r":
|
|
61
|
+
out.append("\\r")
|
|
62
|
+
elif c == "\t":
|
|
63
|
+
out.append("\\t")
|
|
64
|
+
elif ord(c) < 0x20:
|
|
65
|
+
out.append(f"\\u{ord(c):04X}")
|
|
66
|
+
else:
|
|
67
|
+
out.append(c)
|
|
68
|
+
return '"' + "".join(out) + '"'
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _toml_key(name: str) -> str:
    """Return *name* as a TOML key, quoting it when it is not a bare key.

    Names made only of ``[A-Za-z0-9_-]`` are written as-is; anything else
    (dots, spaces, an empty name, ...) is written as a quoted key so it
    cannot break the table structure.

    ``fullmatch`` is used instead of ``match`` because the pattern's ``$``
    also matches before a trailing newline, which would otherwise let a name
    like ``"abc\\n"`` through unquoted.
    """
    return name if _BARE_KEY.fullmatch(name) else _toml_str(name)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def to_toml(c: Config) -> str:
    """Render *c* as config.toml text.

    Top-level keys come first (``own_hub`` only when set, then
    ``force_unsafe_lock``).  A ``[remotes]`` table follows, preceded by a
    blank line, only when there is at least one remote; remotes are written
    in sorted name order.  The output always ends with a newline.
    """
    top_level = []
    if c.own_hub is not None:
        top_level.append(f"own_hub = {_toml_str(c.own_hub)}")
    top_level.append(f"force_unsafe_lock = {'true' if c.force_unsafe_lock else 'false'}")

    table = []
    if c.remotes:
        table = ["", "[remotes]"]
        table += [
            f"{_toml_key(remote)} = {_toml_str(c.remotes[remote])}"
            for remote in sorted(c.remotes)
        ]
    return "\n".join(top_level + table) + "\n"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def load(path: str | os.PathLike | None = None) -> Config:
    """Read the configuration file and validate its shape.

    Args:
        path: File to read; defaults to ``default_config_path()``.

    Returns:
        The parsed ``Config``.  A missing file is not an error: an empty
        ``Config`` (with ``path`` set) is returned so a later save can
        create it.

    Raises:
        ConfigError: The file cannot be parsed, or a key has the wrong type.
    """
    target = default_config_path() if path is None else Path(path)
    if not target.exists():
        return Config(path=str(target))

    with open(target, "rb") as fh:
        try:
            parsed = tomllib.load(fh)
        except Exception as e:  # noqa: BLE001
            raise ConfigError(f"config.toml 無法解析:{e}") from e

    hub = parsed.get("own_hub")
    if not (hub is None or isinstance(hub, str)):
        raise ConfigError("own_hub 必須是字串")

    # A quoted "false" is a string, not a bool: reject it rather than let it
    # be read as truthy.
    unsafe = parsed.get("force_unsafe_lock", False)
    if not isinstance(unsafe, bool):
        raise ConfigError('force_unsafe_lock 必須是布林 true/false(不可加引號)')

    table = parsed.get("remotes", {})
    if not isinstance(table, dict):
        raise ConfigError("remotes 必須是表(table)")
    remotes: dict[str, str] = {}
    for k, location in table.items():
        if not isinstance(location, str):
            raise ConfigError(f"remote '{k}' 的值必須是字串路徑")
        remotes[str(k)] = location

    return Config(own_hub=hub, remotes=remotes, force_unsafe_lock=unsafe, path=str(target))
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def save(c: Config, path: str | os.PathLike | None = None) -> str:
    """Write *c* to disk via a same-directory temp file plus ``os.replace``.

    Args:
        c: Configuration to serialize; its ``path`` is updated on success.
        path: Destination.  Defaults to ``c.path``, then to
            ``default_config_path()``.

    Returns:
        The path written, as a string.

    Raises:
        OSError: The directory, temp file or final rename could not be
            written.  The temp file is removed before the error propagates,
            so a failed save does not leave ``*.tmp`` litter behind.
    """
    p = Path(path) if path is not None else Path(c.path or default_config_path())
    p.parent.mkdir(parents=True, exist_ok=True)
    # The PID in the name keeps concurrent writers off each other's temp file.
    tmp = p.with_name(f"{p.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(to_toml(c), encoding="utf-8")
        # Same-directory rename; os.replace also overwrites an existing file
        # on Windows.
        os.replace(tmp, p)
    except BaseException:
        # Best-effort cleanup; the original failure is what the caller sees.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
    c.path = str(p)
    return str(p)
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""doctor:維護工具——唯讀診斷 + `--rebuild-state` + `--break-lock`(DESIGN §8.5/§9/A6/A15)。
|
|
2
|
+
|
|
3
|
+
- **診斷(無參)**:掛載/FS 可靠度/state 狀態/per-project coverage/同側 casefold 撞名/lock 概況。**純唯讀**。
|
|
4
|
+
- **`--rebuild-state`**:state 損壞/遺失時由**磁碟**重建(§8.5)。hub 側基線(known_sessions + hub_fingerprint)
|
|
5
|
+
對所有 **已 coverage-initialized** 專案無條件重建;local 側(local_sessions/bindings/local_dir_bindings)
|
|
6
|
+
需 `--map`(無 `_project.json` 時 git 指紋無法配對,決定 #7 不弱猜)→ 未 map 的專案 local 基線留空(下次
|
|
7
|
+
sync 對該專案 present=hub 走 blocked-no-local-baseline,fail-closed,不復活)。**永不**讀寫/重建 tombstone
|
|
8
|
+
(只從 hub 讀以排除已刪 sid),故 rebuild 不丟 tombstone、不復活已刪(§14 DoD)。preview 預設、`--yes` 落地。
|
|
9
|
+
- **`--break-lock`**:列出 `*.lock`;`--yes` 只移除「**同 host 且 PID 已死**」的 stale 鎖(移除前再驗一次)。
|
|
10
|
+
跨 host / 仍存活 / 無法解析 → **不自動刪**(網路 FS 不可信 PID/時鐘,A6),列出交人工確認後手動刪。
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
from collections import Counter
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from . import acks, anomaly, atomicio, scan, state as state_mod, tombstone
|
|
20
|
+
from .config import Config
|
|
21
|
+
from .state import State
|
|
22
|
+
|
|
23
|
+
# File-name suffix that marks a lock file; stripping it yields the locked resource.
_LOCK_SUFFIX = ".lock"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _safe_name(name: str) -> bool:
    """Return True when *name* is a single, plain directory name.

    Used to vet ``--map`` folder names so they cannot escape the trusted
    root: empty names, ``.`` / ``..``, anything containing a path separator
    and absolute paths are all rejected.
    """
    if not name or name in (".", ".."):
        return False
    # A name that survives Path().name unchanged has no separators or anchor.
    return Path(name).name == name
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _safe_dir(root: Path, d: Path) -> bool:
    """Return whether project directory *d* is acceptable under *root*.

    Pure delegation to ``scan._safe_project_dir`` so that this module shares
    one escape check with the rest of the package (per the original note:
    not a symlink and resolving inside *root*) instead of growing its own.
    """
    verdict = scan._safe_project_dir(root, d)
    return verdict
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def hub_project_dirs(hub_root) -> list[Path]:
    """List the safe project directories directly under the hub, sorted.

    Entries are kept only when they are directories and pass ``_safe_dir``.
    A hub that does not exist or is not a directory yields an empty list.
    """
    base = Path(hub_root)
    if not base.is_dir():
        return []
    projects = [entry for entry in base.iterdir() if entry.is_dir() and _safe_dir(base, entry)]
    projects.sort()
    return projects
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ── 診斷(唯讀)────────────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
@dataclass
class DoctorReport:
    """Accumulates the lines of a diagnostic report and a problem count."""

    # Rendered report lines, in emission order.
    lines: list[str] = field(default_factory=list)
    # Number of findings counted as problems (warn() adds one each time).
    problems: int = 0

    def ok(self, msg: str) -> None:
        """Record a passing check."""
        self.lines.append(f" ✓ {msg}")

    def warn(self, msg: str) -> None:
        """Record a warning and count it as a problem."""
        self.lines.append(f" ⚠ {msg}")
        self.problems += 1

    def info(self, msg: str) -> None:
        """Record a neutral, informational line."""
        self.lines.append(f" · {msg}")

    def head(self, msg: str) -> None:
        """Start a new section; the heading is preceded by a blank line."""
        self.lines.append(f"\n{msg}")

    def text(self) -> str:
        """Return the report as one string, or a placeholder when empty."""
        if not self.lines:
            return "(無)"
        return "\n".join(self.lines)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _assess(report: DoctorReport, label: str, d: Path) -> None:
    """Report whether *d* exists, is a directory, and is writable.

    Strictly read-only: writability comes from ``os.access`` and no probe
    file is created, so running the diagnosis has no side effects.  Exactly
    one line is added to *report*.
    """
    if not d.exists():
        report.warn(f"{label}:不存在 {d}")
        return
    if not d.is_dir():
        report.warn(f"{label}:非目錄 {d}")
        return
    if os.access(d, os.W_OK):
        report.ok(f"{label}:存在可寫 {d}")
    else:
        report.warn(f"{label}:無寫入權限 {d}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def diagnose(local_root, hub_root, state_path, config: Config | None = None) -> DoctorReport:
    """Run the read-only health check and return its report.

    Sections, in order: mount / filesystem, state file, hub projects, locks.
    Nothing on disk is modified.  ``config`` is accepted but not used here.
    """
    local_root, hub_root = Path(local_root), Path(hub_root)
    r = DoctorReport()

    r.head("掛載 / 檔案系統")
    _assess(r, "hub", hub_root)
    _assess(r, "local", local_root)
    _assess(r, "state 目錄", Path(state_path).parent)

    r.head("state")
    st = None
    try:
        st = state_mod.load_or_none(state_path)
    except state_mod.StateCorruptError as e:
        r.warn(f"state.json 損壞:{e} → 可 doctor --rebuild-state")
    else:
        if st is None:
            r.info("state.json 不存在(首次同步前正常;已同步過則異常 → 可 --rebuild-state)")
        else:
            r.ok(f"state.json 正常({len(st.known_sessions)} 專案 known、{len(st.local_sessions)} 專案 local 基線)")

    if st is not None and hub_root.exists():
        for a in anomaly.check(st, hub_root):
            # Only "halt" anomalies count as problems; the rest are informational.
            emit = r.warn if a.severity == "halt" else r.info
            emit(f"anomaly {a.code}: {a.message}")

    r.head("hub 專案")
    if not hub_root.is_dir():
        r.warn("hub 不存在,無法列專案")
    else:
        for hd in hub_project_dirs(hub_root):
            stems = list(scan._session_files(hd))
            cov = "已bootstrap" if tombstone.is_initialized(hd) else "未bootstrap"
            dup = scan._collision_casefolds(stems, [])
            note = f",⚠ casefold 撞名 {len(dup)}" if dup else ""
            tn = len(tombstone.read_tombstones(hd))
            # The ack ledger is only surfaced (entry count, corruption), never
            # used to downgrade a collision: this check sees the hub alone and
            # cannot verify that an earlier ack still applies.
            led = acks.load_ledger(hd)
            if led.by_key:
                note += f"({len(led.by_key)} 筆 ack;sync 依此隱藏、doctor --show-acked 可查)"
            r.info(f"{hd.name}:{len(stems)} session、{cov}、tombstone {tn}{note}")
            if dup:
                # Collisions are shown on the info line above but still count.
                r.problems += 1
            if not led.ok:
                r.warn(f"{hd.name}:acks.json 損壞(已忽略、全部照常回報)")

    r.head("鎖(*.lock)")
    # Scan the hub plus the one explicit state lock file; the state file's
    # parent directory is deliberately not scanned, so unrelated *.lock files
    # living there are not listed.
    state_lock = Path(str(state_path) + _LOCK_SUFFIX)
    locks = find_locks([hub_root], [state_lock])
    if not locks:
        r.ok("無殘留鎖")
    for lk in locks:
        emit = r.warn if lk.status in ("stale", "foreign", "unparseable") else r.info
        emit(f"[{lk.status}] {atomicio._disp(lk.path)}(host={atomicio._disp(lk.host)} pid={atomicio._disp(lk.pid)})")
    return r
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ── 鎖檢查 / break-lock ─────────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
@dataclass
class LockEntry:
    """One lock file together with its classification."""

    # Location of the .lock file.
    path: Path
    # "held" (same host, alive) / "stale" (same host, dead) /
    # "foreign" (another host) / "unparseable".
    status: str
    # Host and PID recorded in the lock, when they could be read.
    host: str | None
    pid: int | None
    # Unique token captured at listing time; break-lock re-checks it before
    # unlinking so it only removes the lock it actually inspected.
    token: str | None = None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _classify_lock(lock_path: Path) -> LockEntry:
    """Read one .lock file and decide its status.

    Reuses ``FileLock._read_info`` / ``_is_stale`` so the rules stay the same
    as the locking code itself.  Checks are applied in order: nothing
    readable -> "unparseable"; stale -> "stale"; otherwise "held" when the
    recorded host is this machine, else "foreign".
    """
    # The resource path is the lock path without its suffix.
    locked_resource = str(lock_path)[: -len(_LOCK_SUFFIX)]
    fl = atomicio.FileLock(locked_resource)
    info = fl._read_info()
    token = info.token

    if info.host is None and info.pid is None:
        status = "unparseable"
    elif fl._is_stale(info):
        status = "stale"
    elif info.host == atomicio._local_host():
        status = "held"
    else:
        status = "foreign"
    return LockEntry(lock_path, status, info.host, info.pid, token)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def find_locks(scan_roots, lock_paths=()) -> list[LockEntry]:
    """Collect and classify lock files.

    Args:
        scan_roots: Roots (the hub) searched for ``*.lock`` in three known
            places only: each safe project directory, that directory's
            tombstone folder, and the root's own top level.
        lock_paths: Explicit lock files (such as the state lock).  These are
            trusted and not filtered against any root.

    Returns:
        One ``LockEntry`` per existing lock file, in discovery order with
        duplicates removed.
    """
    pattern = "*" + _LOCK_SUFFIX

    def locks_in(root: Path, directory: Path) -> list[Path]:
        # Non-recursive glob, then keep only paths inside the root.
        return [p for p in sorted(directory.glob(pattern)) if scan._within_root(root, p)]

    candidates: list[Path] = []
    for raw_root in scan_roots:
        root = Path(raw_root)
        if not root.is_dir():
            continue
        # Deliberately no rglob: recursion could walk into a nested escaping
        # junction and read entries outside the root.
        safe_dirs, _ = scan._list_project_dirs(root)
        for sd in safe_dirs:
            candidates += locks_in(root, sd)
            tdir = sd / tombstone.TOMB_DIR
            if scan._safe_project_dir(sd, tdir):
                candidates += locks_in(root, tdir)
        # Lock files sitting directly at the top of the root.
        candidates += locks_in(root, root)
    candidates += [Path(lp) for lp in lock_paths]

    found: list[LockEntry] = []
    visited: set[Path] = set()
    for p in candidates:
        if p in visited or not p.is_file():
            continue
        visited.add(p)
        found.append(_classify_lock(p))
    return found
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass
class BreakReport:
    """Outcome of a break-lock run."""

    # Paths of the stale locks that were removed.
    removed: list[str] = field(default_factory=list)
    # Paths of the locks left in place (paths only; the reason is in `lines`).
    kept: list[str] = field(default_factory=list)
    # Paths whose removal failed; callers use this to exit non-zero.
    errors: list[str] = field(default_factory=list)
    # Human-readable lines describing every decision.
    lines: list[str] = field(default_factory=list)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def break_locks(scan_roots, lock_paths=(), *, apply: bool) -> BreakReport:
    """List locks and, when *apply* is true, remove only the stale ones.

    "Stale" means same host with a dead PID.  Locks that are held, belong to
    another host, or cannot be parsed are always kept and listed for manual
    handling, because PIDs and clocks cannot be trusted across machines.

    Before unlinking, the lock is read again and must still be stale *and*
    carry the token seen when it was listed; ``os.unlink`` acts on whatever
    file is at the path now, not on the one inspected earlier.

    Operator constraint carried over from the original notes: this is a
    single-operator recovery command.  Two concurrent ``--yes`` runs on the
    same hub can still race (one removes the stale lock, a writer re-takes
    it, the other run then unlinks the live lock), so do not run it in
    parallel or while a sync is in progress.

    Args:
        scan_roots: Passed through to ``find_locks``.
        lock_paths: Passed through to ``find_locks``.
        apply: When false, only report what would be removed.

    Returns:
        A ``BreakReport`` listing removed, kept and failed paths plus
        display lines.
    """
    rep = BreakReport()
    for lk in find_locks(scan_roots, lock_paths):
        # Sanitised copies for display only; unlink and the report lists keep
        # using the raw lk.path.
        pa, h, pd = atomicio._disp(lk.path), atomicio._disp(lk.host), atomicio._disp(lk.pid)
        # Only the stale wording is ever printed (in the preview branch).
        verb = "可移除(--yes)" if lk.status == "stale" else "保留(人工確認後手動刪)"

        if lk.status != "stale":
            rep.kept.append(str(lk.path))
            reason = {"held": "同機進程持有中(存活)", "foreign": "他機持有,無法判存活",
                      "unparseable": "內容無法解析"}.get(lk.status, lk.status)
            rep.lines.append(f" · [{lk.status}] {pa}(host={h} pid={pd})→ 保留:{reason}")
            continue

        if not apply:
            rep.kept.append(str(lk.path))
            rep.lines.append(f" · [stale] {pa}(host={h} pid={pd} 已死)→ {verb}")
            continue

        # Stale and apply requested: re-verify immediately before unlinking.
        fl = atomicio.FileLock(str(lk.path)[: -len(_LOCK_SUFFIX)])
        fresh = fl._read_info()
        if not (fl._is_stale(fresh) and fresh.token == lk.token):
            # The lock changed since listing (possibly re-acquired): leave it.
            rep.lines.append(f" · 取消移除(鎖狀態已變,疑被重取):{pa}")
            continue
        try:
            os.unlink(lk.path)
        except OSError as e:
            rep.errors.append(str(lk.path))
            rep.lines.append(f" ⚠ 移除失敗:{pa}:{atomicio._disp(e)}")
        else:
            rep.removed.append(str(lk.path))
            rep.lines.append(f" ✓ 已移除 stale 鎖:{pa}(host={h} pid={pd} 已死)")

    if not rep.lines:
        rep.lines.append(" ✓ 無殘留鎖")
    return rep
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# ── rebuild-state ───────────────────────────────────────────────────────────
|
|
257
|
+
|
|
258
|
+
@dataclass
class RebuildResult:
    """A rebuilt state together with the preview lines describing it."""

    # The freshly built State object.
    state: State
    # Preview / log lines for the operator.
    lines: list[str] = field(default_factory=list)
    # True when the hub is missing or not a directory: nothing could be
    # rebuilt and the caller must NOT persist this result (it would write an
    # empty state).
    fatal: bool = False
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _live_stems(stems_dir: Path, tomb_dir: Path | None = None) -> set[str]:
    """Return the session stems in *stems_dir* that are not tombstoned.

    Tombstones are only read here, never written.  They belong to the hub,
    so for a local directory the caller must pass the matching hub directory
    as *tomb_dir*; when omitted, *stems_dir* itself is used (the hub case).
    """
    if tomb_dir is None:
        tomb_dir = stems_dir
    present = set(scan._session_files(stems_dir))
    deleted = {
        target
        for kind, target in tombstone.read_tombstones(tomb_dir)
        if kind == "session"
    }
    return present.difference(deleted)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def rebuild_state(
    local_root, hub_root, *, mappings: dict[str, str] | None = None,
    identity_fn=None,
) -> RebuildResult:
    """Build a brand-new State from what is on disk.

    The old state file is never read, so this works when it is corrupt.
    Tombstones are only read (to exclude deleted sessions), never written.

    Args:
        local_root: Root holding the local project directories.
        hub_root: Hub root.  If it is not a directory the result is marked
            ``fatal`` and no baseline is built.
        mappings: ``{local_dir_name: hub_dir_name}`` given via ``--map``.
            The local baseline is rebuilt only for mapped projects.
        identity_fn: Accepted but not used by this function.

    Returns:
        A ``RebuildResult`` with the new state and preview lines.  Writing
        it to disk is left to the caller.
    """
    local_root, hub_root = Path(local_root), Path(hub_root)
    mappings = mappings or {}
    st = State()
    res = RebuildResult(state=st)
    say = res.lines.append

    if not hub_root.is_dir():
        res.fatal = True
        say(f"⚠ hub 不存在或非目錄:{hub_root} → 無法重建(不寫 state)")
        return res
    st.hub_fingerprint = anomaly.hub_fingerprint(hub_root)

    # Hub side: only safe (non-escaping) directories that were bootstrapped.
    hub_dirs = [d for d in sorted(hub_root.iterdir()) if d.is_dir() and _safe_dir(hub_root, d)]
    initialized = {hd.name: hd for hd in hub_dirs if tombstone.is_initialized(hd)}
    for pk, hd in initialized.items():
        if scan._dir_scannable(hd):
            st.known_sessions[pk] = _live_stems(hd)
            say(f" · hub 基線 {pk}:known={len(st.known_sessions[pk])}")
        else:
            # Fail closed: an unlistable directory would otherwise look empty
            # and produce a baseline that is missing existing sessions.
            say(f" ⚠ hub 專案夾不可列舉(權限)→ 略過基線(fail-closed):{pk}")
    skipped = [hd.name for hd in hub_dirs if hd.name not in initialized]
    if skipped:
        say(f" · 略過未 bootstrap 的 hub 專案(不重建基線):{', '.join(skipped)}")

    # Local side: explicit --map entries only.  Names are vetted first so a
    # mapping cannot point outside the roots.
    valid_map: dict[str, str] = {}
    for ln, hn in sorted(mappings.items()):
        if _safe_name(ln) and _safe_name(hn):
            valid_map[ln] = hn
        else:
            say(f" ⚠ --map {ln}={hn}:夾名不安全(須單一夾名),略過")

    # A hub directory targeted by more than one local directory is skipped
    # entirely so the local baselines cannot overwrite each other.
    dup_hub = {h for h, c in Counter(valid_map.values()).items() if c > 1}
    for local_name, hub_name in valid_map.items():
        if hub_name in dup_hub:
            say(f" ⚠ --map …={hub_name}:多個 local 對到同一 hub 夾 → 全數略過 local 基線")
            continue
        ld = local_root / local_name
        hd = initialized.get(hub_name)
        if hd is None:
            say(f" ⚠ --map {local_name}={hub_name}:hub 專案不存在或未 bootstrap,略過 local 基線")
            continue
        if not (ld.is_dir() and _safe_dir(local_root, ld) and scan._dir_scannable(ld)):
            say(f" ⚠ --map {local_name}={hub_name}:local 夾不存在/非目錄/symlink逃逸/不可列舉 {ld},略過 local 基線")
            continue
        # Tombstones are read from the hub directory, not the local one.
        st.local_sessions[hub_name] = _live_stems(ld, hd)
        st.local_dir_bindings[local_name] = hub_name
        cwds = scan._project_cwds(ld)
        if len(cwds) == 1:
            # Bind the working directory only when it is unambiguous.
            st.bindings[next(iter(cwds))] = hub_name
        say(f" · local 基線 {local_name}→{hub_name}:local={len(st.local_sessions[hub_name])}")

    no_local = sorted(set(initialized) - set(st.local_sessions))
    if no_local:
        say(" · 無 local 基線(需 --map 才能對該專案雙向同步;否則 present=hub 走 "
            f"blocked-no-local-baseline):{', '.join(no_local)}")
    return res
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def write_rebuilt_state(result: RebuildResult, state_path, *, lock_timeout_s: float = 5.0) -> str:
    """Persist the rebuilt state, overwriting whatever is at *state_path*.

    The parent directory is created if needed and the write happens while
    holding the state file's lock.  The existing file is not read, so a
    corrupt one can be replaced.

    Args:
        result: Output of ``rebuild_state``.
        state_path: Destination of the state file.
        lock_timeout_s: Timeout handed to ``acquire_blocking``.

    Returns:
        Whatever ``state_mod.save`` returns (the written path).
    """
    target = Path(state_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    held = atomicio.FileLock(target).acquire_blocking(timeout_s=lock_timeout_s)
    try:
        written = state_mod.save(result.state, target)
    finally:
        # Always release, including when the save raises.
        held.release()
    return written
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""fuzzy:memory「同事實、不同檔名」的**模糊近似候選偵測**(P2,最高風險——動「永不把兩則語意不同的
|
|
2
|
+
memory 判同」的 cardinal 不變量)。
|
|
3
|
+
|
|
4
|
+
定位(DESIGN §7「不同檔名、同一事實」+ HANDOFF P2):
|
|
5
|
+
- exact 層(`memory.plan_memory_pair` duty a/b)只認 frontmatter `name` **完全相同**的跨檔身分;兩台機器各自
|
|
6
|
+
為同一件事取了**不同 slug**(如 `codex-run-stall-handling` vs `codex-stall-triage`)時,exact 層看不出關聯 →
|
|
7
|
+
兩則都留、互不干涉(**安全、不丟資料**,但使用者不會被提醒「這兩則其實同一件事、可考慮合併」)。
|
|
8
|
+
- 本模組補這個洞:以**純字面**(決定性、零第三方相依 → 跨機可重現;**不用 embedding/ML**)算 name slug 詞元
|
|
9
|
+
+ description 詞元的相似度,把疑似同一事實的**候選對**列出交人複核。
|
|
10
|
+
|
|
11
|
+
**極性鐵則(cardinal)**:本模組**永遠只建議、絕不裁定**。fuzzy 分數**不進** `classify`/`apply`/`sync`、**絕不**
|
|
12
|
+
自動合併或改寫任何 memory——誤判(把兩則不同事實判「疑似同一」)在此**只多印一行提示**、零資料危害。真正的
|
|
13
|
+
保留兩版/合併一律由使用者逐對放行後才發生(memory-merge 的 stage/interactive=後續 Block B)。本模組本身
|
|
14
|
+
**不做任何 I/O**(純函式;讀檔在 CLI,餵進 `FuzzyEntry`)。
|
|
15
|
+
|
|
16
|
+
**訊號選擇(evidence-based,2026-07-04 對使用者真實 memory 實測)**:name slug 詞元是**主**訊號(同一事實兩台常
|
|
17
|
+
重用關鍵名詞〔`codex`、`stall`〕),description 詞元為**次**;**刻意不比 body**——實測「換句話說」的重複其 body
|
|
18
|
+
字元 n-gram 幾乎不重疊(真重複 body 相似度≈0),反而不相干的兩則因共用領域詞彙 body 分數更高(訊號是反的)→
|
|
19
|
+
比 body 只會增誤報。故只用 name+desc,權重 0.7/0.3;真重複與雜訊的天生間距窄(實測真重複≈0.29 vs 最高雜訊≈0.11)
|
|
20
|
+
→ 閾值保守 + **一律 advisory + 人工確認** 是數字逼出來的必要條件,非潔癖。
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import re
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
from . import scan
|
|
28
|
+
|
|
29
|
+
# Weights for the name and description similarities, and the default score
# threshold.  Per the module notes the threshold can be overridden from the
# CLI (`--fuzzy-threshold`) for calibration.
WEIGHT_NAME = 0.7
WEIGHT_DESC = 0.3
DEFAULT_THRESHOLD = 0.25

# Name-slug tokenizer: slugs are made of [A-Za-z0-9_.-], so split on runs of
# "-", "_" and ".".
_SLUG_SPLIT = re.compile(r"[-_.]+")
# Description tokenizer: runs of Latin letters/digits plus individual CJK
# characters (descriptions often mix both).  Single CJK characters are the
# conservative choice: a larger union lowers the Jaccard score, which means
# fewer false positives.  CJK bigrams are left as possible future tuning.
_LATIN_RUN = re.compile(r"[a-z0-9]+")
_CJK = re.compile(r"[\u3400-\u9fff]")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _norm(s: str) -> str:
    """Normalize *s* for caseless comparison.

    Delegates to ``scan._name_key`` (described in the original note as
    NFC, casefold, NFC) so this module uses the same normalization as the
    rest of the codebase instead of its own copy.
    """
    normalized = scan._name_key(s)
    return normalized
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def name_tokens(name: str | None) -> frozenset[str]:
    """Split a name slug into its set of normalized tokens.

    ``None`` or an empty string gives an empty set.  Empty pieces produced
    by leading or trailing separators are dropped.
    """
    if not name:
        return frozenset()
    pieces = _SLUG_SPLIT.split(_norm(name))
    return frozenset(filter(None, pieces))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def desc_tokens(desc: str | None) -> frozenset[str]:
    """Turn a description into its set of normalized tokens.

    Tokens are runs of Latin letters/digits plus each individual CJK
    character.  ``None`` or an empty string gives an empty set.
    """
    if not desc:
        return frozenset()
    folded = _norm(desc)
    tokens = set(_LATIN_RUN.findall(folded))
    tokens.update(_CJK.findall(folded))
    return frozenset(tokens)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def jaccard(a: frozenset[str], b: frozenset[str]) -> float:
    """Return the Jaccard index |a ∩ b| / |a ∪ b|.

    When either set is empty the result is 0.0: with nothing to compare,
    the inputs are treated as not similar, which is the safe direction.
    """
    if a and b:
        common = len(a & b)
        # |a ∪ b| by inclusion-exclusion.
        return common / (len(a) + len(b) - common)
    return 0.0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True)
class FuzzyEntry:
    """Fuzzy-matching input for one memory file.

    Built by the caller after reading the file; this module performs no I/O.
    """

    # File name of the memory file.
    filename: str
    # Frontmatter `name` slug; None when the frontmatter is not usable.
    name: str | None
    # Frontmatter `description`; None when unusable, missing, or not a string.
    description: str | None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(frozen=True)
class FuzzyCandidate:
    """An advisory pair of memory files that may describe the same fact."""

    # Project the pair belongs to.
    project_key: str
    # The two file names, ordered so that `a` is the smaller one.
    a: str
    b: str
    # Frontmatter names of `a` and `b` (None when unavailable).
    name_a: str | None
    name_b: str | None
    # Weighted overall score and its two components.
    score: float
    name_sim: float
    desc_sim: float
    # Name tokens the two files have in common, sorted.
    shared_name_tokens: tuple[str, ...]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def similarity(a: FuzzyEntry, b: FuzzyEntry) -> tuple[float, float, float]:
    """Score how alike two entries are.

    Only the name and description are compared; the body is deliberately
    left out.

    Returns:
        ``(score, name_sim, desc_sim)`` where ``score`` is the weighted sum
        ``WEIGHT_NAME * name_sim + WEIGHT_DESC * desc_sim``.
    """
    by_name = jaccard(name_tokens(a.name), name_tokens(b.name))
    by_desc = jaccard(desc_tokens(a.description), desc_tokens(b.description))
    combined = WEIGHT_NAME * by_name + WEIGHT_DESC * by_desc
    return (combined, by_name, by_desc)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def find_candidates(project_key: str, entries: list[FuzzyEntry], *,
                    threshold: float = DEFAULT_THRESHOLD) -> list[FuzzyCandidate]:
    """List pairs of differently named files whose similarity meets *threshold*.

    The result is advisory only: it suggests pairs for a human to review.

    Steps:
      1. Alias spellings of the same file (equal ``scan._name_key``, i.e.
         differing only by case or Unicode normalization form) are collapsed
         to one entry, so a third file is not paired with each alias in turn.
         Entries are ordered by ``(name key, raw filename)`` first, so the
         alias that survives does not depend on the order of *entries*.
      2. Pairs whose frontmatter ``name`` is present on both sides and
         identical are skipped; that exact-identity case is handled
         elsewhere.  Pairs with a missing name are still scored, since the
         description may match.
      3. Remaining pairs at or above *threshold* are reported with ``a`` as
         the smaller raw filename.

    Args:
        project_key: Copied into every candidate.
        entries: One ``FuzzyEntry`` per memory file.
        threshold: Minimum (unrounded) score for a pair to be listed.

    Returns:
        Candidates sorted by score descending, then by ``a``, then ``b``.
        Scores are rounded to 4 decimal places.
    """
    # Sort ties on the raw filename: sorted() is stable, so ordering by the
    # name key alone would let the caller's input order pick the survivor.
    by_key: dict[str, FuzzyEntry] = {}
    for e in sorted(entries, key=lambda e: (scan._name_key(e.filename), e.filename)):
        by_key.setdefault(scan._name_key(e.filename), e)
    uniq = list(by_key.values())

    out: list[FuzzyCandidate] = []
    for i, first in enumerate(uniq):
        for second in uniq[i + 1:]:
            ea, eb = first, second
            if ea.name and eb.name and ea.name == eb.name:
                continue  # exact cross-file identity, already handled elsewhere
            score, name_sim, desc_sim = similarity(ea, eb)
            if score < threshold:
                continue
            if eb.filename < ea.filename:
                # Keep a <= b on the raw filename, matching the final sort key.
                ea, eb = eb, ea
            shared = tuple(sorted(name_tokens(ea.name) & name_tokens(eb.name)))
            out.append(FuzzyCandidate(
                project_key, ea.filename, eb.filename, ea.name, eb.name,
                round(score, 4), round(name_sim, 4), round(desc_sim, 4), shared))
    out.sort(key=lambda c: (-c.score, c.a, c.b))
    return out
|