total-recall-memory 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import datetime as dt
4
+ import glob
5
+ import json
6
+ import os
7
+ import re
8
+ import socket
9
+ import subprocess
10
+ import sys
11
+ from collections import defaultdict
12
+ from pathlib import Path
13
+
14
+
15
def get_machine_id() -> str:
    """Return a stable, human-readable identifier for this machine.

    The output of ``scutil --get ComputerName`` is used when the command
    succeeds and prints something; otherwise the first dot-separated label
    of the socket hostname is returned, or ``"unknown"`` if that is empty.
    """
    command = ["scutil", "--get", "ComputerName"]
    try:
        raw_name = subprocess.check_output(command, stderr=subprocess.DEVNULL, text=True)
        computer_name = raw_name.strip()
    except Exception:
        # Best effort: any failure to run the command falls back to the hostname.
        computer_name = ""
    if computer_name:
        return computer_name
    short_host = socket.gethostname().split(".")[0]
    return short_host or "unknown"
27
+
28
+
29
def slugify(value: str) -> str:
    """Turn *value* into a lower-case, hyphen-separated slug.

    Every run of characters outside ``a-z0-9`` becomes a single hyphen and
    leading/trailing hyphens are removed. ``"memory"`` is returned when
    nothing is left.
    """
    lowered = value.lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    trimmed = hyphenated.strip("-")
    if trimmed:
        return trimmed
    return "memory"
32
+
33
+
34
def ensure_dir(path: Path) -> None:
    """Create *path* and any missing parents; do nothing if it already exists."""
    os.makedirs(path, exist_ok=True)
36
+
37
+
38
def load_json(path: Path, default):
    """Read and parse the UTF-8 JSON file at *path*.

    *default* is returned when the file does not exist or when reading or
    parsing it fails for any reason.
    """
    if path.exists():
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except Exception:
            # Unreadable or malformed file: fall through to the default.
            pass
    return default
46
+
47
+
48
def save_json(path: Path, payload) -> None:
    """Write *payload* to *path* as indented, key-sorted JSON plus a newline.

    The parent directory is created when missing. The payload is serialised
    before any file is touched and the result is moved into place with
    ``os.replace``, so a serialisation error or an interrupted write leaves
    an existing file at *path* intact instead of truncated.

    Raises:
        TypeError / ValueError: if *payload* is not JSON-serialisable.
        OSError: if the file cannot be written or replaced.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialise first: a failure here must not destroy the previous contents.
    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        tmp_path.write_text(text, encoding="utf-8")
        os.replace(tmp_path, path)
    except OSError:
        # Do not leave a stray temporary file behind on failure.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
53
+
54
+
55
def clean_text(value: str) -> str:
    """Normalise a message into a single trimmed line of at most 900 chars.

    Args:
        value: Raw message text. Anything that is not a ``str`` (``None``,
            numbers, nested JSON values) is treated as empty rather than
            raising.

    Returns:
        The text with every whitespace run collapsed to one space and the
        ends trimmed. ``""`` is returned for empty input and for text that
        starts with ``<user_instructions>`` or ``<environment_context>``.
        Text longer than 900 characters is cut to 897 characters plus
        ``"..."``.
    """
    if not isinstance(value, str):
        # Values come from parsed JSON and may be numbers, lists or objects.
        return ""
    text = re.sub(r"\s+", " ", value).strip()
    if not text:
        return ""
    if text.startswith(("<user_instructions>", "<environment_context>")):
        return ""
    if len(text) > 900:
        text = text[:897] + "..."
    return text
65
+
66
+
67
def parse_iso_date(value: str) -> str:
    """Return the ``YYYY-MM-DD`` date part of an ISO-8601 timestamp.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. If the value
    cannot be parsed for any reason, today's local date is returned instead.
    """
    try:
        normalized = value[:-1] + "+00:00" if value.endswith("Z") else value
        parsed = dt.datetime.fromisoformat(normalized)
    except Exception:
        return dt.date.today().isoformat()
    return parsed.date().isoformat()
74
+
75
+
76
def parse_epoch_millis_date(value) -> str:
    """Convert a Unix timestamp in milliseconds to a UTC ``YYYY-MM-DD`` date.

    Args:
        value: Anything ``int()`` accepts (an int or a numeric string).

    Returns:
        The UTC calendar date of the timestamp. If *value* cannot be
        converted or is out of range, today's local date is returned.
    """
    try:
        seconds = int(value) / 1000.0
        # Timezone-aware replacement for the deprecated utcfromtimestamp().
        moment = dt.datetime.fromtimestamp(seconds, tz=dt.timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        return dt.date.today().isoformat()
    return moment.date().isoformat()
82
+
83
+
84
def now_utc_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    current = dt.datetime.now(dt.timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
86
+
87
+
88
class ProjectSlugResolver:
    """Resolve a working-directory path to a project slug, caching per path.

    The slug comes from the repository name in the ``origin`` remote URL of
    the git checkout at the path; if that cannot be determined, the path's
    last component is used instead.
    """

    def __init__(self):
        # Maps the user-expanded path to the slug computed for it.
        self.cache = {}

    def from_path(self, maybe_path: str) -> str:
        """Return the project slug for *maybe_path*.

        Args:
            maybe_path: A directory path, possibly starting with ``~``.
                Empty or non-string values (the path comes from parsed JSON)
                yield ``""``.

        Returns:
            The slugified repository name or directory name.
        """
        if not maybe_path or not isinstance(maybe_path, str):
            return ""
        path = os.path.expanduser(maybe_path)
        if path in self.cache:
            return self.cache[path]

        try:
            remote = subprocess.check_output(
                ["git", "-C", path, "remote", "get-url", "origin"],
                stderr=subprocess.DEVNULL,
                text=True,
            ).strip()
        except (OSError, subprocess.SubprocessError, ValueError):
            # Not a git checkout, git not installed, or undecodable output.
            remote = ""

        slug = ""
        if remote:
            # Drop a trailing "/" so "https://host/user/repo/" still ends in
            # "repo", and strip a "host:" prefix left by scp-style remotes
            # that contain no slash ("git@host:repo.git").
            repo = os.path.basename(remote.rstrip("/")).rsplit(":", 1)[-1]
            if repo.endswith(".git"):
                repo = repo[:-4]
            if repo:
                slug = slugify(repo)

        if not slug:
            slug = slugify(os.path.basename(path.rstrip(os.sep)))
        self.cache[path] = slug
        return slug
118
+
119
+
120
def extract_user_texts_from_content(content):
    """Collect the cleaned text of every dict item in a message content list.

    Each dict contributes its ``"text"`` value, or its ``"value"`` value when
    ``"text"`` is missing or falsy. Items that are not dicts, and items whose
    cleaned text is empty, are skipped. A *content* that is not a list yields
    an empty list.
    """
    if not isinstance(content, list):
        return []
    collected = []
    for entry in content:
        if isinstance(entry, dict):
            cleaned = clean_text(entry.get("text") or entry.get("value") or "")
            if cleaned:
                collected.append(cleaned)
    return collected
132
+
133
+
134
def extract_codex_session(session_path: Path):
    """Read a Codex session ``.jsonl`` file and pull out its user messages.

    Each non-empty line is parsed as JSON. Lines that fail to parse, or that
    are not JSON objects, are skipped. Records are handled as follows:

    * ``type == "session_meta"``: ``payload.id`` / ``payload.cwd`` /
      ``payload.timestamp`` supply the session id, working directory and
      (if no date is known yet) the date.
    * ``role == "user"``: texts are taken from the record's ``content``.
    * ``type == "response_item"`` whose payload is a user ``message``: texts
      are taken from ``payload.content``.

    Args:
        session_path: Path of the session file.

    Returns:
        A dict with ``session_id`` (defaults to the file stem), ``date``
        (from the first string timestamp seen, else the file's modification
        time as a local date), ``cwd`` and ``messages``, a list of
        ``(timestamp, text)`` tuples.

    Raises:
        OSError: if the file cannot be opened or stat'ed.
    """
    session_id = session_path.stem
    first_date = ""
    cwd = ""
    messages = []

    with session_path.open("r", encoding="utf-8", errors="ignore") as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                continue
            try:
                obj = json.loads(raw)
            except (ValueError, RecursionError):
                continue
            if not isinstance(obj, dict):
                # Valid JSON that is not an object (list, number, null, ...).
                continue

            ts = obj.get("timestamp", "")
            if not first_date and isinstance(ts, str) and ts:
                first_date = parse_iso_date(ts)

            record_type = obj.get("type")
            payload = obj.get("payload")
            if not isinstance(payload, dict):
                # "payload": null or a scalar must not break attribute access.
                payload = {}

            if record_type == "session_meta":
                meta_id = payload.get("id")
                if isinstance(meta_id, str) and meta_id:
                    session_id = meta_id
                meta_cwd = payload.get("cwd")
                if isinstance(meta_cwd, str) and meta_cwd:
                    cwd = meta_cwd
                ts2 = payload.get("timestamp")
                if not first_date and isinstance(ts2, str) and ts2:
                    first_date = parse_iso_date(ts2)
                continue

            if obj.get("role") == "user":
                for text in extract_user_texts_from_content(obj.get("content")):
                    messages.append((ts, text))
                continue

            if (
                record_type == "response_item"
                and payload.get("type") == "message"
                and payload.get("role") == "user"
            ):
                for text in extract_user_texts_from_content(payload.get("content")):
                    messages.append((ts, text))

    if not first_date:
        # No usable timestamp in the file: fall back to its modification time.
        first_date = dt.datetime.fromtimestamp(session_path.stat().st_mtime).date().isoformat()
    return {"session_id": session_id, "date": first_date, "cwd": cwd, "messages": messages}
179
+
180
+
181
def write_codex_markdown(output_dir: Path, source_file: Path, session_info: dict, project_name: str) -> bool:
    """Write one Codex session as a Markdown file under *output_dir*.

    The file is named ``<date>-codex-<slugified session id>.md``. It holds a
    header (date, project, machine, import time) followed by at most the
    first 200 user messages as bullet points, each prefixed with its
    timestamp when that is a non-empty string.

    Args:
        output_dir: Directory to write into; created when missing.
        source_file: Not used by this function.
        session_info: Dict with ``session_id``, ``date`` and ``messages``.
        project_name: Project label written into the header.

    Returns:
        ``False`` when the target file already exists (nothing is written),
        ``True`` after writing a new file.
    """
    slug = slugify(session_info["session_id"])
    date = session_info["date"]
    target = output_dir / f"{date}-codex-{slug}.md"
    if target.exists():
        return False

    ensure_dir(output_dir)
    header = [
        f"# Imported Session: Codex {session_info['session_id']}",
        "",
        f"**Date:** {date}",
        f"**Project:** {project_name}",
        f"**Machine:** {get_machine_id()}",
        f"**Imported At:** {now_utc_iso()}",
        "",
        "## User Messages",
    ]
    bullets = [
        f"- [{stamp}] {message}" if isinstance(stamp, str) and stamp else f"- {message}"
        for stamp, message in session_info["messages"][:200]
    ]
    # The trailing empty element makes the file end with a newline.
    target.write_text("\n".join(header + bullets + [""]), encoding="utf-8")
    return True
208
+
209
+
210
def ingest_codex(args, state, resolver, imported_dir: Path):
    """Import Codex sessions belonging to ``args.project`` as Markdown files.

    Session files are found under ``<codex_home>/sessions/**/*.jsonl`` and
    ``<codex_home>/archived_sessions/*.jsonl``. A file whose modification
    time matches the value recorded in ``state["processed_codex_files"]`` is
    skipped. Every other file is parsed; sessions of another project or
    without user messages are only recorded as processed, the rest are
    written to ``imported_dir / "codex"``.

    Args:
        args: Namespace providing ``codex_home`` and ``project``.
        state: Mutable state dict; ``processed_codex_files`` (real path ->
            mtime) is created and updated in place.
        resolver: Object whose ``from_path(cwd)`` returns a project slug.
        imported_dir: Base directory for imported sessions.

    Returns:
        ``{"scanned": <files examined>, "written": <files written>}``.
    """
    processed = state.setdefault("processed_codex_files", {})
    patterns = [
        os.path.expanduser(os.path.join(args.codex_home, "sessions", "**", "*.jsonl")),
        os.path.expanduser(os.path.join(args.codex_home, "archived_sessions", "*.jsonl")),
    ]
    files = sorted({match for pattern in patterns for match in glob.glob(pattern, recursive=True)})

    wrote = 0
    scanned = 0
    for file_path in files:
        scanned += 1
        abs_path = os.path.realpath(file_path)
        try:
            mtime = os.path.getmtime(abs_path)
        except OSError:
            # File vanished (or is a dangling link) since the glob; one bad
            # entry must not abort the whole run.
            continue
        previous = processed.get(abs_path)
        if previous and float(previous) == float(mtime):
            continue

        try:
            info = extract_codex_session(Path(abs_path))
        except OSError:
            # Unreadable file: leave it unrecorded so a later run retries it.
            continue

        wanted = resolver.from_path(info.get("cwd", "")) == args.project
        if wanted and info["messages"]:
            if write_codex_markdown(imported_dir / "codex", Path(abs_path), info, args.project):
                wrote += 1
        # Recorded in every case so unchanged files are not parsed again.
        processed[abs_path] = mtime

    return {"scanned": scanned, "written": wrote}
247
+
248
+
249
def maybe_skip_claude_display(display: str) -> bool:
    """Tell whether a Claude history entry should be left out of the import.

    Returns ``True`` for empty or whitespace-only text and for a single token
    that starts with ``/`` and contains no space; ``False`` otherwise.
    """
    stripped = (display or "").strip()
    if not stripped:
        return True
    return stripped.startswith("/") and " " not in stripped
256
+
257
+
258
+ def ingest_claude(args, state, resolver, imported_dir: Path):
259
+ history_path = Path(os.path.expanduser(os.path.join(args.claude_home, "history.jsonl")))
260
+ if not history_path.exists():
261
+ return {"written": 0, "messages": 0, "sessions": 0}
262
+
263
+ offset_key = "claude_history_offset"
264
+ old_offset = int(state.get(offset_key, 0))
265
+ file_size = history_path.stat().st_size
266
+ if old_offset > file_size:
267
+ old_offset = 0
268
+
269
+ sessions = defaultdict(list)
270
+ new_messages = 0
271
+ with history_path.open("rb") as f:
272
+ f.seek(old_offset)
273
+ while True:
274
+ line = f.readline()
275
+ if not line:
276
+ break
277
+ try:
278
+ obj = json.loads(line.decode("utf-8", errors="ignore"))
279
+ except Exception:
280
+ continue
281
+
282
+ project_path = obj.get("project", "")
283
+ if resolver.from_path(project_path) != args.project:
284
+ continue
285
+
286
+ display = clean_text(obj.get("display", ""))
287
+ if not display or maybe_skip_claude_display(display):
288
+ continue
289
+
290
+ session_id = obj.get("sessionId") or "unknown"
291
+ date = parse_epoch_millis_date(obj.get("timestamp"))
292
+ ts = obj.get("timestamp")
293
+ sessions[session_id].append((date, ts, display, project_path))
294
+ new_messages += 1
295
+
296
+ state[offset_key] = f.tell()
297
+
298
+ wrote = 0
299
+ for session_id, items in sessions.items():
300
+ items.sort(key=lambda x: int(x[1]) if x[1] else 0)
301
+ date = items[0][0]
302
+ out_path = imported_dir / "claude" / f"{date}-claude-{slugify(session_id)}.md"
303
+ ensure_dir(out_path.parent)
304
+ is_new = not out_path.exists()
305
+
306
+ with out_path.open("a", encoding="utf-8") as out:
307
+ if is_new:
308
+ out.write(f"# Imported Session: Claude {session_id}\n\n")
309
+ out.write(f"**Date:** {date}\n")
310
+ out.write(f"**Project:** {args.project}\n")
311
+ out.write(f"**Machine:** {get_machine_id()}\n")
312
+ out.write(f"**Imported At:** {now_utc_iso()}\n\n")
313
+ out.write("## User Messages\n")
314
+
315
+ for _, ts, display, _ in items[:400]:
316
+ stamp = ""
317
+ if ts:
318
+ try:
319
+ stamp = dt.datetime.utcfromtimestamp(int(ts) / 1000.0).isoformat() + "Z"
320
+ except Exception:
321
+ stamp = str(ts)
322
+ if stamp:
323
+ out.write(f"- [{stamp}] {display}\n")
324
+ else:
325
+ out.write(f"- {display}\n")
326
+ wrote += 1
327
+
328
+ return {"written": wrote, "messages": new_messages, "sessions": len(sessions)}
329
+
330
+
331
def main():
    """Command-line entry point: import Codex and Claude sessions for a project.

    Parses the arguments, loads the state file, runs both ingesters against
    ``<shared-root>/<project>/sessions/imported``, saves the updated state
    and prints a JSON summary to standard output.
    """
    parser = argparse.ArgumentParser(description="Ingest Codex + Claude sessions into shared qmd memory")
    parser.add_argument("--project", required=True, help="Project slug (kebab-case)")
    parser.add_argument("--shared-root", required=True, help="Shared root directory")
    parser.add_argument("--state-file", required=True, help="State file path")
    parser.add_argument("--codex-home", default=os.path.expanduser("~/.codex"))
    parser.add_argument("--claude-home", default=os.path.expanduser("~/.claude"))
    args = parser.parse_args()

    shared_root = Path(os.path.expanduser(args.shared_root)).resolve()
    state_file = Path(os.path.expanduser(args.state_file)).resolve()
    project_dir = shared_root / args.project
    imported_dir = project_dir / "sessions" / "imported"
    ensure_dir(imported_dir)

    state = load_json(state_file, {})
    if not isinstance(state, dict):
        # A state file holding valid JSON that is not an object (e.g. "[]" or
        # "null") would break the dict operations below; start from scratch.
        state = {}
    resolver = ProjectSlugResolver()

    codex_stats = ingest_codex(args, state, resolver, imported_dir)
    claude_stats = ingest_claude(args, state, resolver, imported_dir)
    save_json(state_file, state)

    result = {
        "project": args.project,
        "codex": codex_stats,
        "claude": claude_stats,
        "state_file": str(state_file),
        "project_dir": str(project_dir),
        "timestamp": now_utc_iso(),
    }
    json.dump(result, sys.stdout, indent=2)
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()
@@ -0,0 +1,32 @@
1
#!/usr/bin/env bash
# Installer wrapper: makes sure qmd is available (unless --client is given),
# marks the bundled scripts executable and hands over to "total-recall install".
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CLI="$SCRIPT_DIR/total-recall"

# Client-only installs don't need qmd — skip the check
IS_CLIENT=0
for arg in "$@"; do
  case "$arg" in
    --client) IS_CLIENT=1 ;;
  esac
done

if [[ "$IS_CLIENT" == "0" ]] && ! command -v qmd >/dev/null 2>&1; then
  # qmd is installed through bun, so bun has to be present first.
  if ! command -v bun >/dev/null 2>&1; then
    echo "qmd is required and bun is not installed."
    echo "Install bun first: curl -fsSL https://bun.sh/install | bash"
    exit 1
  fi
  echo "Installing qmd..."
  bun install -g github:tobi/qmd
fi

# One chmod per file so that, under "set -e", the first failure stops the script.
for executable in "$CLI" "$SCRIPT_DIR/ingest_sessions.py" "$SCRIPT_DIR/total-recall-server"; do
  chmod +x "$executable"
done

echo "Running automated install..."
"$CLI" install "$@"

echo "Done."